From c6a6475743c2cd9f182bb6755c622a21ed2e50b8 Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Fri, 29 Dec 2023 16:55:42 +0000 Subject: [PATCH 01/71] EXSWHTEC-266 - Implement new and extend existing tests for coalesced group partitioning #155 Change-Id: Ia971913ef7550e9ce4c886c044c30dc46578beb3 --- catch/unit/cooperativeGrps/CMakeLists.txt | 1 + .../coalesced_group_tiled_partition.cc | 685 ++++++++++++++++++ .../cooperative_groups_common.hh | 1 + 3 files changed, 687 insertions(+) create mode 100644 catch/unit/cooperativeGrps/coalesced_group_tiled_partition.cc diff --git a/catch/unit/cooperativeGrps/CMakeLists.txt b/catch/unit/cooperativeGrps/CMakeLists.txt index 1a5f234e3b..9732e58f23 100644 --- a/catch/unit/cooperativeGrps/CMakeLists.txt +++ b/catch/unit/cooperativeGrps/CMakeLists.txt @@ -2,6 +2,7 @@ set(TEST_SRC thread_block.cc thread_block_tile.cc + coalesced_group_tiled_partition.cc hipCGThreadBlockType_old.cc hipCGMultiGridGroupType_old.cc hipCGGridGroupType_old.cc diff --git a/catch/unit/cooperativeGrps/coalesced_group_tiled_partition.cc b/catch/unit/cooperativeGrps/coalesced_group_tiled_partition.cc new file mode 100644 index 0000000000..f14a60caea --- /dev/null +++ b/catch/unit/cooperativeGrps/coalesced_group_tiled_partition.cc @@ -0,0 +1,685 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "cooperative_groups_common.hh" + +#include +#include +#include +#include + +#include +#include +#include +#include + +/** + * @addtogroup coalesced_group_tile coalesced_group_tile + * @{ + * @ingroup DeviceLanguageTest + * Contains unit tests for partitioning of coalesced groups into tiled partitions + */ + +namespace cg = cooperative_groups; + +namespace { +#if HT_AMD +constexpr auto kMaskMin = std::numeric_limits().min(); +constexpr auto kMaskLimit = std::numeric_limits().max(); +#else +constexpr auto kMaskMin = std::numeric_limits().min(); +constexpr auto kMaskLimit = std::numeric_limits().max(); +#endif +} // namespace + +static unsigned int GenerateTileSizes() { +#if HT_AMD + return GENERATE(2u, 4u, 8u, 16u, 32u, 64u); +#else + return GENERATE(2u, 4u, 8u, 16u, 32u); +#endif +} + +static inline std::mt19937& GetRandomGenerator() { + static std::mt19937 mt(11); + return mt; +} + +template static inline T GenerateRandomInteger(const T min, const T max) { + std::uniform_int_distribution dist(min, max); + return dist(GetRandomGenerator()); +} + +template static auto coalesce_threads(const uint64_t mask) { + std::tuple, unsigned int> res; + auto& [threads, count] = res; + + count = 0u; + for (auto i = 0u; i < warp_size; ++i) { + if (mask & (1u << i)) { + threads[count++] = i; + } + } + + return res; +} + +template __device__ bool deactivate_thread(uint64_t* active_masks) { + const cg::thread_block_tile warp = + cg::tiled_partition(cg::this_thread_block()); + const auto block = 
cg::this_thread_block(); + const auto warps_per_block = (block.size() + warp_size - 1) / warp_size; + const auto block_rank = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x; + const auto idx = block_rank * warps_per_block + block.thread_rank() / warp.size(); + + return !(active_masks[idx] & (1u << warp.thread_rank())); +} + + +template +__global__ void coalesced_group_tiled_partition_size_getter(uint64_t* active_masks, + unsigned int tile_size, + unsigned int* sizes) { + if (deactivate_thread(active_masks)) { + return; + } + sizes[thread_rank_in_grid()] = cg::tiled_partition(cg::coalesced_threads(), tile_size).size(); +} + +template +__global__ void coalesced_group_tiled_partition_thread_rank_getter(uint64_t* active_masks, + unsigned int tile_size, + unsigned int* sizes) { + if (deactivate_thread(active_masks)) { + return; + } + + sizes[thread_rank_in_grid()] = + cg::tiled_partition(cg::coalesced_threads(), tile_size).thread_rank(); +} + +/** + * Test Description + * ------------------------ + * - Deactivates threads based on passed in mask and creates tiled partitions over coalesced + * threads for each of the valid sizes{2, 4, 8, 16, 32, 64(if AMD)} and writes the return values of + * size and thread_rank member functions to an output array that is validated on the host side. 
+ * Test source + * ------------------------ + * - unit/cooperativeGrps/coalesced_group_tiled_partition.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Getters_Positive_Basic") { + const auto tile_size = GenerateTileSizes(); + INFO("Tile size: " << tile_size); + auto blocks = GenerateBlockDimensions(); + auto threads = GenerateThreadDimensions(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z); + CPUGrid grid(blocks, threads); + + const auto alloc_size = grid.thread_count_ * sizeof(unsigned int); + LinearAllocGuard uint_arr_dev(LinearAllocs::hipMalloc, alloc_size); + LinearAllocGuard uint_arr(LinearAllocs::hipHostMalloc, alloc_size); + + const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize; + const auto warps_in_grid = warps_in_block * grid.block_count_; + LinearAllocGuard active_masks_dev(LinearAllocs::hipMalloc, + warps_in_grid * sizeof(uint64_t)); + LinearAllocGuard active_masks(LinearAllocs::hipHostMalloc, + warps_in_grid * sizeof(uint64_t)); + + std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid, + [] { return GenerateRandomInteger(0u, std::numeric_limits().max()); }); + HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t), + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size)); + coalesced_group_tiled_partition_size_getter<32> + <<>>(active_masks_dev.ptr(), tile_size, uint_arr_dev.ptr()); + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size)); + coalesced_group_tiled_partition_thread_rank_getter<32> + <<>>(active_masks_dev.ptr(), tile_size, uint_arr_dev.ptr()); + + const auto tail 
= warps_in_block * kWarpSize - grid.threads_in_block_count_; + + // validate size + for (auto i = 0u; i < warps_in_grid; ++i) { + auto current_warp_mask = active_masks.ptr()[i]; + const auto shift_amount = + (tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block); + current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount; + + const auto [active_threads, active_thread_count] = + coalesce_threads(current_warp_mask); + + const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block); + const auto num_tiles = (active_thread_count + tile_size - 1) / tile_size; + const auto tile_tail = num_tiles * tile_size - active_thread_count; + // Step tile-sized window over active threads + for (auto t = 0u; t < active_thread_count; t += tile_size) { + const auto window_start = t; + const auto window_end = t + tile_size; + // Iterate through window + for (auto k = window_start; k < window_end && k < active_thread_count; ++k) { + const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails; + const auto expected_val = tile_size - tile_tail * (t + tile_size >= active_thread_count); + const auto actual_val = uint_arr.ptr()[global_thread_idx]; + INFO("global index: " << global_thread_idx); + if (actual_val != expected_val) { + REQUIRE(actual_val == expected_val); + } + } + } + } + + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + // validate rank + for (auto i = 0u; i < warps_in_grid; ++i) { + auto current_warp_mask = active_masks.ptr()[i]; + const auto shift_amount = + (tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block); + current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount; + + const auto [active_threads, active_thread_count] = + coalesce_threads(current_warp_mask); + + const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block); + // Step tile-sized window over active threads + for (auto t = 
0u; t < active_thread_count; t += tile_size) { + const auto window_start = t; + const auto window_end = t + tile_size; + // Iterate through window + for (auto k = window_start; k < window_end && k < active_thread_count; ++k) { + const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails; + const auto expected_val = k % tile_size; + const auto actual_val = uint_arr.ptr()[global_thread_idx]; + INFO("global index: " << global_thread_idx); + if (actual_val != expected_val) { + REQUIRE(actual_val == expected_val); + } + } + } + } +} + + +template +__global__ void coalesced_group_tiled_partition_shfl_up(uint64_t* active_masks, T* const out, + const unsigned int tile_size, + const unsigned int delta) { + if (deactivate_thread(active_masks)) { + return; + } + const cg::thread_block_tile warp = + cg::tiled_partition(cg::this_thread_block()); + T var = static_cast(warp.thread_rank()); + + const auto tile = cg::tiled_partition(cg::coalesced_threads(), tile_size); + out[thread_rank_in_grid()] = tile.shfl_up(var, delta); +} + + +template static void CoalescedGroupTiledPartitonShflUpTestImpl() { + const auto tile_size = GenerateTileSizes(); + INFO("Tile size: " << tile_size); + auto blocks = GenerateBlockDimensionsForShuffle(); + auto threads = GenerateThreadDimensionsForShuffle(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z); + const auto delta = GENERATE_COPY(range(0u, tile_size)); + INFO("Delta: " << delta); + CPUGrid grid(blocks, threads); + + const auto alloc_size = grid.thread_count_ * sizeof(T); + LinearAllocGuard uint_arr_dev(LinearAllocs::hipMalloc, alloc_size); + LinearAllocGuard uint_arr(LinearAllocs::hipHostMalloc, alloc_size); + + const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize; + const auto warps_in_grid = warps_in_block * grid.block_count_; + LinearAllocGuard 
active_masks_dev(LinearAllocs::hipMalloc, + warps_in_grid * sizeof(uint64_t)); + LinearAllocGuard active_masks(LinearAllocs::hipHostMalloc, + warps_in_grid * sizeof(uint64_t)); + + std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid, + [] { return GenerateRandomInteger(kMaskMin, kMaskLimit); }); + HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t), + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size)); + coalesced_group_tiled_partition_shfl_up + <<>>(active_masks_dev.ptr(), uint_arr_dev.ptr(), tile_size, delta); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_; + + for (auto i = 0u; i < warps_in_grid; ++i) { + auto current_warp_mask = active_masks.ptr()[i]; + const auto shift_amount = + (tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block); + current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount; + + const auto [active_threads, active_thread_count] = + coalesce_threads(current_warp_mask); + + const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block); + // Step tile-sized window over active threads + for (auto t = 0u; t < active_thread_count; t += tile_size) { + const auto window_start = t + delta; + const auto window_end = t + tile_size; + // Iterate through window + for (auto k = window_start; k < window_end && k < active_thread_count; ++k) { + const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails; + const auto expected_val = active_threads[k - delta]; + const auto actual_val = uint_arr.ptr()[global_thread_idx]; + INFO("global index: " << global_thread_idx); + if (actual_val != expected_val) { + REQUIRE(actual_val == expected_val); + } + } + } + } +} + +/** + * Test Description + * ------------------------ 
+ * - Validates the shuffle up behavior of tiled partitions of all valid sizes{2, 4, 8, 16, 32, + * 64(if AMD)} for delta values of [0, tile size). The partitions are created over a coalesced + * group, with memberships of threads in the coalesced group being controlled via a passed in active + * mask. The test is run for all overloads of shfl_up. + * Test source + * ------------------------ + * - unit/cooperativeGrps/coalesced_group_tiled_partition.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Shfl_Up_Positive_Basic", "", int, + unsigned int, long, unsigned long, long long, unsigned long long, float, + double) { + CoalescedGroupTiledPartitonShflUpTestImpl(); +} + + +template +__global__ void coalesced_group_tiled_partition_shfl_down(uint64_t* active_masks, T* const out, + const unsigned int tile_size, + const unsigned int delta) { + if (deactivate_thread(active_masks)) { + return; + } + const cg::thread_block_tile warp = + cg::tiled_partition(cg::this_thread_block()); + T var = static_cast(warp.thread_rank()); + + const auto tile = cg::tiled_partition(cg::coalesced_threads(), tile_size); + out[thread_rank_in_grid()] = tile.shfl_down(var, delta); +} + + +template static void CoalescedGroupTiledPartitonShflDownTestImpl() { + const auto tile_size = GenerateTileSizes(); + INFO("Tile size: " << tile_size); + auto blocks = GenerateBlockDimensionsForShuffle(); + auto threads = GenerateThreadDimensionsForShuffle(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z); + const auto delta = GENERATE_COPY(range(0u, tile_size)); + INFO("Delta: " << delta); + CPUGrid grid(blocks, threads); + + const auto alloc_size = grid.thread_count_ * sizeof(T); + LinearAllocGuard uint_arr_dev(LinearAllocs::hipMalloc, alloc_size); + LinearAllocGuard 
uint_arr(LinearAllocs::hipHostMalloc, alloc_size); + + const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize; + const auto warps_in_grid = warps_in_block * grid.block_count_; + LinearAllocGuard active_masks_dev(LinearAllocs::hipMalloc, + warps_in_grid * sizeof(uint64_t)); + LinearAllocGuard active_masks(LinearAllocs::hipHostMalloc, + warps_in_grid * sizeof(uint64_t)); + + std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid, + [] { return GenerateRandomInteger(kMaskMin, kMaskLimit); }); + HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t), + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size)); + coalesced_group_tiled_partition_shfl_down + <<>>(active_masks_dev.ptr(), uint_arr_dev.ptr(), tile_size, delta); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_; + + for (auto i = 0u; i < warps_in_grid; ++i) { + auto current_warp_mask = active_masks.ptr()[i]; + const auto shift_amount = + (tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block); + current_warp_mask = (current_warp_mask << shift_amount) >> shift_amount; + + const auto [active_threads, active_thread_count] = + coalesce_threads(current_warp_mask); + + if (delta >= active_thread_count) { + continue; + } + + const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block); + // Step tile-sized window over active threads + for (auto t = 0u; t < active_thread_count; t += tile_size) { + const auto window_start = t; + const auto window_end = t + tile_size - delta; + // Iterate through window + for (auto k = window_start; k < window_end && k < active_thread_count - delta; ++k) { + const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails; + const auto 
expected_val = active_threads[k + delta]; + const auto actual_val = uint_arr.ptr()[global_thread_idx]; + INFO("global index: " << global_thread_idx); + if (actual_val != expected_val) { + REQUIRE(actual_val == expected_val); + } + } + } + } +} + +/** + * Test Description + * ------------------------ + * - Validates the shuffle down behavior of tiled partitions of all valid sizes{2, 4, 8, 16, 32, + * 64(if AMD)} for delta values of [0, tile size). The partitions are created over a coalesced + * group, with memberships of threads in the coalesced group being controlled via a passed in active + * mask. The test is run for all overloads of shfl_down. + * Test source + * ------------------------ + * - unit/cooperativeGrps/coalesced_group_tiled_partition.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Shfl_Down_Positive_Basic", "", int, + unsigned int, long, unsigned long, long long, unsigned long long, float, + double) { + CoalescedGroupTiledPartitonShflDownTestImpl(); +} + + +template +__global__ void coalesced_group_tiled_partition_shfl(uint64_t* active_masks, uint8_t* target_lanes, + T* const out, const unsigned int tile_size) { + if (deactivate_thread(active_masks)) { + return; + } + const cg::thread_block_tile warp = + cg::tiled_partition(cg::this_thread_block()); + T var = static_cast(warp.thread_rank()); + + const auto tile = cg::tiled_partition(cg::coalesced_threads(), tile_size); + out[thread_rank_in_grid()] = tile.shfl(var, target_lanes[tile.thread_rank()]); +} + +template static void CoalescedGroupTiledPartitonShflTestImpl() { + const auto tile_size = GenerateTileSizes(); + INFO("Tile size: " << tile_size); + auto blocks = GenerateBlockDimensionsForShuffle(); + auto threads = GenerateThreadDimensionsForShuffle(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << 
", z " << threads.z); + CPUGrid grid(blocks, threads); + + const auto alloc_size = grid.thread_count_ * sizeof(T); + LinearAllocGuard uint_arr_dev(LinearAllocs::hipMalloc, alloc_size); + LinearAllocGuard uint_arr(LinearAllocs::hipHostMalloc, alloc_size); + + const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize; + const auto warps_in_grid = warps_in_block * grid.block_count_; + LinearAllocGuard active_masks_dev(LinearAllocs::hipMalloc, + warps_in_grid * sizeof(uint64_t)); + LinearAllocGuard active_masks(LinearAllocs::hipHostMalloc, + warps_in_grid * sizeof(uint64_t)); + LinearAllocGuard target_lanes_dev(LinearAllocs::hipMalloc, tile_size * sizeof(uint8_t)); + LinearAllocGuard target_lanes(LinearAllocs::hipHostMalloc, tile_size * sizeof(uint8_t)); + + std::generate(target_lanes.ptr(), target_lanes.ptr() + tile_size, + [tile_size] { return GenerateRandomInteger(0, static_cast(2 * tile_size)); }); + std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid, + [] { return GenerateRandomInteger(kMaskMin, kMaskLimit); }); + HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t), + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(target_lanes_dev.ptr(), target_lanes.ptr(), tile_size * sizeof(uint8_t), + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemsetAsync(uint_arr_dev.ptr(), 0, alloc_size)); + coalesced_group_tiled_partition_shfl<<>>( + active_masks_dev.ptr(), target_lanes_dev.ptr(), uint_arr_dev.ptr(), tile_size); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipMemcpy(uint_arr.ptr(), uint_arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_; + + for (auto i = 0u; i < warps_in_grid; ++i) { + auto current_warp_mask = active_masks.ptr()[i]; + const auto shift_amount = + (tail + 32 * TestContext::get().isNvidia()) * !((i + 1) % warps_in_block); + current_warp_mask = 
(current_warp_mask << shift_amount) >> shift_amount; + + const auto [active_threads, active_thread_count] = + coalesce_threads(current_warp_mask); + + const auto tails = tail * (i / warps_in_block) * (i >= warps_in_block); + // Step tile-sized window over active threads + for (auto t = 0u; t < active_thread_count; t += tile_size) { + const auto window_start = t; + const auto window_end = t + tile_size; + // Iterate through window + for (auto k = window_start; k < window_end && k < active_thread_count; ++k) { + const auto global_thread_idx = i * kWarpSize + active_threads[k] - tails; + const auto target_lane = target_lanes.ptr()[k % tile_size]; + if (target_lane >= tile_size || target_lane >= active_thread_count - t) { + continue; + } + const auto expected_val = active_threads[t + target_lane]; + const auto actual_val = uint_arr.ptr()[global_thread_idx]; + INFO("global index: " << global_thread_idx); + if (actual_val != expected_val) { + REQUIRE(actual_val == expected_val); + } + } + } + } +} + +/** + * Test Description + * ------------------------ + * - Validates the shuffle behavior of tiled partitions of all valid sizes{2, 4, 8, 16, 32, + * 64(if AMD)} for delta values of [0, tile size). The partitions are created over a coalesced + * group, with memberships of threads in the coalesced group being controlled via a passed in active + * mask. The test is run for all overloads of shfl. 
+ * Test source + * ------------------------ + * - unit/cooperativeGrps/coalesced_group_tiled_partition.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Shfl_Positive_Basic", "", int, + unsigned int, long, unsigned long, long long, unsigned long long, float, + double) { + CoalescedGroupTiledPartitonShflTestImpl(); +} + + +template +__global__ void coalesced_group_tiled_partition_sync_check(uint64_t* active_masks, T* global_data, + unsigned int* wait_modifiers, + size_t tile_size) { + if (deactivate_thread(active_masks)) { + return; + } + + extern __shared__ uint8_t shared_data[]; + T* const data = use_global ? global_data : reinterpret_cast(shared_data); + const auto tid = cg::this_grid().thread_rank(); + const auto block = cg::this_thread_block(); + const auto coalesced = cg::coalesced_threads(); + const auto partition = cg::tiled_partition(coalesced, tile_size); + const auto data_idx = [&block](unsigned int i) { return use_global ? 
i : (i % block.size()); }; + + const auto wait_modifier = wait_modifiers[tid]; + + const auto block_rank = tid / block.size(); + const auto warp_rank = block.thread_rank() / warp_size; + const auto warp_base = block_rank * block.size() + warp_rank * warp_size; + const auto global_idx = warp_base + coalesced.thread_rank(); + + busy_wait(wait_modifier); + data[data_idx(global_idx)] = partition.thread_rank(); + partition.sync(); + + bool valid = true; + const auto tile_rank = coalesced.thread_rank() / tile_size; + for (auto i = 0u; i < tile_size; ++i) { + const auto target_rank_in_tile = (coalesced.thread_rank() + i) % tile_size; + const auto target_rank_in_warp = tile_rank * tile_size + target_rank_in_tile; + if (target_rank_in_warp >= coalesced.size()) { + continue; + } + if (!(valid &= (data[data_idx(warp_base + target_rank_in_warp)] == target_rank_in_tile))) { + break; + } + } + // Validate + partition.sync(); + data[data_idx(global_idx)] = valid; + if constexpr (!use_global) { + global_data[global_idx] = data[data_idx(global_idx)]; + } +} + +template void CoalescedGroupTiledPartitionSyncTest() { + const auto randomized_run_count = GENERATE(range(0, cmd_options.cg_iterations)); + INFO("Run number: " << randomized_run_count + 1); + const auto tile_size = GenerateTileSizes(); + INFO("Tile size: " << tile_size); + auto blocks = GenerateBlockDimensionsForShuffle(); + auto threads = GenerateThreadDimensionsForShuffle(); + INFO("Grid dimensions: x " << blocks.x << ", y " << blocks.y << ", z " << blocks.z); + INFO("Block dimensions: x " << threads.x << ", y " << threads.y << ", z " << threads.z); + CPUGrid grid(blocks, threads); + + const auto alloc_size = grid.thread_count_ * sizeof(T); + const auto alloc_size_per_block = alloc_size / grid.block_count_; + int max_shared_mem_per_block = 0; + HIP_CHECK(hipDeviceGetAttribute(&max_shared_mem_per_block, + hipDeviceAttributeMaxSharedMemoryPerBlock, 0)); + if (!global_memory && (max_shared_mem_per_block < 
alloc_size_per_block)) { + return; + } + + LinearAllocGuard arr_dev(LinearAllocs::hipMalloc, alloc_size); + LinearAllocGuard arr(LinearAllocs::hipHostMalloc, alloc_size); + LinearAllocGuard wait_modifiers_dev(LinearAllocs::hipMalloc, + grid.thread_count_ * sizeof(unsigned int)); + LinearAllocGuard wait_modifiers(LinearAllocs::hipHostMalloc, + grid.thread_count_ * sizeof(unsigned int)); + const auto warps_in_block = (grid.threads_in_block_count_ + kWarpSize - 1) / kWarpSize; + const auto warps_in_grid = warps_in_block * grid.block_count_; + LinearAllocGuard active_masks_dev(LinearAllocs::hipMalloc, + warps_in_grid * sizeof(uint64_t)); + LinearAllocGuard active_masks(LinearAllocs::hipHostMalloc, + warps_in_grid * sizeof(uint64_t)); + if (randomized_run_count != 0) { + std::generate(wait_modifiers.ptr(), wait_modifiers.ptr() + grid.thread_count_, + [] { return GenerateRandomInteger(0u, 1500u); }); + } else { + std::fill_n(wait_modifiers.ptr(), grid.thread_count_, 0u); + } + std::generate(active_masks.ptr(), active_masks.ptr() + warps_in_grid, + [] { return GenerateRandomInteger(kMaskMin, kMaskLimit); }); + + HIP_CHECK(hipMemcpy(active_masks_dev.ptr(), active_masks.ptr(), warps_in_grid * sizeof(uint64_t), + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(wait_modifiers_dev.ptr(), wait_modifiers.ptr(), + grid.thread_count_ * sizeof(unsigned int), hipMemcpyHostToDevice)); + + const auto shared_memory_size = global_memory ? 
0u : alloc_size_per_block; + coalesced_group_tiled_partition_sync_check + <<>>(active_masks_dev.ptr(), arr_dev.ptr(), + wait_modifiers_dev.ptr(), tile_size); + HIP_CHECK(hipGetLastError()); + + HIP_CHECK(hipMemcpy(arr.ptr(), arr_dev.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + const auto tail = warps_in_block * kWarpSize - grid.threads_in_block_count_; + for (int i = 0u; i < grid.block_count_; ++i) { + for (int j = 0u; j < warps_in_block; ++j) { + const auto warp_idx = i * warps_in_block + j; + auto mask = active_masks.ptr()[warp_idx]; + const auto shift_amount = + (tail + 32 * TestContext::get().isNvidia()) * !((warp_idx + 1) % warps_in_block); + mask = (mask << shift_amount) >> shift_amount; + const auto active_count = std::bitset(mask).count(); + const auto start_offset = i * grid.threads_in_block_count_ + j * kWarpSize; + const auto end_offset = start_offset + active_count; + const auto valid = + std::all_of(arr.ptr() + start_offset, arr.ptr() + end_offset, [](T e) { return e; }); + if (!valid) { + REQUIRE(valid); + } + } + } +} + +/** + * Test Description + * ------------------------ + * - Launches a kernel wherein threads in each warp are deactivated based on a passed bitmask. + * Coalesced groups are formed and divided into tiled partitions(size of 2, 4, 8, 16, 32, 64 if AMD) + * and every thread writes its intra-tile rank into an array slot determined by its global warp rank + * and coalesced group rank. The array is either in global or dynamic shared memory based on a + * compile time switch, and the test is run for arrays of 1, 2, and 4 byte elements. Before the + * write each thread executes a busy wait loop for a random amount of clock cycles, the amount being + * read from an input array. After the write a tile-wide sync is performed and each thread validates + * that it can read the expected values that other threads within the same tile have written to + * their respective array slots. 
+ * Test source + * ------------------------ + * - unit/cooperativeGrps/coalesced_group_tiled_partition.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +uint64_t counter = 0; +TEMPLATE_TEST_CASE("Unit_Coalesced_Group_Tiled_Partition_Sync_Positive_Basic", "", uint8_t, + uint16_t, uint32_t) { + SECTION("Global memory") { CoalescedGroupTiledPartitionSyncTest(); } + SECTION("Shared memory") { CoalescedGroupTiledPartitionSyncTest(); } +} diff --git a/catch/unit/cooperativeGrps/cooperative_groups_common.hh b/catch/unit/cooperativeGrps/cooperative_groups_common.hh index 20d0d4aa44..19ad0dd092 100644 --- a/catch/unit/cooperativeGrps/cooperative_groups_common.hh +++ b/catch/unit/cooperativeGrps/cooperative_groups_common.hh @@ -76,3 +76,4 @@ template bool CheckDimensions(unsigned int device, T kernel, dim3 bloc return true; } + From 2eb17f0696c3ff9f45d15d1d74a3c40f84fbeb18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 15:46:50 +0100 Subject: [PATCH 02/71] EXSWHTEC-262 - Introduce build dependencies for atomic arithmetic operations #180 Change-Id: Ic2293f273158613312dc39914da087fc0a462242 --- catch/include/hip_test_defgroups.hh | 115 ++++++++++++++++++++++++++++ catch/include/resource_guards.hh | 85 ++++++++++---------- catch/unit/atomics/CMakeLists.txt | 4 +- 3 files changed, 161 insertions(+), 43 deletions(-) diff --git a/catch/include/hip_test_defgroups.hh b/catch/include/hip_test_defgroups.hh index 680dfa8a04..e108f296fd 100644 --- a/catch/include/hip_test_defgroups.hh +++ b/catch/include/hip_test_defgroups.hh @@ -172,6 +172,121 @@ THE SOFTWARE. * @} */ +/** + * @defgroup AtomicsTest Device Atomics + * @{ + * This section describes tests for the Device Atomic APIs. 
+ */ + +/** + * @addtogroup atomicAdd atomicAdd + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Compiles atomicAdd with invalid parameters. + * - Compiles the source with specialized Python tool. + * -# Utilizes sub-process to invoke compilation of faulty source. + * -# Performs post-processing of compiler output and counts errors. + * Test source + * ------------------------ + * - unit/atomics/CMakeLists.txt + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicAdd_Negative_Parameters") {} +/** + * End doxygen group atomicAdd. + * @} + */ + +/** + * @addtogroup atomicSub atomicSub + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Compiles atomicSub with invalid parameters. + * - Compiles the source with specialized Python tool. + * -# Utilizes sub-process to invoke compilation of faulty source. + * -# Performs post-processing of compiler output and counts errors. + * Test source + * ------------------------ + * - unit/atomics/CMakeLists.txt + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicSub_Negative_Parameters") {} +/** + * End doxygen group atomicSub. + * @} + */ + +/** + * @addtogroup atomicInc atomicInc + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Compiles atomicInc with invalid parameters. + * - Compiles the source with specialized Python tool. + * -# Utilizes sub-process to invoke compilation of faulty source. + * -# Performs post-processing of compiler output and counts errors. + * Test source + * ------------------------ + * - unit/atomics/CMakeLists.txt + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicInc_Negative_Parameters") {} +/** + * End doxygen group atomicInc. 
+ * @} + */ + +/** + * @addtogroup atomicDec atomicDec + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Compiles atomicDec with invalid parameters. + * - Compiles the source with specialized Python tool. + * -# Utilizes sub-process to invoke compilation of faulty source. + * -# Performs post-processing of compiler output and counts errors. + * Test source + * ------------------------ + * - unit/atomics/CMakeLists.txt + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicDec_Negative_Parameters") {} +/** + * End doxygen group atomicDec. + * @} + */ + +/** + * End doxygen group AtomicsTest. + * @} + */ + /** * @defgroup PrintfTest Printf API Management * @{ diff --git a/catch/include/resource_guards.hh b/catch/include/resource_guards.hh index 5f8f2cbee4..262b7b4437 100644 --- a/catch/include/resource_guards.hh +++ b/catch/include/resource_guards.hh @@ -35,15 +35,15 @@ enum class LinearAllocs { inline std::string to_string(const LinearAllocs allocation_type) { switch (allocation_type) { case LinearAllocs::malloc: - return "host pageable"; + return "malloc"; case LinearAllocs::mallocAndRegister: - return "registered"; + return "malloc + hipHostRegister"; case LinearAllocs::hipHostMalloc: - return "host pinned"; + return "hipHostMalloc"; case LinearAllocs::hipMalloc: - return "device malloc"; + return "hipMalloc"; case LinearAllocs::hipMallocManaged: - return "managed"; + return "hipMallocManaged"; default: return "unknown alloc type"; } @@ -83,24 +83,35 @@ template class LinearAllocGuard { LinearAllocGuard(const LinearAllocGuard&) = delete; - LinearAllocGuard(LinearAllocGuard&& o) - : allocation_type_{o.allocation_type_}, ptr_{o.ptr_}, host_ptr_{o.host_ptr_} { - o.allocation_type_ = LinearAllocs::noAlloc; - o.ptr_ = nullptr; - o.host_ptr_ = nullptr; - } + LinearAllocGuard(LinearAllocGuard&& o) { *this = std::move(o); } LinearAllocGuard& operator=(LinearAllocGuard&& o) { 
- allocation_type_ = o.allocation_type_; - ptr_ = o.ptr_; - host_ptr_ = o.host_ptr_; + if (this != &o) { + dealloc(); - o.allocation_type_ = LinearAllocs::noAlloc; - o.ptr_ = nullptr; - o.host_ptr_ = nullptr; + allocation_type_ = o.allocation_type_; + ptr_ = o.ptr_; + host_ptr_ = o.host_ptr_; + + o.allocation_type_ = LinearAllocs::noAlloc; + o.ptr_ = nullptr; + o.host_ptr_ = nullptr; + } + + return *this; } - ~LinearAllocGuard() { + ~LinearAllocGuard() { dealloc(); } + + T* ptr() const { return ptr_; }; + T* host_ptr() const { return host_ptr_; } + + private: + LinearAllocs allocation_type_ = LinearAllocs::noAlloc; + T* ptr_ = nullptr; + T* host_ptr_ = nullptr; + + void dealloc() { // No Catch macros, don't want to possibly throw in the destructor if (ptr_ != nullptr) { switch (allocation_type_) { @@ -123,14 +134,6 @@ template class LinearAllocGuard { } } } - - T* ptr() const { return ptr_; }; - T* host_ptr() const { return host_ptr_; } - - private: - LinearAllocs allocation_type_ = LinearAllocs::noAlloc; - T* ptr_ = nullptr; - T* host_ptr_ = nullptr; }; template class LinearAllocGuardMultiDim { @@ -266,24 +269,24 @@ class StreamGuard { StreamGuard(const StreamGuard&) = delete; - StreamGuard(StreamGuard&& o) - : stream_type_{o.stream_type_}, flags_{o.flags_}, priority_{o.priority_}, stream_{o.stream_} { - o.stream_type_ = Streams::nullstream; - o.flags_ = 0u; - o.priority_ = 0; - o.stream_ = nullptr; - } + StreamGuard(StreamGuard&& o) { *this = std::move(o); } StreamGuard& operator=(StreamGuard&& o) { - stream_type_ = o.stream_type_; - flags_ = o.flags_; - priority_ = o.priority_; - stream_ = o.stream_; + if (this != &o) { + if (stream_type_ == Streams::created) { + static_cast(hipStreamDestroy(stream_)); + } - o.stream_type_ = Streams::nullstream; - o.flags_ = 0u; - o.priority_ = 0; - o.stream_ = nullptr; + stream_type_ = o.stream_type_; + flags_ = o.flags_; + priority_ = o.priority_; + stream_ = o.stream_; + + o.stream_type_ = Streams::nullstream; + o.flags_ = 
0u; + o.priority_ = 0; + o.stream_ = nullptr; + } return *this; } diff --git a/catch/unit/atomics/CMakeLists.txt b/catch/unit/atomics/CMakeLists.txt index d8066a2f1a..2aef527440 100644 --- a/catch/unit/atomics/CMakeLists.txt +++ b/catch/unit/atomics/CMakeLists.txt @@ -24,11 +24,11 @@ set(TEST_SRC ) if(HIP_PLATFORM MATCHES "nvidia") - set_source_files_properties(atomicExch_system.cc PROPERTIES COMPILE_FLAGS "-rdc=true -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") + set_source_files_properties(atomicExch_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") hip_add_exe_to_target(NAME AtomicsTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests - LINKER_LIBS "nvrtc -rdc=true -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") + LINKER_LIBS "nvrtc -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") elseif(HIP_PLATFORM MATCHES "amd") hip_add_exe_to_target(NAME AtomicsTest TEST_SRC ${TEST_SRC} From b6180a3354465f18d5147ed588b25ca7a2147830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 17:49:01 +0100 Subject: [PATCH 03/71] EXSWHTEC-263 - Implement Unit Tests for Atomic Bitwise Operations #191 Change-Id: I78cf8fc662b08e3e3ba1bdb13cb27cea22fdce9e --- catch/unit/atomics/CMakeLists.txt | 39 +- catch/unit/atomics/atomicAnd.cc | 222 +++++++++++ .../atomics/atomicAnd_negative_kernels.cc | 185 ++++++++++ .../atomics/atomicAnd_negative_kernels_rtc.hh | 223 +++++++++++ catch/unit/atomics/atomicAnd_system.cc | 109 ++++++ catch/unit/atomics/atomicOr.cc | 222 +++++++++++ .../unit/atomics/atomicOr_negative_kernels.cc | 177 +++++++++ .../atomics/atomicOr_negative_kernels_rtc.hh | 223 +++++++++++ 
catch/unit/atomics/atomicOr_system.cc | 109 ++++++ catch/unit/atomics/atomicXor.cc | 222 +++++++++++ .../atomics/atomicXor_negative_kernels.cc | 185 ++++++++++ .../atomics/atomicXor_negative_kernels_rtc.hh | 223 +++++++++++ catch/unit/atomics/atomicXor_system.cc | 109 ++++++ catch/unit/atomics/bitwise_common.hh | 345 ++++++++++++++++++ 14 files changed, 2587 insertions(+), 6 deletions(-) create mode 100644 catch/unit/atomics/atomicAnd.cc create mode 100644 catch/unit/atomics/atomicAnd_negative_kernels.cc create mode 100644 catch/unit/atomics/atomicAnd_negative_kernels_rtc.hh create mode 100644 catch/unit/atomics/atomicAnd_system.cc create mode 100644 catch/unit/atomics/atomicOr.cc create mode 100644 catch/unit/atomics/atomicOr_negative_kernels.cc create mode 100644 catch/unit/atomics/atomicOr_negative_kernels_rtc.hh create mode 100644 catch/unit/atomics/atomicOr_system.cc create mode 100644 catch/unit/atomics/atomicXor.cc create mode 100644 catch/unit/atomics/atomicXor_negative_kernels.cc create mode 100644 catch/unit/atomics/atomicXor_negative_kernels_rtc.hh create mode 100644 catch/unit/atomics/atomicXor_system.cc create mode 100644 catch/unit/atomics/bitwise_common.hh diff --git a/catch/unit/atomics/CMakeLists.txt b/catch/unit/atomics/CMakeLists.txt index 2aef527440..fa77a87a99 100644 --- a/catch/unit/atomics/CMakeLists.txt +++ b/catch/unit/atomics/CMakeLists.txt @@ -19,23 +19,50 @@ # THE SOFTWARE. 
set(TEST_SRC + atomicAnd.cc + atomicAnd_system.cc + atomicOr.cc + atomicOr_system.cc + atomicXor.cc + atomicXor_system.cc atomicExch.cc atomicExch_system.cc ) if(HIP_PLATFORM MATCHES "nvidia") + set_source_files_properties(atomicExch_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") + set_source_files_properties(atomicAnd_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") + set_source_files_properties(atomicOr_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") + set_source_files_properties(atomicXor_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicExch_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") hip_add_exe_to_target(NAME AtomicsTest - TEST_SRC ${TEST_SRC} - TEST_TARGET_NAME build_tests - LINKER_LIBS "nvrtc -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS "nvrtc -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") + set(EXPECTED_ERRORS 36) # EXSWHTEC-278 elseif(HIP_PLATFORM MATCHES "amd") hip_add_exe_to_target(NAME AtomicsTest - TEST_SRC ${TEST_SRC} - TEST_TARGET_NAME build_tests - LINKER_LIBS hiprtc) + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS hiprtc) + set(EXPECTED_ERRORS 40) endif() +add_test(NAME Unit_atomicAnd_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} 
${HIP_PLATFORM} ${HIP_PATH} + atomicAnd_negative_kernels.cc ${EXPECTED_ERRORS}) + +add_test(NAME Unit_atomicOr_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + atomicOr_negative_kernels.cc ${EXPECTED_ERRORS}) + +add_test(NAME Unit_atomicXor_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + atomicXor_negative_kernels.cc ${EXPECTED_ERRORS}) + # SWDEV-435667: Below 2 tests failed in stress test on 01/12/23 #add_test(NAME Unit_atomicExch_Negative_Parameters # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py diff --git a/catch/unit/atomics/atomicAnd.cc b/catch/unit/atomics/atomicAnd.cc new file mode 100644 index 0000000000..756526a31c --- /dev/null +++ b/catch/unit/atomics/atomicAnd.cc @@ -0,0 +1,222 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "atomicAnd_negative_kernels_rtc.hh" +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup atomicAnd atomicAnd + * @{ + * @ingroup AtomicsTest + * `atomicAnd(TestType* address, TestType val)` - + * performs atomic bitwise AND between the value at address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd from multiple threads on the same address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicAnd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_SameAddress", "", int, unsigned int, unsigned long, + unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicAnd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd from multiple threads on the scattered addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicAnd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Scattered_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicAnd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Multi_Kernel_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicAnd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicAnd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_Positive_Multi_Kernel_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Compiles atomicAnd with invalid parameters. + * - Compiles the source with RTC. + * Test source + * ------------------------ + * - unit/atomics/atomicAnd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicAnd_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = + GENERATE(kAtomicAnd_int, kAtomicAnd_uint, kAtomicAnd_ulong, kAtomicAnd_ulonglong); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicAnd_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + // Please check the content of atomicAnd_negative_kernels_rtc.hh + int expected_error_count{9}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == 
expected_error_count); +} diff --git a/catch/unit/atomics/atomicAnd_negative_kernels.cc b/catch/unit/atomics/atomicAnd_negative_kernels.cc new file mode 100644 index 0000000000..593399b45e --- /dev/null +++ b/catch/unit/atomics/atomicAnd_negative_kernels.cc @@ -0,0 +1,185 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* int atomicAnd(int* address, int val) */ +__global__ void atomicAnd_int_v1(int* address, int* result) { *result = atomicAnd(&address, 1234); } + +__global__ void atomicAnd_int_v2(int* address, int* result) { + *result = atomicAnd(address, address); +} + +__global__ void atomicAnd_int_v3(int* address, int* result) { *result = atomicAnd(1234, 1234); } + +__global__ void atomicAnd_int_v4(Dummy* address, int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_int_v5(char* address, int* result) { *result = atomicAnd(address, 1234); } + +__global__ void atomicAnd_int_v6(short* address, int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_int_v7(long* address, int* result) { *result = atomicAnd(address, 1234); } + +__global__ void atomicAnd_int_v8(long long* address, int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_int_v9(float* address, int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_int_v10(double* address, int* result) { + *result = atomicAnd(address, 1234); +} + +/* unsigned int atomicAnd(unsigned int* address, unsigned int val) */ +__global__ void atomicAnd_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicAnd(&address, 1234); +} + +__global__ void atomicAnd_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicAnd(address, address); +} + +__global__ void atomicAnd_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicAnd(1234, 1234); +} + +__global__ void atomicAnd_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_uint_v5(char* address, unsigned int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_uint_v6(short* address, unsigned int* result) { + *result = atomicAnd(address, 
1234); +} + +__global__ void atomicAnd_uint_v7(long* address, unsigned int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_uint_v8(long long* address, unsigned int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_uint_v9(float* address, unsigned int* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_uint_v10(double* address, unsigned int* result) { + *result = atomicAnd(address, 1234); +} + +/* atomicAnd(unsigned long* address, unsigned long val) */ +__global__ void atomicAnd_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicAnd(&address, 1234); +} + +__global__ void atomicAnd_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicAnd(address, address); +} + +__global__ void atomicAnd_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicAnd(1234, 1234); +} + +__global__ void atomicAnd_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulong_v5(char* address, unsigned long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulong_v6(short* address, unsigned long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulong_v7(long* address, unsigned long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulong_v8(long long* address, unsigned long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulong_v9(float* address, unsigned long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulong_v10(double* address, unsigned long* result) { + *result = atomicAnd(address, 1234); +} + +/* atomicAnd(unsigned long long* address, unsigned long long val) */ +__global__ void atomicAnd_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicAnd(&address, 1234); +} + 
+__global__ void atomicAnd_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicAnd(address, address); +} + +__global__ void atomicAnd_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicAnd(1234, 1234); +} + +__global__ void atomicAnd_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulonglong_v9(float* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); +} + +__global__ void atomicAnd_ulonglong_v10(double* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); +} diff --git a/catch/unit/atomics/atomicAnd_negative_kernels_rtc.hh b/catch/unit/atomics/atomicAnd_negative_kernels_rtc.hh new file mode 100644 index 0000000000..d637feb9fe --- /dev/null +++ b/catch/unit/atomics/atomicAnd_negative_kernels_rtc.hh @@ -0,0 +1,223 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicAnd_int{ + R"( + __global__ void atomicAnd_int_v1(int* address, int* result) { + *result = atomicAnd(&address, 1234); + } + + __global__ void atomicAnd_int_v2(int* address, int* result) { + *result = atomicAnd(address, address); + } + + __global__ void atomicAnd_int_v3(int* address, int* result) { + *result = atomicAnd(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAnd_int_v4(Dummy* address, int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_int_v5(char* address, int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_int_v6(short* address, int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_int_v7(long* address, int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_int_v8(long long* address, int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_int_v9(float* address, int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_int_v10(double* address, int* result) { + *result = atomicAnd(address, 1234); + } + )"}; + +static constexpr auto kAtomicAnd_uint{ + R"( + __global__ void atomicAnd_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicAnd(&address, 1234); + } + + __global__ void atomicAnd_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicAnd(address, address); + } + + __global__ void atomicAnd_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicAnd(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAnd_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_uint_v5(char* address, unsigned int* result) { + *result = atomicAnd(address, 1234); + } + + 
__global__ void atomicAnd_uint_v6(short* address, unsigned int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_uint_v7(long* address, unsigned int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_uint_v8(long long* address, unsigned int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_uint_v9(float* address, unsigned int* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_uint_v10(double* address, unsigned int* result) { + *result = atomicAnd(address, 1234); + } + )"}; + +static constexpr auto kAtomicAnd_ulong{ + R"( + __global__ void atomicAnd_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicAnd(&address, 1234); + } + + __global__ void atomicAnd_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicAnd(address, address); + } + + __global__ void atomicAnd_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicAnd(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAnd_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulong_v5(char* address, unsigned long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulong_v6(short* address, unsigned long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulong_v7(long* address, unsigned long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulong_v8(long long* address, unsigned long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulong_v9(float* address, unsigned long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulong_v10(double* address, unsigned long* result) { + *result = atomicAnd(address, 1234); + } + )"}; + 
+static constexpr auto kAtomicAnd_ulonglong{ + R"( + __global__ void atomicAnd_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicAnd(&address, 1234); + } + + __global__ void atomicAnd_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicAnd(address, address); + } + + __global__ void atomicAnd_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicAnd(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAnd_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulonglong_v9(float* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); + } + + __global__ void atomicAnd_ulonglong_v10(double* address, unsigned long long* result) { + *result = atomicAnd(address, 1234); + } + )"}; diff --git a/catch/unit/atomics/atomicAnd_system.cc b/catch/unit/atomics/atomicAnd_system.cc new file mode 100644 index 0000000000..e696a8ac26 --- /dev/null +++ b/catch/unit/atomics/atomicAnd_system.cc @@ -0,0 +1,109 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup atomicAnd_system atomicAnd_system + * @{ + * @ingroup AtomicsTest + * `atomicAnd_system(TestType* address, TestType val)` - + * performs system-wide atomic bitwise AND between the value at address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd_system from multiple threads on the same address. + * - Uses multiple devices and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicAnd_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd_system from multiple threads on adjacent addresses. + * - Uses multiple devices and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicAnd_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicAnd_system from multiple threads on scattered addresses. + * - Uses multiple devices and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicAnd_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAnd_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, cache_line_size); + } + } +} diff --git a/catch/unit/atomics/atomicOr.cc b/catch/unit/atomics/atomicOr.cc new file mode 100644 index 0000000000..e2ae9c6825 --- /dev/null +++ b/catch/unit/atomics/atomicOr.cc @@ -0,0 +1,222 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "atomicOr_negative_kernels_rtc.hh" +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup atomicOr atomicOr + * @{ + * @ingroup AtomicsTest + * `atomicOr(TestType* address, TestType val)` - + * performs atomic bitwise OR between address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicOr from multiple threads on the same address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicOr.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_SameAddress", "", int, unsigned int, unsigned long, + unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicOr from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicOr.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicOr from multiple threads on the scattered addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicOr.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Scattered_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicOr from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicOr.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Multi_Kernel_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicOr from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicOr.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicOr from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicOr.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_Positive_Multi_Kernel_Scattered_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Compiles atomicOr with invalid parameters. + * - Compiles the source with RTC. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicOr.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicOr_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = + GENERATE(kAtomicOr_int, kAtomicOr_uint, kAtomicOr_ulong, kAtomicOr_ulonglong); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicOr_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + // Please check the content of negative_kernels_rtc.hh + int expected_error_count{9}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/catch/unit/atomics/atomicOr_negative_kernels.cc b/catch/unit/atomics/atomicOr_negative_kernels.cc new file mode 100644 index 0000000000..47a56e3a0d --- /dev/null +++ b/catch/unit/atomics/atomicOr_negative_kernels.cc @@ -0,0 +1,177 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* int atomicOr(int* address, int val) */ +__global__ void atomicOr_int_v1(int* address, int* result) { *result = atomicOr(&address, 1234); } + +__global__ void atomicOr_int_v2(int* address, int* result) { *result = atomicOr(address, address); } + +__global__ void atomicOr_int_v3(int* address, int* result) { *result = atomicOr(1234, 1234); } + +__global__ void atomicOr_int_v4(Dummy* address, int* result) { *result = atomicOr(address, 1234); } + +__global__ void atomicOr_int_v5(char* address, int* result) { *result = atomicOr(address, 1234); } + +__global__ void atomicOr_int_v6(short* address, int* result) { *result = atomicOr(address, 1234); } + +__global__ void atomicOr_int_v7(long* address, int* result) { *result = atomicOr(address, 1234); } + +__global__ void atomicOr_int_v8(long long* address, int* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_int_v9(float* address, int* result) { *result = atomicOr(address, 1234); } + +__global__ void atomicOr_int_v10(double* address, int* result) { + *result = atomicOr(address, 1234); +} + +/* unsigned int atomicOr(unsigned int* address, unsigned int val) */ +__global__ void atomicOr_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicOr(&address, 1234); +} + +__global__ void atomicOr_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicOr(address, address); +} + +__global__ void atomicOr_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicOr(1234, 1234); +} + +__global__ void atomicOr_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_uint_v5(char* address, unsigned int* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_uint_v6(short* address, unsigned int* result) { + *result = atomicOr(address, 1234); +} + +__global__ void 
atomicOr_uint_v7(long* address, unsigned int* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_uint_v8(long long* address, unsigned int* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_uint_v9(float* address, unsigned int* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_uint_v10(double* address, unsigned int* result) { + *result = atomicOr(address, 1234); +} + +/* atomicOr(unsigned long* address, unsigned long val) */ +__global__ void atomicOr_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicOr(&address, 1234); +} + +__global__ void atomicOr_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicOr(address, address); +} + +__global__ void atomicOr_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicOr(1234, 1234); +} + +__global__ void atomicOr_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulong_v5(char* address, unsigned long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulong_v6(short* address, unsigned long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulong_v7(long* address, unsigned long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulong_v8(long long* address, unsigned long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulong_v9(float* address, unsigned long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulong_v10(double* address, unsigned long* result) { + *result = atomicOr(address, 1234); +} + +/* atomicOr(unsigned long long* address, unsigned long long val) */ +__global__ void atomicOr_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicOr(&address, 1234); +} + +__global__ void atomicOr_ulonglong_v2(unsigned long long* address, 
unsigned long long* result) { + *result = atomicOr(address, address); +} + +__global__ void atomicOr_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicOr(1234, 1234); +} + +__global__ void atomicOr_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulonglong_v9(float* address, unsigned long long* result) { + *result = atomicOr(address, 1234); +} + +__global__ void atomicOr_ulonglong_v10(double* address, unsigned long long* result) { + *result = atomicOr(address, 1234); +} diff --git a/catch/unit/atomics/atomicOr_negative_kernels_rtc.hh b/catch/unit/atomics/atomicOr_negative_kernels_rtc.hh new file mode 100644 index 0000000000..dd4117e704 --- /dev/null +++ b/catch/unit/atomics/atomicOr_negative_kernels_rtc.hh @@ -0,0 +1,223 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicOr_int{ + R"( + __global__ void atomicOr_int_v1(int* address, int* result) { + *result = atomicOr(&address, 1234); + } + + __global__ void atomicOr_int_v2(int* address, int* result) { + *result = atomicOr(address, address); + } + + __global__ void atomicOr_int_v3(int* address, int* result) { + *result = atomicOr(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicOr_int_v4(Dummy* address, int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_int_v5(char* address, int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_int_v6(short* address, int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_int_v7(long* address, int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_int_v8(long long* address, int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_int_v9(float* address, int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_int_v10(double* address, int* result) { + *result = atomicOr(address, 1234); + } + )"}; + +static constexpr auto kAtomicOr_uint{ + R"( + __global__ void atomicOr_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicOr(&address, 1234); + } + + __global__ void atomicOr_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicOr(address, address); + } + + __global__ void atomicOr_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicOr(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicOr_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_uint_v5(char* address, unsigned int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_uint_v6(short* 
address, unsigned int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_uint_v7(long* address, unsigned int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_uint_v8(long long* address, unsigned int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_uint_v9(float* address, unsigned int* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_uint_v10(double* address, unsigned int* result) { + *result = atomicOr(address, 1234); + } + )"}; + +static constexpr auto kAtomicOr_ulong{ + R"( + __global__ void atomicOr_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicOr(&address, 1234); + } + + __global__ void atomicOr_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicOr(address, address); + } + + __global__ void atomicOr_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicOr(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicOr_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulong_v5(char* address, unsigned long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulong_v6(short* address, unsigned long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulong_v7(long* address, unsigned long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulong_v8(long long* address, unsigned long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulong_v9(float* address, unsigned long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulong_v10(double* address, unsigned long* result) { + *result = atomicOr(address, 1234); + } + )"}; + +static constexpr auto kAtomicOr_ulonglong{ + R"( + __global__ void 
atomicOr_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicOr(&address, 1234); + } + + __global__ void atomicOr_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicOr(address, address); + } + + __global__ void atomicOr_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicOr(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicOr_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulonglong_v9(float* address, unsigned long long* result) { + *result = atomicOr(address, 1234); + } + + __global__ void atomicOr_ulonglong_v10(double* address, unsigned long long* result) { + *result = atomicOr(address, 1234); + } + )"}; diff --git a/catch/unit/atomics/atomicOr_system.cc b/catch/unit/atomics/atomicOr_system.cc new file mode 100644 index 0000000000..0239056e3f --- /dev/null +++ b/catch/unit/atomics/atomicOr_system.cc @@ -0,0 +1,109 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup atomicOr_system atomicOr_system + * @{ + * @ingroup AtomicsTest + * `atomicOr_system(TestType* address, TestType val)` - + * performs system-wide atomic bitwise OR between address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicOr_system from multiple threads on the same address. + * - Uses multiple devices and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicOr_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicOr_system from multiple threads on adjacent addresses. + * - Uses multiple devices and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicOr_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicOr_system from multiple threads on scattered addresses. + * - Uses multiple devices and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicOr_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicOr_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, cache_line_size); + } + } +} diff --git a/catch/unit/atomics/atomicXor.cc b/catch/unit/atomics/atomicXor.cc new file mode 100644 index 0000000000..0fb31252c5 --- /dev/null +++ b/catch/unit/atomics/atomicXor.cc @@ -0,0 +1,222 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "atomicXor_negative_kernels_rtc.hh" +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup atomicXor atomicXor + * @{ + * @ingroup AtomicsTest + * `atomicXor(TestType* address, TestType val)` - + * performs atomic bitwise XOR between address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicXor from multiple threads on the same address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicXor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_SameAddress", "", int, unsigned int, unsigned long, + unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicXor from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicXor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicXor from multiple threads on the scattered addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicXor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Scattered_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicXor from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicXor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Multi_Kernel_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicXor from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicXor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, warp_size - 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicXor from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicXor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_Positive_Multi_Kernel_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceMultipleKernelTest( + 2, warp_size - 1, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Compiles atomicXor with invalid parameters. + * - Compiles the source with RTC. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicXor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicXor_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = + GENERATE(kAtomicXor_int, kAtomicXor_uint, kAtomicXor_ulong, kAtomicXor_ulonglong); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicXor_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + // Please check the content of atomicXor_negative_kernels_rtc.hh + int expected_error_count{9}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/catch/unit/atomics/atomicXor_negative_kernels.cc b/catch/unit/atomics/atomicXor_negative_kernels.cc new file mode 100644 index 0000000000..a180afd6db --- /dev/null +++ b/catch/unit/atomics/atomicXor_negative_kernels.cc @@ -0,0 +1,185 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* int atomicXor(int* address, int val) */ +__global__ void atomicXor_int_v1(int* address, int* result) { *result = atomicXor(&address, 1234); } + +__global__ void atomicXor_int_v2(int* address, int* result) { + *result = atomicXor(address, address); +} + +__global__ void atomicXor_int_v3(int* address, int* result) { *result = atomicXor(1234, 1234); } + +__global__ void atomicXor_int_v4(Dummy* address, int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_int_v5(char* address, int* result) { *result = atomicXor(address, 1234); } + +__global__ void atomicXor_int_v6(short* address, int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_int_v7(long* address, int* result) { *result = atomicXor(address, 1234); } + +__global__ void atomicXor_int_v8(long long* address, int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_int_v9(float* address, int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_int_v10(double* address, int* result) { + *result = atomicXor(address, 1234); +} + +/* unsigned int atomicXor(unsigned int* address, unsigned int val) */ +__global__ void atomicXor_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicXor(&address, 1234); +} + +__global__ void atomicXor_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicXor(address, address); +} + +__global__ void atomicXor_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicXor(1234, 1234); +} + +__global__ void atomicXor_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_uint_v5(char* address, unsigned int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_uint_v6(short* address, unsigned int* result) { + *result = atomicXor(address, 
1234); +} + +__global__ void atomicXor_uint_v7(long* address, unsigned int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_uint_v8(long long* address, unsigned int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_uint_v9(float* address, unsigned int* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_uint_v10(double* address, unsigned int* result) { + *result = atomicXor(address, 1234); +} + +/* unsigned long atomicXor(unsigned long* address, unsigned long val) */ +__global__ void atomicXor_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicXor(&address, 1234); +} + +__global__ void atomicXor_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicXor(address, address); +} + +__global__ void atomicXor_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicXor(1234, 1234); +} + +__global__ void atomicXor_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulong_v5(char* address, unsigned long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulong_v6(short* address, unsigned long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulong_v7(long* address, unsigned long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulong_v8(long long* address, unsigned long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulong_v9(float* address, unsigned long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulong_v10(double* address, unsigned long* result) { + *result = atomicXor(address, 1234); +} + +/* unsigned long long atomicXor(unsigned long long* address, unsigned long long val) */ +__global__ void atomicXor_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicXor(&address, 1234); +} + +__global__ 
void atomicXor_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicXor(address, address); +} + +__global__ void atomicXor_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicXor(1234, 1234); +} + +__global__ void atomicXor_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulonglong_v9(float* address, unsigned long long* result) { + *result = atomicXor(address, 1234); +} + +__global__ void atomicXor_ulonglong_v10(double* address, unsigned long long* result) { + *result = atomicXor(address, 1234); +} diff --git a/catch/unit/atomics/atomicXor_negative_kernels_rtc.hh b/catch/unit/atomics/atomicXor_negative_kernels_rtc.hh new file mode 100644 index 0000000000..3d4e19c7e7 --- /dev/null +++ b/catch/unit/atomics/atomicXor_negative_kernels_rtc.hh @@ -0,0 +1,223 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicXor_int{ + R"( + __global__ void atomicXor_int_v1(int* address, int* result) { + *result = atomicXor(&address, 1234); + } + + __global__ void atomicXor_int_v2(int* address, int* result) { + *result = atomicXor(address, address); + } + + __global__ void atomicXor_int_v3(int* address, int* result) { + *result = atomicXor(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicXor_int_v4(Dummy* address, int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_int_v5(char* address, int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_int_v6(short* address, int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_int_v7(long* address, int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_int_v8(long long* address, int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_int_v9(float* address, int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_int_v10(double* address, int* result) { + *result = atomicXor(address, 1234); + } + )"}; + +static constexpr auto kAtomicXor_uint{ + R"( + __global__ void atomicXor_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicXor(&address, 1234); + } + + __global__ void atomicXor_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicXor(address, address); + } + + __global__ void atomicXor_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicXor(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicXor_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_uint_v5(char* address, unsigned int* result) { + *result = atomicXor(address, 1234); + } + + 
__global__ void atomicXor_uint_v6(short* address, unsigned int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_uint_v7(long* address, unsigned int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_uint_v8(long long* address, unsigned int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_uint_v9(float* address, unsigned int* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_uint_v10(double* address, unsigned int* result) { + *result = atomicXor(address, 1234); + } + )"}; + +static constexpr auto kAtomicXor_ulong{ + R"( + __global__ void atomicXor_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicXor(&address, 1234); + } + + __global__ void atomicXor_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicXor(address, address); + } + + __global__ void atomicXor_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicXor(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicXor_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulong_v5(char* address, unsigned long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulong_v6(short* address, unsigned long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulong_v7(long* address, unsigned long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulong_v8(long long* address, unsigned long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulong_v9(float* address, unsigned long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulong_v10(double* address, unsigned long* result) { + *result = atomicXor(address, 1234); + } + )"}; + 
+static constexpr auto kAtomicXor_ulonglong{ + R"( + __global__ void atomicXor_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicXor(&address, 1234); + } + + __global__ void atomicXor_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicXor(address, address); + } + + __global__ void atomicXor_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicXor(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicXor_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulonglong_v9(float* address, unsigned long long* result) { + *result = atomicXor(address, 1234); + } + + __global__ void atomicXor_ulonglong_v10(double* address, unsigned long long* result) { + *result = atomicXor(address, 1234); + } + )"}; diff --git a/catch/unit/atomics/atomicXor_system.cc b/catch/unit/atomics/atomicXor_system.cc new file mode 100644 index 0000000000..fbfb82d36d --- /dev/null +++ b/catch/unit/atomics/atomicXor_system.cc @@ -0,0 +1,109 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup atomicXor_system atomicXor_system + * @{ + * @ingroup AtomicsTest + * `atomicXor_system(TestType* address, TestType val)` - + * performs a system-wide atomic bitwise XOR between the value stored at address and val, returns the old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicXor_system from multiple threads on the same address. + * - Uses multiple devices and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicXor_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicXor_system from multiple threads on adjacent addresses. + * - Uses multiple devices and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicXor_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicXor_system from multiple threads on scattered addresses. + * - Uses multiple devices and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicXor_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicXor_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, cache_line_size); + } + } +} diff --git a/catch/unit/atomics/bitwise_common.hh b/catch/unit/atomics/bitwise_common.hh new file mode 100644 index 0000000000..9e71c99cb0 --- /dev/null +++ b/catch/unit/atomics/bitwise_common.hh @@ -0,0 +1,345 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include +#include +#include + +namespace cg = cooperative_groups; + +namespace Bitwise { +enum class AtomicOperation { + kAnd = 0, + kAndSystem, + kOr, + kOrSystem, + kXor, + kXorSystem, +}; + +constexpr auto kMask = 0xAAAA; +constexpr auto kTestValue = 0x4545; +constexpr auto kAndTestValue = 0xFFFF; + +template +__host__ __device__ TestType GetTestValue() { + if constexpr (operation == AtomicOperation::kAnd || operation == AtomicOperation::kAndSystem) { + return kAndTestValue; + } + + return kTestValue; +} + +template +__device__ TestType PerformAtomicOperation(TestType* const mem) { + const auto mask = kMask; + + if constexpr (operation == AtomicOperation::kAnd) { + return atomicAnd(mem, mask); + } else if constexpr (operation == AtomicOperation::kAndSystem) { + return atomicAnd_system(mem, mask); + } else if constexpr (operation == AtomicOperation::kOr) { + return atomicOr(mem, mask); + } else if constexpr (operation == AtomicOperation::kOrSystem) { + return atomicOr_system(mem, mask); + } else if constexpr (operation == AtomicOperation::kXor) { + return atomicXor(mem, mask); + } else if constexpr (operation == AtomicOperation::kXorSystem) { + return atomicXor_system(mem, mask); + } +} + +template +__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) { + __shared__ TestType shared_mem; + + const auto tid = cg::this_grid().thread_rank(); + + TestType* const mem = use_shared_mem ? 
&shared_mem : global_mem; + + if constexpr (use_shared_mem) { + if (tid == 0) mem[0] = global_mem[0]; + __syncthreads(); + } + + old_vals[tid] = PerformAtomicOperation(mem); + + if constexpr (use_shared_mem) { + __syncthreads(); + if (tid == 0) global_mem[0] = mem[0]; + } +} + +template +__host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned int pitch, + const unsigned int idx) { + const auto byte_ptr = reinterpret_cast(ptr); + return reinterpret_cast(byte_ptr + idx * pitch); +} + +template +__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals, + const unsigned int width, const unsigned pitch) { + extern __shared__ uint8_t shared_mem[]; + + const auto tid = cg::this_grid().thread_rank(); + + TestType* const mem = use_shared_mem ? reinterpret_cast(shared_mem) : global_mem; + + if constexpr (use_shared_mem) { + if (tid < width) { + const auto target = PitchedOffset(mem, pitch, tid); + *target = *PitchedOffset(global_mem, pitch, tid); + }; + __syncthreads(); + } + + old_vals[tid] = + PerformAtomicOperation(PitchedOffset(mem, pitch, tid % width)); + + if constexpr (use_shared_mem) { + __syncthreads(); + if (tid < width) { + const auto target = PitchedOffset(global_mem, pitch, tid); + *target = *PitchedOffset(mem, pitch, tid); + }; + } +} + +struct TestParams { + auto ThreadCount() const { + return blocks.x * blocks.y * blocks.z * threads.x * threads.y * threads.z; + } + + dim3 blocks; + dim3 threads; + unsigned int num_devices = 1u; + unsigned int kernel_count = 1u; + unsigned int width = 1u; + unsigned int pitch = 0u; + unsigned int host_thread_count = 0u; + LinearAllocs alloc_type; +}; + +template +std::tuple, std::vector> TestKernelHostRef(const TestParams& p) { + const auto thread_count = p.num_devices * p.kernel_count * p.ThreadCount(); + + TestType test_value = GetTestValue(); + const auto mask = kMask; + std::vector res_vals(p.width, test_value); + std::vector old_vals; + old_vals.reserve(thread_count); + + for 
(auto tid = 0u; tid < thread_count; ++tid) { + auto& res = res_vals[tid % p.width]; + old_vals.push_back(res); + + if constexpr (operation == AtomicOperation::kAnd || operation == AtomicOperation::kAndSystem) { + res = res & mask; + } else if constexpr (operation == AtomicOperation::kOr || + operation == AtomicOperation::kOrSystem) { + res = res | mask; + } else if constexpr (operation == AtomicOperation::kXor || + operation == AtomicOperation::kXorSystem) { + res = res ^ mask; + } + } + + return {res_vals, old_vals}; +} + +template +void Verify(const TestParams& p, std::vector& res_vals, std::vector& old_vals) { + auto [expected_res_vals, expected_old_vals] = TestKernelHostRef(p); + + for (auto i = 0u; i < res_vals.size(); ++i) { + INFO("Results index: " << i); + REQUIRE(expected_res_vals[i] == res_vals[i]); + } + + std::sort(begin(old_vals), end(old_vals)); + std::sort(begin(expected_old_vals), end(expected_old_vals)); + for (auto i = 0u; i < old_vals.size(); ++i) { + INFO("Old values index: " << i); + REQUIRE(expected_old_vals[i] == old_vals[i]); + } +} + +template +void LaunchKernel(const TestParams& p, hipStream_t stream, TestType* const mem_ptr, + TestType* const old_vals) { + const auto shared_mem_size = use_shared_mem ? 
p.width * p.pitch : 0u; + if (p.width == 1 && p.pitch == sizeof(TestType)) + TestKernel + <<>>(mem_ptr, old_vals); + else + TestKernel + <<>>(mem_ptr, old_vals, p.width, p.pitch); +} + +template +void TestCore(const TestParams& p) { + const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType); + std::vector> old_vals_devs; + std::vector streams; + for (auto i = 0; i < p.num_devices; ++i) { + HIP_CHECK(hipSetDevice(i)); + old_vals_devs.emplace_back(LinearAllocs::hipMalloc, old_vals_alloc_size); + for (auto j = 0; j < p.kernel_count; ++j) { + streams.emplace_back(Streams::created); + } + } + + const auto mem_alloc_size = p.width * p.pitch; + LinearAllocGuard mem_dev(p.alloc_type, mem_alloc_size); + + std::vector old_vals(p.num_devices * p.kernel_count * p.ThreadCount()); + std::vector res_vals(p.width); + + TestType* const mem_ptr = + p.alloc_type == LinearAllocs::hipMalloc ? mem_dev.ptr() : mem_dev.host_ptr(); + + TestType test_value = GetTestValue(); + HIP_CHECK(hipMemset(mem_ptr, 0, mem_alloc_size)); + for (int i = 0; i < p.width * p.pitch / sizeof(TestType); ++i) { + HIP_CHECK(hipMemcpy(&mem_ptr[i], &test_value, sizeof(TestType), hipMemcpyHostToDevice)); + } + + for (auto i = 0u; i < p.num_devices; ++i) { + for (auto j = 0u; j < p.kernel_count; ++j) { + const auto& stream = streams[i * p.kernel_count + j].stream(); + const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount(); + LaunchKernel(p, stream, mem_dev.ptr(), old_vals); + } + } + + for (auto i = 0u; i < p.num_devices; ++i) { + const auto device_offset = i * p.kernel_count * p.ThreadCount(); + HIP_CHECK(hipMemcpy(old_vals.data() + device_offset, old_vals_devs[i].ptr(), + old_vals_alloc_size, hipMemcpyDeviceToHost)); + } + HIP_CHECK(hipMemcpy2D(res_vals.data(), sizeof(TestType), mem_ptr, p.pitch, sizeof(TestType), + p.width, hipMemcpyDeviceToHost)); + + Verify(p, res_vals, old_vals); +} + +template +void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned 
int pitch) { + TestParams params; + params.num_devices = 1; + params.kernel_count = 1; + params.threads = GENERATE(dim3(1023)); + params.width = width; + params.pitch = pitch; + + SECTION("Global memory") { + params.blocks = GENERATE(dim3(3)); + using LA = LinearAllocs; + for (const auto alloc_type : + {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } + } + + SECTION("Shared memory") { + params.blocks = dim3(1); + params.alloc_type = LinearAllocs::hipMalloc; + TestCore(params); + } +} + +template +void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsigned int width, + const unsigned int pitch) { + int concurrent_kernels = 0; + HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, 0)); + if (!concurrent_kernels) { + HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution"); + return; + } + + TestParams params; + params.num_devices = 1; + params.kernel_count = kernel_count; + params.blocks = GENERATE(dim3(3)); + params.threads = GENERATE(dim3(1023)); + params.width = width; + params.pitch = pitch; + + using LA = LinearAllocs; + for (const auto alloc_type : + {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } +} + +template +void MultipleDeviceMultipleKernelTest(const unsigned int num_devices, + const unsigned int kernel_count, const unsigned int width, + const unsigned int pitch) { + if (num_devices > 1) { + if (HipTest::getDeviceCount() < num_devices) { + std::string msg = std::to_string(num_devices) + " devices are required"; + HipTest::HIP_SKIP_TEST(msg.c_str()); + return; + } + } + + if (kernel_count > 1) { + for (auto i = 0u; i < num_devices; ++i) { + int 
concurrent_kernels = 0; + HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, i)); + if (!concurrent_kernels) { + HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution"); + return; + } + } + } + + TestParams params; + params.num_devices = num_devices; + params.kernel_count = kernel_count; + params.blocks = GENERATE(dim3(3)); + params.threads = GENERATE(dim3(1023)); + params.width = width; + params.pitch = pitch; + + using LA = LinearAllocs; + for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } +} +} // namespace Bitwise From cf34eb8d63b4816c7964dea5fb5bb89ad5c4c930 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 17:28:08 +0100 Subject: [PATCH 04/71] EXSWHTEC-264 - Implement Unit Tests for Atomic Min/Max Operations #192 Change-Id: I11779d677b4133b1bc3baa244f8a1b9b21e0045a --- catch/unit/atomics/CMakeLists.txt | 24 +- catch/unit/atomics/atomicMax.cc | 222 +++++++++++ .../atomics/atomicMax_negative_kernels.cc | 219 +++++++++++ .../atomics/atomicMax_negative_kernels_rtc.hh | 273 +++++++++++++ catch/unit/atomics/atomicMax_system.cc | 124 ++++++ catch/unit/atomics/atomicMin.cc | 222 +++++++++++ .../atomics/atomicMin_negative_kernels.cc | 219 +++++++++++ .../atomics/atomicMin_negative_kernels_rtc.hh | 273 +++++++++++++ catch/unit/atomics/atomicMin_system.cc | 124 ++++++ catch/unit/atomics/min_max_common.hh | 360 ++++++++++++++++++ catch/unit/atomics/safeAtomicMax.cc | 175 +++++++++ catch/unit/atomics/safeAtomicMin.cc | 175 +++++++++ catch/unit/atomics/unsafeAtomicMax.cc | 175 +++++++++ catch/unit/atomics/unsafeAtomicMin.cc | 175 +++++++++ 14 files changed, 2758 insertions(+), 2 deletions(-) create mode 100644 catch/unit/atomics/atomicMax.cc 
create mode 100644 catch/unit/atomics/atomicMax_negative_kernels.cc create mode 100644 catch/unit/atomics/atomicMax_negative_kernels_rtc.hh create mode 100644 catch/unit/atomics/atomicMax_system.cc create mode 100644 catch/unit/atomics/atomicMin.cc create mode 100644 catch/unit/atomics/atomicMin_negative_kernels.cc create mode 100644 catch/unit/atomics/atomicMin_negative_kernels_rtc.hh create mode 100644 catch/unit/atomics/atomicMin_system.cc create mode 100644 catch/unit/atomics/min_max_common.hh create mode 100644 catch/unit/atomics/safeAtomicMax.cc create mode 100644 catch/unit/atomics/safeAtomicMin.cc create mode 100644 catch/unit/atomics/unsafeAtomicMax.cc create mode 100644 catch/unit/atomics/unsafeAtomicMin.cc diff --git a/catch/unit/atomics/CMakeLists.txt b/catch/unit/atomics/CMakeLists.txt index fa77a87a99..f18abbf3e5 100644 --- a/catch/unit/atomics/CMakeLists.txt +++ b/catch/unit/atomics/CMakeLists.txt @@ -25,6 +25,14 @@ set(TEST_SRC atomicOr_system.cc atomicXor.cc atomicXor_system.cc + atomicMin.cc + atomicMin_system.cc + atomicMax.cc + atomicMax_system.cc + safeAtomicMin.cc + unsafeAtomicMin.cc + safeAtomicMax.cc + unsafeAtomicMax.cc atomicExch.cc atomicExch_system.cc ) @@ -34,18 +42,20 @@ if(HIP_PLATFORM MATCHES "nvidia") set_source_files_properties(atomicAnd_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicOr_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicXor_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") + set_source_files_properties(atomicMin_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") + 
set_source_files_properties(atomicMax_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicExch_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") hip_add_exe_to_target(NAME AtomicsTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests LINKER_LIBS "nvrtc -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") - set(EXPECTED_ERRORS 36) # EXSWHTEC-278 + set(EXPECTED_ERRORS 42) # EXSWHTEC-278 elseif(HIP_PLATFORM MATCHES "amd") hip_add_exe_to_target(NAME AtomicsTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests LINKER_LIBS hiprtc) - set(EXPECTED_ERRORS 40) + set(EXPECTED_ERRORS 48) endif() add_test(NAME Unit_atomicAnd_Negative_Parameters @@ -63,6 +73,16 @@ add_test(NAME Unit_atomicXor_Negative_Parameters ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} atomicXor_negative_kernels.cc ${EXPECTED_ERRORS}) +add_test(NAME Unit_atomicMin_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + atomicMin_negative_kernels.cc ${EXPECTED_ERRORS}) + +add_test(NAME Unit_atomicMax_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + atomicMax_negative_kernels.cc ${EXPECTED_ERRORS}) + # SWDEV-435667: Below 2 tests failed in stress test on 01/12/23 #add_test(NAME Unit_atomicExch_Negative_Parameters # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py diff --git a/catch/unit/atomics/atomicMax.cc b/catch/unit/atomics/atomicMax.cc new file mode 100644 index 0000000000..e98ceaaf4c --- /dev/null +++ b/catch/unit/atomics/atomicMax.cc @@ -0,0 +1,222 @@ +/* +Copyright (c) 2023 Advanced 
Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "atomicMax_negative_kernels_rtc.hh" +#include "min_max_common.hh" + +#include + +/** + * @addtogroup atomicMax atomicMax + * @{ + * @ingroup AtomicsTest + * `atomicMax(TestType* address, TestType val)` - + * calculates the maximum of the value at address and val, returns the old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on the same address. + * - Uses only one device and launches one kernel.
+ * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_SameAddress", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on scattered addresses. + * - Uses only one device and launches one kernel.
+ * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Scattered_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Multi_Kernel_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on scattered addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Compiles atomicMax with invalid parameters. + * - Compiles the source with RTC.
+ * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicMax_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = GENERATE(kAtomicMax_int, kAtomicMax_uint, kAtomicMax_ulong, + kAtomicMax_ulonglong, kAtomicMax_float, kAtomicMax_double); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicMax_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + // Please check the content of negative_kernels_rtc.hh + int expected_error_count{8}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/catch/unit/atomics/atomicMax_negative_kernels.cc b/catch/unit/atomics/atomicMax_negative_kernels.cc new file mode 100644 index 0000000000..2f9b6a6306 --- /dev/null +++ b/catch/unit/atomics/atomicMax_negative_kernels.cc @@ -0,0 +1,219 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* int atomicMax(int* address, int val) */ +__global__ void atomicMax_int_v1(int* address, int* result) { *result = atomicMax(&address, 1234); } + +__global__ void atomicMax_int_v2(int* address, int* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_int_v3(int* address, int* result) { *result = atomicMax(1234, 1234); } + +__global__ void atomicMax_int_v4(Dummy* address, int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_int_v5(char* address, int* result) { *result = atomicMax(address, 1234); } + +__global__ void atomicMax_int_v6(short* address, int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_int_v7(long* address, int* result) { *result = atomicMax(address, 1234); } + +__global__ void atomicMax_int_v8(long long* address, int* result) { + *result = atomicMax(address, 1234); +} + +/* unsigned int atomicMax(unsigned int* address, unsigned int val) */ +__global__ void atomicMax_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicMax(&address, 1234); +} + +__global__ void atomicMax_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicMax(1234, 1234); +} + +__global__ void atomicMax_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_uint_v5(char* address, unsigned int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_uint_v6(short* address, unsigned int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_uint_v7(long* address, unsigned int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_uint_v8(long long* address, unsigned int* result) { + *result = 
atomicMax(address, 1234); +} + +/* atomicMax(unsigned long* address, unsigned long val) */ +__global__ void atomicMax_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicMax(&address, 1234); +} + +__global__ void atomicMax_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicMax(1234, 1234); +} + +__global__ void atomicMax_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulong_v5(char* address, unsigned long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulong_v6(short* address, unsigned long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulong_v7(long* address, unsigned long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulong_v8(long long* address, unsigned long* result) { + *result = atomicMax(address, 1234); +} + +/* atomicMax(unsigned long long* address, unsigned long long val) */ +__global__ void atomicMax_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(&address, 1234); +} + +__global__ void atomicMax_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(1234, 1234); +} + +__global__ void atomicMax_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void 
atomicMax_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicMax(address, 1234); +} + +/* atomicMax(float* address, float val) */ +__global__ void atomicMax_float_v1(float* address, float* result) { + *result = atomicMax(&address, 1234.f); +} + +__global__ void atomicMax_float_v2(float* address, float* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_float_v3(float* address, float* result) { + *result = atomicMax(1234.f, 1234.f); +} + +__global__ void atomicMax_float_v4(Dummy* address, float* result) { + *result = atomicMax(address, 1234.f); +} + +__global__ void atomicMax_float_v5(char* address, float* result) { + *result = atomicMax(address, 1234.f); +} + +__global__ void atomicMax_float_v6(short* address, float* result) { + *result = atomicMax(address, 1234.f); +} + +__global__ void atomicMax_float_v7(long* address, float* result) { + *result = atomicMax(address, 1234.f); +} + +__global__ void atomicMax_float_v8(long long* address, float* result) { + *result = atomicMax(address, 1234); +} + +/* atomicMax(double* address, double val) */ +__global__ void atomicMax_double_v1(double* address, double* result) { + *result = atomicMax(&address, 1234.0); +} + +__global__ void atomicMax_double_v2(double* address, double* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_double_v3(double* address, double* result) { + *result = atomicMax(1234.0, 1234.0); +} + +__global__ void atomicMax_double_v4(Dummy* address, double* result) { + *result = atomicMax(address, 1234.0); +} + +__global__ void atomicMax_double_v5(char* address, double* result) { + *result = atomicMax(address, 1234.0); +} + +__global__ void atomicMax_double_v6(short* address, double* result) { + *result = atomicMax(address, 1234.0); +} + +__global__ void atomicMax_double_v7(long* address, 
double* result) { + *result = atomicMax(address, 1234.0); +} + +__global__ void atomicMax_double_v8(long long* address, double* result) { + *result = atomicMax(address, 1234.0); +} diff --git a/catch/unit/atomics/atomicMax_negative_kernels_rtc.hh b/catch/unit/atomics/atomicMax_negative_kernels_rtc.hh new file mode 100644 index 0000000000..885f9f5250 --- /dev/null +++ b/catch/unit/atomics/atomicMax_negative_kernels_rtc.hh @@ -0,0 +1,273 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicMax_int{ + R"( + __global__ void atomicMax_int_v1(int* address, int* result) { + *result = atomicMax(&address, 1234); + } + + __global__ void atomicMax_int_v2(int* address, int* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_int_v3(int* address, int* result) { + *result = atomicMax(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_int_v4(Dummy* address, int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_int_v5(char* address, int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_int_v6(short* address, int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_int_v7(long* address, int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_int_v8(long long* address, int* result) { + *result = atomicMax(address, 1234); + } + )"}; + +static constexpr auto kAtomicMax_uint{ + R"( + __global__ void atomicMax_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicMax(&address, 1234); + } + + __global__ void atomicMax_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicMax(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_uint_v5(char* address, unsigned int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_uint_v6(short* address, unsigned int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_uint_v7(long* address, unsigned int* result) { + *result = atomicMax(address, 1234); 
+ } + + __global__ void atomicMax_uint_v8(long long* address, unsigned int* result) { + *result = atomicMax(address, 1234); + } + )"}; + +static constexpr auto kAtomicMax_ulong{ + R"( + __global__ void atomicMax_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicMax(&address, 1234); + } + + __global__ void atomicMax_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicMax(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulong_v5(char* address, unsigned long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulong_v6(short* address, unsigned long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulong_v7(long* address, unsigned long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulong_v8(long long* address, unsigned long* result) { + *result = atomicMax(address, 1234); + } + )"}; + +static constexpr auto kAtomicMax_ulonglong{ + R"( + __global__ void atomicMax_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(&address, 1234); + } + + __global__ void atomicMax_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void 
atomicMax_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicMax(address, 1234); + } + )"}; + +static constexpr auto kAtomicMax_float{ + R"( + __global__ void atomicMax_float_v1(float* address, float* result) { + *result = atomicMax(&address, 1234.f); + } + + __global__ void atomicMax_float_v2(float* address, float* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_float_v3(float* address, float* result) { + *result = atomicMax(1234.f, 1234.f); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_float_v4(Dummy* address, float* result) { + *result = atomicMax(address, 1234.f); + } + + __global__ void atomicMax_float_v5(char* address, float* result) { + *result = atomicMax(address, 1234.f); + } + + __global__ void atomicMax_float_v6(short* address, float* result) { + *result = atomicMax(address, 1234.f); + } + + __global__ void atomicMax_float_v7(long* address, float* result) { + *result = atomicMax(address, 1234.f); + } + + __global__ void atomicMax_float_v8(long long* address, float* result) { + *result = atomicMax(address, 1234); + } + )"}; + +static constexpr auto kAtomicMax_double{ + R"( + __global__ void atomicMax_double_v1(double* address, double* result) { + *result = atomicMax(&address, 1234.0); + } + + __global__ void atomicMax_double_v2(double* address, double* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_double_v3(double* address, double* result) { + *result = atomicMax(1234.0, 1234.0); + } + + class Dummy 
{ + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_double_v4(Dummy* address, double* result) { + *result = atomicMax(address, 1234.0); + } + + __global__ void atomicMax_double_v5(char* address, double* result) { + *result = atomicMax(address, 1234.0); + } + + __global__ void atomicMax_double_v6(short* address, double* result) { + *result = atomicMax(address, 1234.0); + } + + __global__ void atomicMax_double_v7(long* address, double* result) { + *result = atomicMax(address, 1234.0); + } + + __global__ void atomicMax_double_v8(long long* address, double* result) { + *result = atomicMax(address, 1234.0); + } + )"}; diff --git a/catch/unit/atomics/atomicMax_system.cc b/catch/unit/atomics/atomicMax_system.cc new file mode 100644 index 0000000000..b07b566616 --- /dev/null +++ b/catch/unit/atomics/atomicMax_system.cc @@ -0,0 +1,124 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup atomicMax_system atomicMax_system + * @{ + * @ingroup AtomicsTest + * `atomicMax_system(TestType* address, TestType val)` - + * performs system-wide atomic maximum between the value at address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicMax_system from multiple threads on the same address. + * - Uses multiple devices and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMax_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { +#endif + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax_system from multiple threads on adjacent addresses. + * - Uses multiple devices and launches multiple kernels.
+ * Test source + * ------------------------ + * - unit/atomics/atomicMax_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { +#endif + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax_system from multiple threads on scattered addresses. + * - Uses multiple devices and launches multiple kernels.
+ * Test source + * ------------------------ + * - unit/atomics/atomicMax_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { +#endif + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, cache_line_size); + } + } +} diff --git a/catch/unit/atomics/atomicMin.cc b/catch/unit/atomics/atomicMin.cc new file mode 100644 index 0000000000..3d0f89412f --- /dev/null +++ b/catch/unit/atomics/atomicMin.cc @@ -0,0 +1,222 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "atomicMin_negative_kernels_rtc.hh" +#include "min_max_common.hh" + +#include + +/** + * @addtogroup atomicMin atomicMin + * @{ + * @ingroup AtomicsTest + * `atomicMin(TestType* address, TestType val)` - + * calculates minimum between the value at address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on the same address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_SameAddress", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel.
+ * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on scattered addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Scattered_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on the same address. + * - Uses only one device and launches multiple kernels.
+ * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Multi_Kernel_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on scattered addresses. + * - Uses only one device and launches multiple kernels.
+ * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Compiles atomicMin with invalid parameters. + * - Compiles the source with RTC. + * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicMin_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = GENERATE(kAtomicMin_int, kAtomicMin_uint, kAtomicMin_ulong, + kAtomicMin_ulonglong, kAtomicMin_float, kAtomicMin_double); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicMin_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + // Please check the content of atomicMin_negative_kernels_rtc.hh + int expected_error_count{8}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result,
HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/catch/unit/atomics/atomicMin_negative_kernels.cc b/catch/unit/atomics/atomicMin_negative_kernels.cc new file mode 100644 index 0000000000..644b7aaf8b --- /dev/null +++ b/catch/unit/atomics/atomicMin_negative_kernels.cc @@ -0,0 +1,219 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* int atomicMin(int* address, int val) */ +__global__ void atomicMin_int_v1(int* address, int* result) { *result = atomicMin(&address, 1234); } + +__global__ void atomicMin_int_v2(int* address, int* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_int_v3(int* address, int* result) { *result = atomicMin(1234, 1234); } + +__global__ void atomicMin_int_v4(Dummy* address, int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_int_v5(char* address, int* result) { *result = atomicMin(address, 1234); } + +__global__ void atomicMin_int_v6(short* address, int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_int_v7(long* address, int* result) { *result = atomicMin(address, 1234); } + +__global__ void atomicMin_int_v8(long long* address, int* result) { + *result = atomicMin(address, 1234); +} + +/* unsigned int atomicMin(unsigned int* address, unsigned int val) */ +__global__ void atomicMin_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicMin(&address, 1234); +} + +__global__ void atomicMin_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicMin(1234, 1234); +} + +__global__ void atomicMin_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_uint_v5(char* address, unsigned int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_uint_v6(short* address, unsigned int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_uint_v7(long* address, unsigned int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_uint_v8(long long* address, unsigned int* result) { + *result = 
atomicMin(address, 1234); +} + +/* atomicMin(unsigned long* address, unsigned long val) */ +__global__ void atomicMin_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicMin(&address, 1234); +} + +__global__ void atomicMin_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicMin(1234, 1234); +} + +__global__ void atomicMin_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulong_v5(char* address, unsigned long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulong_v6(short* address, unsigned long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulong_v7(long* address, unsigned long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulong_v8(long long* address, unsigned long* result) { + *result = atomicMin(address, 1234); +} + +/* atomicMin(unsigned long long* address, unsigned long long val) */ +__global__ void atomicMin_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(&address, 1234); +} + +__global__ void atomicMin_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(1234, 1234); +} + +__global__ void atomicMin_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void 
atomicMin_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicMin(address, 1234); +} + +/* atomicMin(float* address, float val) */ +__global__ void atomicMin_float_v1(float* address, float* result) { + *result = atomicMin(&address, 1234.f); +} + +__global__ void atomicMin_float_v2(float* address, float* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_float_v3(float* address, float* result) { + *result = atomicMin(1234.f, 1234.f); +} + +__global__ void atomicMin_float_v4(Dummy* address, float* result) { + *result = atomicMin(address, 1234.f); +} + +__global__ void atomicMin_float_v5(char* address, float* result) { + *result = atomicMin(address, 1234.f); +} + +__global__ void atomicMin_float_v6(short* address, float* result) { + *result = atomicMin(address, 1234.f); +} + +__global__ void atomicMin_float_v7(long* address, float* result) { + *result = atomicMin(address, 1234.f); +} + +__global__ void atomicMin_float_v8(long long* address, float* result) { + *result = atomicMin(address, 1234); +} + +/* atomicMin(double* address, double val) */ +__global__ void atomicMin_double_v1(double* address, double* result) { + *result = atomicMin(&address, 1234.0); +} + +__global__ void atomicMin_double_v2(double* address, double* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_double_v3(double* address, double* result) { + *result = atomicMin(1234.0, 1234.0); +} + +__global__ void atomicMin_double_v4(Dummy* address, double* result) { + *result = atomicMin(address, 1234.0); +} + +__global__ void atomicMin_double_v5(char* address, double* result) { + *result = atomicMin(address, 1234.0); +} + +__global__ void atomicMin_double_v6(short* address, double* result) { + *result = atomicMin(address, 1234.0); +} + +__global__ void atomicMin_double_v7(long* address, 
double* result) { + *result = atomicMin(address, 1234.0); +} + +__global__ void atomicMin_double_v8(long long* address, double* result) { + *result = atomicMin(address, 1234.0); +} diff --git a/catch/unit/atomics/atomicMin_negative_kernels_rtc.hh b/catch/unit/atomics/atomicMin_negative_kernels_rtc.hh new file mode 100644 index 0000000000..cc1ae5c7af --- /dev/null +++ b/catch/unit/atomics/atomicMin_negative_kernels_rtc.hh @@ -0,0 +1,273 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicMin_int{ + R"( + __global__ void atomicMin_int_v1(int* address, int* result) { + *result = atomicMin(&address, 1234); + } + + __global__ void atomicMin_int_v2(int* address, int* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_int_v3(int* address, int* result) { + *result = atomicMin(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_int_v4(Dummy* address, int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_int_v5(char* address, int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_int_v6(short* address, int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_int_v7(long* address, int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_int_v8(long long* address, int* result) { + *result = atomicMin(address, 1234); + } + )"}; + +static constexpr auto kAtomicMin_uint{ + R"( + __global__ void atomicMin_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicMin(&address, 1234); + } + + __global__ void atomicMin_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicMin(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_uint_v5(char* address, unsigned int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_uint_v6(short* address, unsigned int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_uint_v7(long* address, unsigned int* result) { + *result = atomicMin(address, 1234); 
+ } + + __global__ void atomicMin_uint_v8(long long* address, unsigned int* result) { + *result = atomicMin(address, 1234); + } + )"}; + +static constexpr auto kAtomicMin_ulong{ + R"( + __global__ void atomicMin_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicMin(&address, 1234); + } + + __global__ void atomicMin_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicMin(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulong_v5(char* address, unsigned long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulong_v6(short* address, unsigned long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulong_v7(long* address, unsigned long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulong_v8(long long* address, unsigned long* result) { + *result = atomicMin(address, 1234); + } + )"}; + +static constexpr auto kAtomicMin_ulonglong{ + R"( + __global__ void atomicMin_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(&address, 1234); + } + + __global__ void atomicMin_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void 
atomicMin_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicMin(address, 1234); + } + )"}; + +static constexpr auto kAtomicMin_float{ + R"( + __global__ void atomicMin_float_v1(float* address, float* result) { + *result = atomicMin(&address, 1234.f); + } + + __global__ void atomicMin_float_v2(float* address, float* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_float_v3(float* address, float* result) { + *result = atomicMin(1234.f, 1234.f); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_float_v4(Dummy* address, float* result) { + *result = atomicMin(address, 1234.f); + } + + __global__ void atomicMin_float_v5(char* address, float* result) { + *result = atomicMin(address, 1234.f); + } + + __global__ void atomicMin_float_v6(short* address, float* result) { + *result = atomicMin(address, 1234.f); + } + + __global__ void atomicMin_float_v7(long* address, float* result) { + *result = atomicMin(address, 1234.f); + } + + __global__ void atomicMin_float_v8(long long* address, float* result) { + *result = atomicMin(address, 1234); + } + )"}; + +static constexpr auto kAtomicMin_double{ + R"( + __global__ void atomicMin_double_v1(double* address, double* result) { + *result = atomicMin(&address, 1234.0); + } + + __global__ void atomicMin_double_v2(double* address, double* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_double_v3(double* address, double* result) { + *result = atomicMin(1234.0, 1234.0); + } + + class Dummy 
{ + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_double_v4(Dummy* address, double* result) { + *result = atomicMin(address, 1234.0); + } + + __global__ void atomicMin_double_v5(char* address, double* result) { + *result = atomicMin(address, 1234.0); + } + + __global__ void atomicMin_double_v6(short* address, double* result) { + *result = atomicMin(address, 1234.0); + } + + __global__ void atomicMin_double_v7(long* address, double* result) { + *result = atomicMin(address, 1234.0); + } + + __global__ void atomicMin_double_v8(long long* address, double* result) { + *result = atomicMin(address, 1234.0); + } + )"}; diff --git a/catch/unit/atomics/atomicMin_system.cc b/catch/unit/atomics/atomicMin_system.cc new file mode 100644 index 0000000000..7474a2e10d --- /dev/null +++ b/catch/unit/atomics/atomicMin_system.cc @@ -0,0 +1,124 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup atomicMin_system atomicMin_system + * @{ + * @ingroup AtomicsTest + * `atomicMin_system(TestType* address, TestType val)` - + * performs system-wide atomic minimum between the value at address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicMin_system from multiple threads on the same address. + * - Uses multiple devices and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMin_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { +#endif + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin_system from multiple threads on adjacent addresses. + * - Uses multiple devices and launches multiple kernels.
+ * Test source + * ------------------------ + * - unit/atomics/atomicMin_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { +#endif + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin_system from multiple threads on scattered addresses. + * - Uses multiple devices and launches multiple kernels.
+ * Test source + * ------------------------ + * - unit/atomics/atomicMin_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { +#endif + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, cache_line_size); + } + } +} diff --git a/catch/unit/atomics/min_max_common.hh b/catch/unit/atomics/min_max_common.hh new file mode 100644 index 0000000000..c171c6f3c6 --- /dev/null +++ b/catch/unit/atomics/min_max_common.hh @@ -0,0 +1,360 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include +#include +#include + +namespace cg = cooperative_groups; + +namespace MinMax { +enum class AtomicOperation { + kMin = 0, + kMinSystem, + kMax, + kMaxSystem, + kSafeMin, + kUnsafeMin, + kSafeMax, + kUnsafeMax +}; + +constexpr auto kIntegerTestValue = 5; +constexpr auto kFloatingPointTestValue = 5.5; + +template +__host__ __device__ TestType GetTestValue() { + TestType test_value = + std::is_floating_point_v ? kFloatingPointTestValue : kIntegerTestValue; + + if constexpr (operation == AtomicOperation::kMin || operation == AtomicOperation::kMinSystem || + operation == AtomicOperation::kUnsafeMin || + operation == AtomicOperation::kSafeMin) { + return test_value - 2; + } + + return test_value + 2; +} + +template +__device__ TestType PerformAtomicOperation(TestType* const mem) { + const auto val = GetTestValue(); + + if constexpr (operation == AtomicOperation::kMin) { + return atomicMin(mem, val); + } else if constexpr (operation == AtomicOperation::kMinSystem) { + return atomicMin_system(mem, val); + } else if constexpr (operation == AtomicOperation::kMax) { + return atomicMax(mem, val); + } else if constexpr (operation == AtomicOperation::kMaxSystem) { + return atomicMax_system(mem, val); + } else if constexpr (operation == AtomicOperation::kUnsafeMin) { + return unsafeAtomicMin(mem, val); + } else if constexpr (operation == AtomicOperation::kSafeMin) { + return safeAtomicMin(mem, val); + } else if constexpr (operation == AtomicOperation::kUnsafeMax) { + return unsafeAtomicMax(mem, val); + } else if constexpr (operation == AtomicOperation::kSafeMax) { + return safeAtomicMax(mem, val); + } +} + +template +__global__ void TestKernel(TestType* const global_mem, TestType* 
const old_vals) { + __shared__ TestType shared_mem; + + const auto tid = cg::this_grid().thread_rank(); + + TestType* const mem = use_shared_mem ? &shared_mem : global_mem; + + if constexpr (use_shared_mem) { + if (tid == 0) mem[0] = global_mem[0]; + __syncthreads(); + } + + old_vals[tid] = PerformAtomicOperation(mem); + + if constexpr (use_shared_mem) { + __syncthreads(); + if (tid == 0) global_mem[0] = mem[0]; + } +} + +template +__host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned int pitch, + const unsigned int idx) { + const auto byte_ptr = reinterpret_cast(ptr); + return reinterpret_cast(byte_ptr + idx * pitch); +} + +template +__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals, + const unsigned int width, const unsigned pitch) { + extern __shared__ uint8_t shared_mem[]; + + const auto tid = cg::this_grid().thread_rank(); + + TestType* const mem = use_shared_mem ? reinterpret_cast(shared_mem) : global_mem; + + if constexpr (use_shared_mem) { + if (tid < width) { + const auto target = PitchedOffset(mem, pitch, tid); + *target = *PitchedOffset(global_mem, pitch, tid); + }; + __syncthreads(); + } + + old_vals[tid] = + PerformAtomicOperation(PitchedOffset(mem, pitch, tid % width)); + + if constexpr (use_shared_mem) { + __syncthreads(); + if (tid < width) { + const auto target = PitchedOffset(global_mem, pitch, tid); + *target = *PitchedOffset(mem, pitch, tid); + }; + } +} + +struct TestParams { + auto ThreadCount() const { + return blocks.x * blocks.y * blocks.z * threads.x * threads.y * threads.z; + } + + dim3 blocks; + dim3 threads; + unsigned int num_devices = 1u; + unsigned int kernel_count = 1u; + unsigned int width = 1u; + unsigned int pitch = 0u; + unsigned int host_thread_count = 0u; + LinearAllocs alloc_type; +}; + +template +std::tuple, std::vector> TestKernelHostRef(const TestParams& p) { + const auto val = GetTestValue(); + + const auto thread_count = p.num_devices * p.kernel_count * 
p.ThreadCount(); + + TestType test_value = + std::is_floating_point_v ? kFloatingPointTestValue : kIntegerTestValue; + + std::vector res_vals(p.width, test_value); + std::vector old_vals; + old_vals.reserve(thread_count); + + for (auto tid = 0u; tid < thread_count; ++tid) { + auto& res = res_vals[tid % p.width]; + old_vals.push_back(res); + + if constexpr (operation == AtomicOperation::kMin || operation == AtomicOperation::kMinSystem || + operation == AtomicOperation::kUnsafeMin || + operation == AtomicOperation::kSafeMin) { + res = std::min(res, val); + } else if constexpr (operation == AtomicOperation::kMax || + operation == AtomicOperation::kMaxSystem || + operation == AtomicOperation::kUnsafeMax || + operation == AtomicOperation::kSafeMax) { + res = std::max(res, val); + } + } + + return {res_vals, old_vals}; +} + +template +void Verify(const TestParams& p, std::vector& res_vals, std::vector& old_vals) { + auto [expected_res_vals, expected_old_vals] = TestKernelHostRef(p); + + for (auto i = 0u; i < res_vals.size(); ++i) { + INFO("Results index: " << i); + REQUIRE(expected_res_vals[i] == res_vals[i]); + } + + std::sort(begin(old_vals), end(old_vals)); + std::sort(begin(expected_old_vals), end(expected_old_vals)); + for (auto i = 0u; i < old_vals.size(); ++i) { + INFO("Old values index: " << i); + REQUIRE(expected_old_vals[i] == old_vals[i]); + } +} + +template +void LaunchKernel(const TestParams& p, hipStream_t stream, TestType* const mem_ptr, + TestType* const old_vals) { + const auto shared_mem_size = use_shared_mem ? 
p.width * p.pitch : 0u; + if (p.width == 1 && p.pitch == sizeof(TestType)) + TestKernel + <<>>(mem_ptr, old_vals); + else + TestKernel + <<>>(mem_ptr, old_vals, p.width, p.pitch); +} + +template +void TestCore(const TestParams& p) { + const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType); + std::vector> old_vals_devs; + std::vector streams; + for (auto i = 0; i < p.num_devices; ++i) { + HIP_CHECK(hipSetDevice(i)); + old_vals_devs.emplace_back(LinearAllocs::hipMalloc, old_vals_alloc_size); + for (auto j = 0; j < p.kernel_count; ++j) { + streams.emplace_back(Streams::created); + } + } + + const auto mem_alloc_size = p.width * p.pitch; + LinearAllocGuard mem_dev(p.alloc_type, mem_alloc_size); + + std::vector old_vals(p.num_devices * p.kernel_count * p.ThreadCount()); + std::vector res_vals(p.width); + + TestType* const mem_ptr = + p.alloc_type == LinearAllocs::hipMalloc ? mem_dev.ptr() : mem_dev.host_ptr(); + + TestType test_value = + std::is_floating_point_v ? 
kFloatingPointTestValue : kIntegerTestValue; + HIP_CHECK(hipMemset(mem_ptr, 0, mem_alloc_size)); + for (int i = 0; i < p.width * p.pitch / sizeof(TestType); ++i) { + HIP_CHECK(hipMemcpy(&mem_ptr[i], &test_value, sizeof(TestType), hipMemcpyHostToDevice)); + } + + for (auto i = 0u; i < p.num_devices; ++i) { + for (auto j = 0u; j < p.kernel_count; ++j) { + const auto& stream = streams[i * p.kernel_count + j].stream(); + const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount(); + LaunchKernel(p, stream, mem_dev.ptr(), old_vals); + } + } + + for (auto i = 0u; i < p.num_devices; ++i) { + const auto device_offset = i * p.kernel_count * p.ThreadCount(); + HIP_CHECK(hipMemcpy(old_vals.data() + device_offset, old_vals_devs[i].ptr(), + old_vals_alloc_size, hipMemcpyDeviceToHost)); + } + HIP_CHECK(hipMemcpy2D(res_vals.data(), sizeof(TestType), mem_ptr, p.pitch, sizeof(TestType), + p.width, hipMemcpyDeviceToHost)); + + Verify(p, res_vals, old_vals); +} + +template +void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) { + TestParams params; + params.num_devices = 1; + params.kernel_count = 1; + params.threads = GENERATE(dim3(1023)); + params.width = width; + params.pitch = pitch; + + SECTION("Global memory") { + params.blocks = GENERATE(dim3(3)); + using LA = LinearAllocs; + for (const auto alloc_type : + {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } + } + + SECTION("Shared memory") { + params.blocks = dim3(1); + params.alloc_type = LinearAllocs::hipMalloc; + TestCore(params); + } +} + +template +void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsigned int width, + const unsigned int pitch) { + int concurrent_kernels = 0; + HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, 0)); + if (!concurrent_kernels) { + 
HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution"); + return; + } + + TestParams params; + params.num_devices = 1; + params.kernel_count = kernel_count; + params.blocks = GENERATE(dim3(3)); + params.threads = GENERATE(dim3(1023)); + params.width = width; + params.pitch = pitch; + + using LA = LinearAllocs; + for (const auto alloc_type : + {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } +} + +template +void MultipleDeviceMultipleKernelTest(const unsigned int num_devices, + const unsigned int kernel_count, const unsigned int width, + const unsigned int pitch) { + if (num_devices > 1) { + if (HipTest::getDeviceCount() < num_devices) { + std::string msg = std::to_string(num_devices) + " devices are required"; + HipTest::HIP_SKIP_TEST(msg.c_str()); + return; + } + } + + if (kernel_count > 1) { + for (auto i = 0u; i < num_devices; ++i) { + int concurrent_kernels = 0; + HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, i)); + if (!concurrent_kernels) { + HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution"); + return; + } + } + } + + TestParams params; + params.num_devices = num_devices; + params.kernel_count = kernel_count; + params.blocks = GENERATE(dim3(3)); + params.threads = GENERATE(dim3(1023)); + params.width = width; + params.pitch = pitch; + + using LA = LinearAllocs; + for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } +} +} // namespace MinMax diff --git a/catch/unit/atomics/safeAtomicMax.cc b/catch/unit/atomics/safeAtomicMax.cc new file mode 100644 index 0000000000..581a4a566a --- /dev/null +++ b/catch/unit/atomics/safeAtomicMax.cc @@ 
-0,0 +1,175 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup safeAtomicMax safeAtomicMax + * @{ + * @ingroup AtomicsTest + * `safeAtomicMax(TestType* address, TestType val)` - + * stores the maximum of `*address` and `val` at `address`, returns the old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on the same address. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_SameAddress", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Adjacent_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on the scattered addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Scattered_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Multi_Kernel_Same_Address", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} diff --git a/catch/unit/atomics/safeAtomicMin.cc b/catch/unit/atomics/safeAtomicMin.cc new file mode 100644 index 0000000000..810be72ca4 --- /dev/null +++ b/catch/unit/atomics/safeAtomicMin.cc @@ -0,0 +1,175 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup safeAtomicMin safeAtomicMin + * @{ + * @ingroup AtomicsTest + * `safeAtomicMin(TestType* address, TestType val)` - + * stores the minimum of `*address` and `val` at `address`, returns the old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on the same address. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_SameAddress", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Adjacent_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on the scattered addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Scattered_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Multi_Kernel_Same_Address", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} diff --git a/catch/unit/atomics/unsafeAtomicMax.cc b/catch/unit/atomics/unsafeAtomicMax.cc new file mode 100644 index 0000000000..2341e2d8c7 --- /dev/null +++ b/catch/unit/atomics/unsafeAtomicMax.cc @@ -0,0 +1,175 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup unsafeAtomicMax unsafeAtomicMax + * @{ + * @ingroup AtomicsTest + * `unsafeAtomicMax(TestType* address, TestType val)` - + * stores the maximum of `*address` and `val` at `address`, returns the old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on the same address. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_SameAddress", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Adjacent_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on the scattered addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Scattered_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Multi_Kernel_Same_Address", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} diff --git a/catch/unit/atomics/unsafeAtomicMin.cc b/catch/unit/atomics/unsafeAtomicMin.cc new file mode 100644 index 0000000000..60b827854a --- /dev/null +++ b/catch/unit/atomics/unsafeAtomicMin.cc @@ -0,0 +1,175 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup unsafeAtomicMin unsafeAtomicMin + * @{ + * @ingroup AtomicsTest + * `unsafeAtomicMin(TestType* address, TestType val)` - + * stores the minimum of `*address` and `val` at `address`, returns the old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on the same address. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_SameAddress", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Adjacent_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on the scattered addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Scattered_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Multi_Kernel_Same_Address", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. 
+ * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} From cf5ebc5990a298092ccac7d29bd753e5941d9e41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 18:33:28 +0100 Subject: [PATCH 05/71] EXSWHTEC-280 - Implement Unit Tests for launch bounds #209 Change-Id: I43af8c66dfcb4926bc29fe936a37d91fef6a0650 --- catch/include/hip_test_defgroups.hh | 58 ++++++ catch/unit/CMakeLists.txt | 1 + catch/unit/launchBounds/CMakeLists.txt | 47 +++++ catch/unit/launchBounds/launch_bounds.cc | 173 ++++++++++++++++++ .../launch_bounds_compiler_error_kernels.cc | 35 
++++ .../launch_bounds_negative_kernels_rtc.hh | 64 +++++++ .../launch_bounds_parse_error_kernels.cc | 30 +++ 7 files changed, 408 insertions(+) create mode 100644 catch/unit/launchBounds/CMakeLists.txt create mode 100644 catch/unit/launchBounds/launch_bounds.cc create mode 100644 catch/unit/launchBounds/launch_bounds_compiler_error_kernels.cc create mode 100644 catch/unit/launchBounds/launch_bounds_negative_kernels_rtc.hh create mode 100644 catch/unit/launchBounds/launch_bounds_parse_error_kernels.cc diff --git a/catch/include/hip_test_defgroups.hh b/catch/include/hip_test_defgroups.hh index e108f296fd..c1f72bdf6c 100644 --- a/catch/include/hip_test_defgroups.hh +++ b/catch/include/hip_test_defgroups.hh @@ -307,3 +307,61 @@ TEST_CASE("Unit_atomicDec_Negative_Parameters") {} * This section describes tests for the Complex type functions. * @} */ + +/** + * @defgroup DeviceLanguageTest Device Language + * @{ + * This section describes tests for the Device Language API. + */ + +/** + * @addtogroup launch_bounds launch_bounds + * @{ + * @ingroup DeviceLanguageTest + */ + +/** + * Test Description + * ------------------------ + * - Validates handling of invalid arguments: + * -# Compiles kernels that are not created appropriately: + * - Maximum number of threads is 0 + * - Maximum number of threads is not an integer value + * - Minimum number of warps is not an integer value + * -# Expected output: compiler error + * Test source + * ------------------------ + * - unit/launch_bounds/CMakeLists.txt + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Kernel_Launch_bounds_Negative_Parameters_CompilerError") {} + +/** + * Test Description + * ------------------------ + * - Validates handling of invalid arguments: + * -# Compiles kernels that are not created appropriately: + * - Maximum number of threads is negative + * - Minimum number of warps is negative + * - Validates handling of invalid arguments: + * -# Expected output: parse
error + * Test source + * ------------------------ + * - unit/launch_bounds/CMakeLists.txt + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Kernel_Launch_bounds_Negative_Parameters_ParseError") {} + +/** + * End doxygen group launch_bounds. + * @} + */ + +/** + * End doxygen group DeviceLanguageTest. + * @} + */ diff --git a/catch/unit/CMakeLists.txt b/catch/unit/CMakeLists.txt index 304016410e..9d158dad4d 100644 --- a/catch/unit/CMakeLists.txt +++ b/catch/unit/CMakeLists.txt @@ -58,3 +58,4 @@ add_subdirectory(vulkan_interop) add_subdirectory(gl_interop) # Disabled on NVIDIA due to defect - EXSWHTEC-246 endif() add_subdirectory(synchronization) +add_subdirectory(launchBounds) diff --git a/catch/unit/launchBounds/CMakeLists.txt b/catch/unit/launchBounds/CMakeLists.txt new file mode 100644 index 0000000000..589fe4d8e8 --- /dev/null +++ b/catch/unit/launchBounds/CMakeLists.txt @@ -0,0 +1,47 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +set(TEST_SRC + launch_bounds.cc +) + +if(HIP_PLATFORM MATCHES "nvidia") + hip_add_exe_to_target(NAME LaunchBoundsTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS nvrtc) +elseif(HIP_PLATFORM MATCHES "amd") + hip_add_exe_to_target(NAME LaunchBoundsTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS hiprtc) +endif() + +add_test(NAME Unit_Kernel_Launch_bounds_Negative_Parameters_CompilerError + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + launch_bounds_compiler_error_kernels.cc -1) + +if(HIP_PLATFORM MATCHES "amd") + add_test(NAME Unit_Kernel_Launch_bounds_Negative_Parameters_ParseError + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + launch_bounds_parse_error_kernels.cc -1) +endif() diff --git a/catch/unit/launchBounds/launch_bounds.cc b/catch/unit/launchBounds/launch_bounds.cc new file mode 100644 index 0000000000..72b087331b --- /dev/null +++ b/catch/unit/launchBounds/launch_bounds.cc @@ -0,0 +1,173 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include "launch_bounds_negative_kernels_rtc.hh" + +/** + * @addtogroup launch_bounds launch_bounds + * @{ + * @ingroup DeviceLanguageTest + * `__launch_bounds__(MAX_THREADS_PER_BLOCK, MIN_WARPS_PER_EXECUTION_UNIT)` - + * allows the application to provide usage hints that influence the resources (primarily registers) + * used by the generated code. It is a function attribute that must be attached to a global + * function. 
+ */ + +constexpr int kMaxThreadsPerBlock = 128; +constexpr int kMinWarpsPerMultiprocessor = 2; + +__launch_bounds__(kMaxThreadsPerBlock, kMinWarpsPerMultiprocessor) __global__ + void SumKernel(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); +} + +template void LaunchBoundsWrapper(const int threads_per_block) { + auto block_size = GENERATE(1, 32, 128); + int* A_d; + int* A_h; + int sum{0}; + + A_h = static_cast(malloc(sizeof(int))); + memset(A_h, 0, sizeof(int)); + HIP_CHECK(hipMalloc(&A_d, sizeof(int))); + HIP_CHECK(hipMemcpy(A_d, A_h, sizeof(int), hipMemcpyHostToDevice)); + SumKernel<<>>(A_d); + + if constexpr (out_of_bounds) { + if (threads_per_block < 0) { + HIP_CHECK_ERROR(hipGetLastError(), hipErrorInvalidConfiguration); + } else { +#if HT_AMD + HIP_CHECK_ERROR(hipGetLastError(), hipErrorLaunchFailure); +#else + HIP_CHECK_ERROR(hipGetLastError(), hipErrorInvalidValue); +#endif + } + } else { + HIP_CHECK(hipGetLastError()); + } + + HIP_CHECK(hipMemcpy(A_h, A_d, sizeof(int), hipMemcpyDeviceToHost)); + + if constexpr (!out_of_bounds) { + for (int i = 0; i < threads_per_block * block_size; ++i) { + sum += i; + } + REQUIRE(*A_h == sum); + } + + free(A_h); + HIP_CHECK(hipFree(A_d)); +} + +/** + * Test Description + * ------------------------ + * - Executes a simple addition kernel and validates results. + * - The number of threads per block used to launch the kernel + * complies with the `__launch_bounds__`: + * -# Number of threads per block is less than or equal to the configured maximum value. + * -# Different values are assigned and kernel functionality is validated.
+ * Test source + * ------------------------ + * - unit/launch_bounds/launch_bounds.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Kernel_Launch_bounds_Positive_Basic") { + auto threads_per_block = GENERATE(1, kMaxThreadsPerBlock / 2, kMaxThreadsPerBlock); + LaunchBoundsWrapper(threads_per_block); +} + +/** + * Test Description + * ------------------------ + * - Validates that the kernels will not be launched if the number of threads + * per block is negative (`hipErrorInvalidConfiguration`) or exceeds `__launch_bounds__`: + * -# Expected output when the bound is exceeded: + * - return `hipErrorLaunchFailure` on AMD. + * - return `hipErrorInvalidValue` on NVIDIA. + * Test source + * ------------------------ + * - unit/launch_bounds/launch_bounds.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Kernel_Launch_bounds_Negative_OutOfBounds") { + auto threads_per_block = + GENERATE(-1 * kMaxThreadsPerBlock, -1, kMaxThreadsPerBlock + 1, 2 * kMaxThreadsPerBlock); + LaunchBoundsWrapper(threads_per_block); +} + +/** + * Test Description + * ------------------------ + * - Validates handling of invalid arguments: + * -# Compiles kernels that are not created appropriately: + * - Maximum number of threads is 0 + * - Maximum number of threads is negative + * - Minimum number of warps is negative + * - Maximum number of threads is not an integer value + * - Minimum number of warps is not an integer value + * -# Expected output: compiler error + * - Uses RTC for compilation.
+ * Test source + * ------------------------ + * - unit/launch_bounds/launch_bounds.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Kernel_Launch_bounds_Negative_Parameters_RTC") { + hiprtcProgram program{}; + +#if HT_AMD + const auto program_source = GENERATE(kMaxThreadsZero, kMaxThreadsNegative, kMinWarpsNegative, + kMaxThreadsNotInt, kMinWarpsNotInt); +#else + // Aligned with CUDA behavior and expected behavior on NVIDIA + const auto program_source = GENERATE(kMaxThreadsNotInt, kMinWarpsNotInt); +#endif + + HIPRTC_CHECK(hiprtcCreateProgram(&program, program_source, "launch_bounds_negative.cc", 0, + nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log. + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + REQUIRE(error_count > 0); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); +} diff --git a/catch/unit/launchBounds/launch_bounds_compiler_error_kernels.cc b/catch/unit/launchBounds/launch_bounds_compiler_error_kernels.cc new file mode 100644 index 0000000000..4a8ec9885f --- /dev/null +++ b/catch/unit/launchBounds/launch_bounds_compiler_error_kernels.cc @@ -0,0 +1,35 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +__launch_bounds__(0) __global__ void MaxThreadsZero(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); +} + +__launch_bounds__(1.5) __global__ void MaxThreadsNotInt(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); +} + +__launch_bounds__(128, 1.5) __global__ void MinWarpsNotInt(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); +} diff --git a/catch/unit/launchBounds/launch_bounds_negative_kernels_rtc.hh b/catch/unit/launchBounds/launch_bounds_negative_kernels_rtc.hh new file mode 100644 index 0000000000..a341ce67ff --- /dev/null +++ b/catch/unit/launchBounds/launch_bounds_negative_kernels_rtc.hh @@ -0,0 +1,64 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the launch bounds negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kMaxThreadsZero{ + R"( + __launch_bounds__(0) __global__ void SumKernel(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); + } + )"}; + +static constexpr auto kMaxThreadsNegative{ + R"( + __launch_bounds__(-1) __global__ void SumKernel(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); + } + )"}; + +static constexpr auto kMinWarpsNegative{ + R"( + __launch_bounds__(128, -1) __global__ void SumKernel(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); + } + )"}; + +static constexpr auto kMaxThreadsNotInt{ + R"( + __launch_bounds__(1.5) __global__ void SumKernel(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); + } + )"}; + +static constexpr auto kMinWarpsNotInt{ + R"( + __launch_bounds__(128, 1.5) __global__ void SumKernel(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); + } + )"}; diff --git a/catch/unit/launchBounds/launch_bounds_parse_error_kernels.cc b/catch/unit/launchBounds/launch_bounds_parse_error_kernels.cc new file mode 100644 index 0000000000..e0ed6093f5 --- /dev/null +++ b/catch/unit/launchBounds/launch_bounds_parse_error_kernels.cc @@ -0,0 +1,30 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +__launch_bounds__(-1) __global__ void MaxThreadsNegative(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); +} + +__launch_bounds__(128, -1) __global__ void MinWarpsNegative(int* sum) { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + atomicAdd(sum, tid); +} From 044a59496cbfec069485d9233d93d96f46927565 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Thu, 28 Dec 2023 17:41:54 +0000 Subject: [PATCH 06/71] EXSWHTEC-281 - Implement Unit Tests for assert functions #210 Change-Id: I6c05915c957d9b67951b3e97cc35cb1ca72a945f --- catch/hipTestMain/config/config_amd_linux | 2 + catch/hipTestMain/config/config_amd_windows | 5 + .../config/config_nvidia_linux.json | 5 +- catch/include/hip_test_common.hh | 13 + catch/unit/CMakeLists.txt | 1 + catch/unit/assertion/CMakeLists.txt | 48 +++ catch/unit/assertion/assert.cc | 118 +++++++ catch/unit/assertion/static_assert.cc | 88 +++++ .../static_assert_kernels_negative.cc | 30 ++ .../static_assert_kernels_positive.cc | 32 ++ .../assertion/static_assert_kernels_rtc.hh | 56 +++ .../graph/hipGraphAddMemcpyNodeFromSymbol.cc | 11 +- .../graph/hipGraphAddMemcpyNodeToSymbol.cc | 3 +- catch/unit/graph/hipGraphAddMemsetNode.cc | 96 +++-- ...hipGraphExecMemcpyNodeSetParamsToSymbol.cc | 3 +- .../graph/hipGraphExecMemsetNodeSetParams.cc | 3 +- .../hipGraphMemcpyNodeSetParamsFromSymbol.cc | 3 +- .../hipGraphMemcpyNodeSetParamsToSymbol.cc | 3 +- .../unit/graph/hipGraphMemsetNodeGetParams.cc | 1 - 
.../unit/graph/hipGraphMemsetNodeSetParams.cc | 3 +- catch/unit/graph/hipLaunchHostFunc.cc | 1 - catch/unit/graph/hipStreamBeginCapture.cc | 334 +++++++----------- catch/unit/graph/hipStreamEndCapture.cc | 1 - catch/unit/graph/hipStreamGetCaptureInfo.cc | 1 - .../unit/graph/hipStreamGetCaptureInfo_v2.cc | 1 - catch/unit/graph/hipStreamIsCapturing.cc | 1 - .../hipStreamUpdateCaptureDependencies.cc | 3 +- .../hipThreadExchangeStreamCaptureMode.cc | 1 - catch/unit/kernel/hipShflTests.cc | 99 ++---- catch/unit/kernel/hipShflUpDownTest.cc | 157 +++----- catch/unit/stream/hipStreamGetDevice.cc | 14 +- 31 files changed, 674 insertions(+), 463 deletions(-) create mode 100644 catch/unit/assertion/CMakeLists.txt create mode 100644 catch/unit/assertion/assert.cc create mode 100644 catch/unit/assertion/static_assert.cc create mode 100644 catch/unit/assertion/static_assert_kernels_negative.cc create mode 100644 catch/unit/assertion/static_assert_kernels_positive.cc create mode 100644 catch/unit/assertion/static_assert_kernels_rtc.hh diff --git a/catch/hipTestMain/config/config_amd_linux b/catch/hipTestMain/config/config_amd_linux index 43bbdd1c86..1f38291d20 100644 --- a/catch/hipTestMain/config/config_amd_linux +++ b/catch/hipTestMain/config/config_amd_linux @@ -309,6 +309,8 @@ "Performance_hipMemsetD32", "Performance_hipMemsetD32Async", "Unit_hipGraphKernelNodeGetAttribute_Negative_Parameters", + "=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===", + "Unit_Assert_Positive_Basic_KernelFail", #endif #if defined VEGA20 "=== SWDEV-419112 Below tests fail in stress test on 29/08/23 ===", diff --git a/catch/hipTestMain/config/config_amd_windows b/catch/hipTestMain/config/config_amd_windows index d7229c1927..74c5bca32a 100644 --- a/catch/hipTestMain/config/config_amd_windows +++ b/catch/hipTestMain/config/config_amd_windows @@ -440,6 +440,11 @@ "Unit_Thread_Block_Tile_Shfl_Positive_Basic - float", 
"Unit_Thread_Block_Tile_Shfl_Positive_Basic - double", "Unit_Thread_Block_Tile_Getters_Positive_Basic", + "=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===", + "Unit_StaticAssert_Positive_Basic_RTC", + "Unit_Assert_Positive_Basic_KernelFail", + "Unit_StaticAssert_Positive_Basic", + "Unit_StaticAssert_Negative_Basic", #endif "End of json" ] diff --git a/catch/hipTestMain/config/config_nvidia_linux.json b/catch/hipTestMain/config/config_nvidia_linux.json index 3fdf6d03a6..ada918a267 100644 --- a/catch/hipTestMain/config/config_nvidia_linux.json +++ b/catch/hipTestMain/config/config_nvidia_linux.json @@ -89,6 +89,9 @@ "Performance_hipMemsetD32", "Performance_hipMemsetD32Async", "Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior", - "Unit_hipMemcpy_Positive_Synchronization_Behavior" + "Unit_hipMemcpy_Positive_Synchronization_Behavior", + "=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===", + "Unit_StaticAssert_Positive_Basic", + "Unit_StaticAssert_Negative_Basic" ] } diff --git a/catch/include/hip_test_common.hh b/catch/include/hip_test_common.hh index 147abe0941..c2d19650bf 100644 --- a/catch/include/hip_test_common.hh +++ b/catch/include/hip_test_common.hh @@ -129,6 +129,19 @@ THE SOFTWARE. } \ } +// Check that an expression, errorExpr, evaluates to the expected error_t, expectedError. 
+#define HIPRTC_CHECK_ERROR(errorExpr, expectedError) \ + { \ + auto localError = errorExpr; \ + INFO("Matching Errors: " \ + << "\n Expected Error: " << hiprtcGetErrorString(expectedError) \ + << "\n Expected Code: " << expectedError << '\n' \ + << " Actual Error: " << hiprtcGetErrorString(localError) \ + << "\n Actual Code: " << localError << "\nStr: " << #errorExpr \ + << "\n In File: " << __FILE__ << "\n At line: " << __LINE__); \ + REQUIRE(localError == expectedError); \ + } + #define HIPASSERT(condition) \ if (!(condition)) { \ printf("assertion %s at %s:%d \n", #condition, __FILE__, __LINE__); \ diff --git a/catch/unit/CMakeLists.txt b/catch/unit/CMakeLists.txt index 9d158dad4d..46f79b4d1d 100644 --- a/catch/unit/CMakeLists.txt +++ b/catch/unit/CMakeLists.txt @@ -59,3 +59,4 @@ add_subdirectory(gl_interop) # Disabled on NVIDIA due to defect - EXSWHTEC-246 endif() add_subdirectory(synchronization) add_subdirectory(launchBounds) +add_subdirectory(assertion) \ No newline at end of file diff --git a/catch/unit/assertion/CMakeLists.txt b/catch/unit/assertion/CMakeLists.txt new file mode 100644 index 0000000000..f7b38de221 --- /dev/null +++ b/catch/unit/assertion/CMakeLists.txt @@ -0,0 +1,48 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +if(HIP_PLATFORM MATCHES "nvidia") + set(TEST_SRC + assert.cc + ) + hip_add_exe_to_target(NAME AssertionTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS nvrtc) +elseif(HIP_PLATFORM MATCHES "amd") + set(TEST_SRC + static_assert.cc + assert.cc + ) + hip_add_exe_to_target(NAME AssertionTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + LINKER_LIBS hiprtc) +endif() + +add_test(NAME Unit_StaticAssert_Positive_Basic + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + static_assert_kernels_positive.cc 2) + +add_test(NAME Unit_StaticAssert_Negative_Basic + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + static_assert_kernels_negative.cc 2) diff --git a/catch/unit/assertion/assert.cc b/catch/unit/assertion/assert.cc new file mode 100644 index 0000000000..1be0569f2a --- /dev/null +++ b/catch/unit/assertion/assert.cc @@ -0,0 +1,118 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +/** + * @addtogroup assert assert + * @{ + * @ingroup DeviceLanguageTest + * `void assert(int expression)` - + * Stops the kernel execution if expression is equal to zero. 
+ */ + +jmp_buf env_ignore_abort; +volatile int abort_raised_flag = 0; + +void on_sigabrt(int signum) { + signal(signum, SIG_DFL); + abort_raised_flag = 1; + longjmp(env_ignore_abort, 1); +} + +void try_and_catch_abort(void (*func)()) { + if (!setjmp(env_ignore_abort)) { + signal(SIGABRT, &on_sigabrt); + (*func)(); + signal(SIGABRT, SIG_DFL); + } +} + +__global__ void AssertPassKernel() { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + // expected always to be true + assert(tid >= 0); +} + +__global__ void AssertFailKernel() { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + // expected to fail for the even thread indices + assert(tid % 2 == 1); +} + +template void LaunchAssertKernel() { + const int num_blocks = 2; + const int num_threads = 16; + + if constexpr (should_abort) { + AssertFailKernel<<>>(); +#if HT_AMD + HIP_CHECK(hipDeviceSynchronize()); +#else + HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorAssert); +#endif + } else { + AssertPassKernel<<>>(); + HIP_CHECK(hipDeviceSynchronize()); + } +} + +/** + * Test Description + * ------------------------ + * - Launches kernels with asserts that have an expression equal to 1. + * - Expects that SIGABRT is not raised and kernels have executed successfully. + * Test source + * ------------------------ + * - unit/assertion/assert.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Assert_Positive_Basic_KernelPass") { + try_and_catch_abort(&LaunchAssertKernel); + REQUIRE(abort_raised_flag == 0); +} + +/** + * Test Description + * ------------------------ + * - Launches kernels with asserts that have an expression equal to 0. + * - Expects that SIGABRT is raised and kernels have been stopped on AMD. + * - The HIP runtime also aborts the host code, so this test case uses signal handlers + * to avoid host code abortion. + * - Expects that `hipErrorAssert` is returned from `hipDeviceSynchronize` on NVIDIA. + * - The host code is not aborted. 
+ * Test source + * ------------------------ + * - unit/assertion/assert.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Assert_Positive_Basic_KernelFail") { + try_and_catch_abort(&LaunchAssertKernel); +#if HT_AMD + REQUIRE(abort_raised_flag == 1); +#else + REQUIRE(abort_raised_flag == 0); +#endif +} diff --git a/catch/unit/assertion/static_assert.cc b/catch/unit/assertion/static_assert.cc new file mode 100644 index 0000000000..508db295b7 --- /dev/null +++ b/catch/unit/assertion/static_assert.cc @@ -0,0 +1,88 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include "static_assert_kernels_rtc.hh" + +/** + * @addtogroup static_assert static_assert + * @{ + * @ingroup DeviceLanguageTest + * `void static_assert(constexpr expression, const char* message)` - + * Stops the compilation if expression is equal to zero, and displays the specified message. 
+ */ + +void StaticAssertWrapper(const char* program_source) { + hiprtcProgram program{}; + + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "static_assert_rtc.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{2}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} + +/** + * Test Description + * ------------------------ + * - Compiles kernels with static_assert calls: + * -# Expected that static_assert passes and compilation is successful. + * -# Expected that static_assert fails and compilation has errors. + * - Uses RTC to perform compilation. + * Test source + * ------------------------ + * - unit/assertion/static_assert.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_StaticAssert_Positive_Basic_RTC") { StaticAssertWrapper(kStaticAssert_Positive); } + +/** + * Test Description + * ------------------------ + * - Passes invalidly formed expressions to static_assert calls. + * - Uses expressions that are not constexpr and values that are not known during compilation. + * - Uses RTC to perform compilation. 
+ * Test source + * ------------------------ + * - unit/assertion/static_assert.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_StaticAssert_Negative_Basic_RTC") { StaticAssertWrapper(kStaticAssert_Negative); } diff --git a/catch/unit/assertion/static_assert_kernels_negative.cc b/catch/unit/assertion/static_assert_kernels_negative.cc new file mode 100644 index 0000000000..777f27855c --- /dev/null +++ b/catch/unit/assertion/static_assert_kernels_negative.cc @@ -0,0 +1,30 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +__global__ void StaticAssertErrorKernel1() { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(tid % 2 == 1, "[StaticAssertErrorKernel1]"); +} + +__global__ void StaticAssertErrorKernel2() { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(++tid > 2, "[StaticAssertErrorKernel2]"); +} diff --git a/catch/unit/assertion/static_assert_kernels_positive.cc b/catch/unit/assertion/static_assert_kernels_positive.cc new file mode 100644 index 0000000000..2ed0d7b68c --- /dev/null +++ b/catch/unit/assertion/static_assert_kernels_positive.cc @@ -0,0 +1,32 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +__global__ void StaticAssertPassKernel1() { + static_assert(sizeof(int) < sizeof(long), "[StaticAssertPassKernel1]"); +} + +__global__ void StaticAssertPassKernel2() { static_assert(10 > 5, "[StaticAssertPassKernel2]"); } + +__global__ void StaticAssertFailKernel1() { + static_assert(sizeof(int) > sizeof(long), "[StaticAssertFailKernel1]"); +} + +__global__ void StaticAssertFailKernel2() { static_assert(10 < 5, "[StaticAssertFailKernel2]"); } diff --git a/catch/unit/assertion/static_assert_kernels_rtc.hh b/catch/unit/assertion/static_assert_kernels_rtc.hh new file mode 100644 index 0000000000..5bb7419e30 --- /dev/null +++ b/catch/unit/assertion/static_assert_kernels_rtc.hh @@ -0,0 +1,56 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Positive and negative kernels used for the static_assert Test Cases that are using RTC. 
+*/ + +static constexpr auto kStaticAssert_Positive{ + R"( + __global__ void StaticAssertPassKernel1() { + static_assert(sizeof(int) < sizeof(long), "[StaticAssertPassKernel1]"); + } + + __global__ void StaticAssertPassKernel2() { + static_assert(10 > 5, "[StaticAssertPassKernel2]"); + } + + __global__ void StaticAssertFailKernel1() { + static_assert(sizeof(int) > sizeof(long), "[StaticAssertFailKernel1]"); + } + + __global__ void StaticAssertFailKernel2() { + static_assert(10 < 5, "[StaticAssertFailKernel2]"); + } + )"}; + +static constexpr auto kStaticAssert_Negative{ + R"( + __global__ void StaticAssertErrorKernel1() { + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(tid % 2 == 1, "[StaticAssertErrorKernel1]"); + } + + __global__ void StaticAssertErrorKernel2() { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(++tid > 2, "[StaticAssertErrorKernel2]"); + } + )"}; diff --git a/catch/unit/graph/hipGraphAddMemcpyNodeFromSymbol.cc b/catch/unit/graph/hipGraphAddMemcpyNodeFromSymbol.cc index 4bceaa41b7..effb4f68e6 100644 --- a/catch/unit/graph/hipGraphAddMemcpyNodeFromSymbol.cc +++ b/catch/unit/graph/hipGraphAddMemcpyNodeFromSymbol.cc @@ -22,7 +22,6 @@ THE SOFTWARE. #include #include -#include #include #include @@ -75,7 +74,7 @@ void GraphMemcpyFromSymbolShell(void* symbol, size_t offset, const std::vector= 5.2 - */ + */ TEST_CASE("Unit_hipGraphAddMemcpyNodeFromSymbol_Negative_Parameters") { using namespace std::placeholders; hipGraph_t graph = nullptr; diff --git a/catch/unit/graph/hipGraphAddMemcpyNodeToSymbol.cc b/catch/unit/graph/hipGraphAddMemcpyNodeToSymbol.cc index 1c8c047f9e..3163443944 100644 --- a/catch/unit/graph/hipGraphAddMemcpyNodeToSymbol.cc +++ b/catch/unit/graph/hipGraphAddMemcpyNodeToSymbol.cc @@ -23,7 +23,6 @@ THE SOFTWARE. 
#include #include -#include #include #include @@ -78,7 +77,7 @@ void GraphMemcpyToSymbolShell(const void* symbol, size_t offset, const std::vect * - Verify that data is correctly copied to a symbol. A graph is constructed to which a * MemcpyToSymbol node is added. After graph execution, a MemcpyFromSymbol is performed and * the copied values are compared against values known to have been copied to symbol memory - * previously. + * previously. * The test is run for scalar, const scalar, array, and const array symbols of types char, int, * float and double. For array symbols, the test is repeated for zero and non-zero offset values. * Verification is performed for source memory allocated on host and device. diff --git a/catch/unit/graph/hipGraphAddMemsetNode.cc b/catch/unit/graph/hipGraphAddMemsetNode.cc index 4d4359b2b1..af502ab07a 100644 --- a/catch/unit/graph/hipGraphAddMemsetNode.cc +++ b/catch/unit/graph/hipGraphAddMemsetNode.cc @@ -22,7 +22,6 @@ THE SOFTWARE. #include #include -#include #include #include #include @@ -129,7 +128,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_Negative_Parameters") { * Allocate a 2D array using hipMallocPitch. Initialize the allocated memory * using hipGraphAddMemsetNode. Copy the values in device memory to host using * hipGraphAddMemcpyNode. Verify the results. 
-*/ + */ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") { CHECK_IMAGE_SUPPORT @@ -147,22 +146,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") { } } // 2D Memory allocation hipMallocPitch - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, - numH)); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, numH)); // Create Graph HIP_CHECK(hipGraphCreate(&graph, 0)); hipGraphNode_t memsetNode, memcpyNode; // Add MemSet Node hipMemsetParams memsetParams{}; memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(A_d); + memsetParams.dst = reinterpret_cast(A_d); memsetParams.value = memSetVal; memsetParams.pitch = pitch_A; memsetParams.elementSize = sizeof(char); memsetParams.width = numW; memsetParams.height = numH; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // Add MemCpy Node hipMemcpy3DParms myparms{}; @@ -173,21 +170,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") { myparms.extent = make_hipExtent(width, numH, 1); myparms.kind = hipMemcpyDeviceToHost; HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); // Verfication for (size_t i = 0; i < numW; i++) { for (size_t j = 0; j < numH; j++) { - REQUIRE(*(A_h + i*numH + j) == memSetVal); + REQUIRE(*(A_h + i * numH + j) == memSetVal); } } 
HIP_CHECK(hipGraphExecDestroy(graphExec)); @@ -200,12 +196,12 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_2D") { * Allocate a 1D array using hipMallocPitch. Initialize the allocated memory using * hipGraphAddMemsetNode. Copy the values in device memory to host using * hipGraphAddMemcpyNode. Verify the results. -*/ + */ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") { CHECK_IMAGE_SUPPORT size_t width = SIZE * sizeof(char), numW{SIZE}, pitch_A; - char *A_d; + char* A_d; // Initialize the host memory std::vector A_h(numW, ' '); @@ -213,22 +209,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") { hipGraph_t graph; std::vector nodeDependencies; // 1D Memory allocation hipMallocPitch - HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, - 1)); + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, 1)); // Create Graph HIP_CHECK(hipGraphCreate(&graph, 0)); hipGraphNode_t memsetNode, memcpyNode; // Add MemSet Node hipMemsetParams memsetParams{}; memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(A_d); + memsetParams.dst = reinterpret_cast(A_d); memsetParams.value = memSetVal; memsetParams.pitch = pitch_A; memsetParams.elementSize = sizeof(char); memsetParams.width = numW; memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // Add MemCpy Node hipMemcpy3DParms myparms{}; @@ -239,15 +233,14 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") { myparms.extent = make_hipExtent(width, 1, 1); myparms.kind = hipMemcpyDeviceToHost; HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; 
HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); @@ -264,7 +257,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocPitch_1D") { * Allocate a 2D array using hipMalloc3D. Initialize the allocated memory using * hipGraphAddMemsetNode. Copy the values in device memory to host using * hipGraphAddMemcpyNode. Verify the results. -*/ + */ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") { CHECK_IMAGE_SUPPORT @@ -300,8 +293,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") { memsetParams.elementSize = sizeof(char); memsetParams.width = numW; memsetParams.height = numH; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // MemCpy params @@ -315,22 +307,21 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") { // Add MemCpy Node HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); // Verfication for (size_t i = 0; i < numW; i++) { for (size_t j = 0; j < numH; j++) { - REQUIRE(*(A_h + i*numH + j) == memSetVal); + REQUIRE(*(A_h + i * numH + j) == memSetVal); } } HIP_CHECK(hipGraphExecDestroy(graphExec)); @@ -343,7 +334,7 @@ 
TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_2D") { * Allocate a 1D array using hipMalloc3D. Initialize the allocated * memory using hipGraphAddMemsetNode. Copy the values in device * memory to host using hipGraphAddMemcpyNode. Verify the results. -*/ + */ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") { CHECK_IMAGE_SUPPORT @@ -375,8 +366,7 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") { memsetParams.elementSize = sizeof(char); memsetParams.width = numW; memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // MemCpy params @@ -390,21 +380,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") { // Add MemCpy Node HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); // Verfication for (size_t i = 0; i < numW; i++) { - REQUIRE(A_h[i] == memSetVal); + REQUIRE(A_h[i] == memSetVal); } HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graph)); @@ -415,9 +404,9 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc3D_1D") { * Allocate a 1D array using hipMalloc. Initialize the allocated memory using * hipGraphAddMemsetNode. Copy the values in device memory to host using * hipGraphAddMemcpyNode. Verify the results. 
-*/ + */ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") { - char *A_d; + char* A_d; size_t NumW = SIZE; size_t Nbytes1D = SIZE * sizeof(char); @@ -436,14 +425,13 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") { // Add Memset node hipMemsetParams memsetParams{}; memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(A_d); + memsetParams.dst = reinterpret_cast(A_d); memsetParams.value = memSetVal; memsetParams.pitch = Nbytes1D; memsetParams.elementSize = sizeof(char); memsetParams.width = NumW; memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // Add MemCpy Node hipPitchedPtr devPitchedPtr{A_d, Nbytes1D, NumW, 0}; @@ -456,20 +444,19 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") { myparms.extent = make_hipExtent(Nbytes1D, 1, 1); myparms.kind = hipMemcpyDeviceToHost; HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); // Verfication for (size_t i = 0; i < NumW; i++) { - REQUIRE(A_h[i] == memSetVal); + REQUIRE(A_h[i] == memSetVal); } HIP_CHECK(hipGraphExecDestroy(graphExec)); HIP_CHECK(hipGraphDestroy(graph)); @@ -479,16 +466,15 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMalloc_1D") { TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") { int managed = 0; - HIP_CHECK(hipDeviceGetAttribute(&managed, - hipDeviceAttributeManagedMemory, 
0)); + HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, 0)); INFO("hipDeviceAttributeManagedMemory: " << managed); if (managed != 1) { WARN( - "GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute" - "so defaulting to system memory."); + "GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute" + "so defaulting to system memory."); } size_t Nbytes1D = SIZE * sizeof(char); - char *A_d; + char* A_d; // Initialize the host memory std::vector A_h(SIZE, ' '); // Device Memory @@ -502,14 +488,13 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") { // Add Memset node hipMemsetParams memsetParams{}; memset(&memsetParams, 0, sizeof(memsetParams)); - memsetParams.dst = reinterpret_cast(A_d); + memsetParams.dst = reinterpret_cast(A_d); memsetParams.value = memSetVal; memsetParams.pitch = Nbytes1D; memsetParams.elementSize = sizeof(char); memsetParams.width = SIZE; memsetParams.height = 1; - HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, - &memsetParams)); + HIP_CHECK(hipGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams)); nodeDependencies.push_back(memsetNode); // Add MemCpy Node @@ -524,21 +509,20 @@ TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") { myparms.extent = make_hipExtent(Nbytes1D, 1, 1); myparms.kind = hipMemcpyDeviceToHost; HIP_CHECK(hipGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), - nodeDependencies.size(), &myparms)); + nodeDependencies.size(), &myparms)); nodeDependencies.clear(); // Create executable graph hipStream_t streamForGraph; hipGraphExec_t graphExec; HIP_CHECK(hipStreamCreate(&streamForGraph)); - HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); HIP_CHECK(hipStreamSynchronize(streamForGraph)); // Verfication for (size_t i = 0; i < SIZE; i++) { - REQUIRE(A_h[i] == memSetVal); + REQUIRE(A_h[i] 
== memSetVal); } HIP_CHECK(hipGraphExecDestroy(graphExec)); diff --git a/catch/unit/graph/hipGraphExecMemcpyNodeSetParamsToSymbol.cc b/catch/unit/graph/hipGraphExecMemcpyNodeSetParamsToSymbol.cc index 585435e684..3d8681eeb4 100644 --- a/catch/unit/graph/hipGraphExecMemcpyNodeSetParamsToSymbol.cc +++ b/catch/unit/graph/hipGraphExecMemcpyNodeSetParamsToSymbol.cc @@ -22,7 +22,6 @@ THE SOFTWARE. #include #include -#include #include #include @@ -86,7 +85,7 @@ void GraphExecMemcpyToSymbolSetParamsShell(const void* symbol, const void* alt_s * node addition. A graph is constructed to which a MemcpyToSymbol node is added with valid but * incorrect parameters. After the graph is instantiated the parameters are updated to correct * values and the graph executed. After graph execution, a MemcpyFromSymbol is performed and the - * copied values are compared against values known to have been copied to symbol memory previously. + * copied values are compared against values known to have been copied to symbol memory previously. * The test is run for scalar, const scalar, array, and const array symbols of types char, int, * float and double. For array symbols, the test is repeated for zero and non-zero offset values. * Verification is performed for destination memory allocated on host and device. diff --git a/catch/unit/graph/hipGraphExecMemsetNodeSetParams.cc b/catch/unit/graph/hipGraphExecMemsetNodeSetParams.cc index edecbfad9a..ee2282b425 100644 --- a/catch/unit/graph/hipGraphExecMemsetNodeSetParams.cc +++ b/catch/unit/graph/hipGraphExecMemsetNodeSetParams.cc @@ -21,7 +21,6 @@ THE SOFTWARE. #include -#include #include #include "graph_memset_node_test_common.hh" @@ -46,7 +45,7 @@ THE SOFTWARE. * which also constitutes a test for said API. 
* The test is repeated for all valid element sizes(1, * 2, 4), and several allocations of different width(height is always 1 because only 1D memset nodes - * can be updated), both on host and device + * can be updated), both on host and device * Test source * ------------------------ * - unit/graph/hipGraphExecMemsetNodeSetParams.cc diff --git a/catch/unit/graph/hipGraphMemcpyNodeSetParamsFromSymbol.cc b/catch/unit/graph/hipGraphMemcpyNodeSetParamsFromSymbol.cc index 7f1ac7fe3c..b8c10c3900 100644 --- a/catch/unit/graph/hipGraphMemcpyNodeSetParamsFromSymbol.cc +++ b/catch/unit/graph/hipGraphMemcpyNodeSetParamsFromSymbol.cc @@ -22,7 +22,6 @@ THE SOFTWARE. #include #include -#include #include #include @@ -85,7 +84,7 @@ void GraphMemcpyFromSymbolSetParamsShell(const void* symbol, const void* alt_sym * - Verify that data is correctly copied from a symbol after node parameters are set following * node addition. A graph is constructed to which a MemcpyFromSymbol node is added with valid but * incorrect parameters. The parameters are then updated to correct values and the graph executed. - * Values in destination memory are compared against values known to be in symbol memory. + * Values in destination memory are compared against values known to be in symbol memory. * The test is run for scalar, const scalar, array, and const array symbols of types char, int, * float and double. For array symbols, the test is repeated for zero and non-zero offset values. * Verification is performed for destination memory allocated on host and device. diff --git a/catch/unit/graph/hipGraphMemcpyNodeSetParamsToSymbol.cc b/catch/unit/graph/hipGraphMemcpyNodeSetParamsToSymbol.cc index 0f84b6b283..b62b01cf5b 100644 --- a/catch/unit/graph/hipGraphMemcpyNodeSetParamsToSymbol.cc +++ b/catch/unit/graph/hipGraphMemcpyNodeSetParamsToSymbol.cc @@ -22,7 +22,6 @@ THE SOFTWARE. 
#include #include -#include #include #include @@ -86,7 +85,7 @@ void GraphMemcpyToSymbolSetParamsShell(const void* symbol, const void* alt_symbo * node addition. A graph is constructed to which a MemcpyToSymbol node is added with valid but * incorrect parameters. The parameters are then updated to correct values and the graph executed. * After graph execution, a MemcpyFromSymbol is performed and the copied values are compared against - * values known to have been copied to symbol memory previously. + * values known to have been copied to symbol memory previously. * The test is run for scalar, const scalar, array, and const array symbols of types char, int, * float and double. For array symbols, the test is repeated for zero and non-zero offset values. * Verification is performed for destination memory allocated on host and device. diff --git a/catch/unit/graph/hipGraphMemsetNodeGetParams.cc b/catch/unit/graph/hipGraphMemsetNodeGetParams.cc index 25fe849206..1c640db2de 100644 --- a/catch/unit/graph/hipGraphMemsetNodeGetParams.cc +++ b/catch/unit/graph/hipGraphMemsetNodeGetParams.cc @@ -19,7 +19,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include #include #include diff --git a/catch/unit/graph/hipGraphMemsetNodeSetParams.cc b/catch/unit/graph/hipGraphMemsetNodeSetParams.cc index d8f7cac249..af8e6d50da 100644 --- a/catch/unit/graph/hipGraphMemsetNodeSetParams.cc +++ b/catch/unit/graph/hipGraphMemsetNodeSetParams.cc @@ -21,7 +21,6 @@ THE SOFTWARE. #include -#include #include #include "graph_memset_node_test_common.hh" @@ -44,7 +43,7 @@ THE SOFTWARE. * The parameters are also verified via hipGraphMemsetNodeGetParams, which also constitutes a test * for said API. 
* The test is repeated for all valid element sizes(1, 2, 4), and several allocations of different - * height and width both on host and device + * height and width both on host and device * Test source * ------------------------ * - unit/graph/hipGraphMemsetNodeSetParams.cc diff --git a/catch/unit/graph/hipLaunchHostFunc.cc b/catch/unit/graph/hipLaunchHostFunc.cc index 1d9ea95b00..fe82055061 100644 --- a/catch/unit/graph/hipLaunchHostFunc.cc +++ b/catch/unit/graph/hipLaunchHostFunc.cc @@ -18,7 +18,6 @@ THE SOFTWARE. */ #include #include -#include #include "stream_capture_common.hh" diff --git a/catch/unit/graph/hipStreamBeginCapture.cc b/catch/unit/graph/hipStreamBeginCapture.cc index 9814a05097..21a2edec0b 100644 --- a/catch/unit/graph/hipStreamBeginCapture.cc +++ b/catch/unit/graph/hipStreamBeginCapture.cc @@ -19,8 +19,7 @@ THE SOFTWARE. #include #include -#include -#include "stream_capture_common.hh" // NOLINT +#include "stream_capture_common.hh" // NOLINT #pragma clang diagnostic ignored "-Wunused-variable" /** @@ -56,8 +55,7 @@ static void hostNodeCallback(void* data) { } template -void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode, - hipStream_t stream) { +void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode, hipStream_t stream) { constexpr size_t N = 1000000; size_t Nbytes = N * sizeof(T); @@ -89,8 +87,7 @@ void captureStreamAndLaunchGraph(F graphFunc, hipStreamCaptureMode mode, std::fill_n(A_h.host_ptr(), N, static_cast(i)); HIP_CHECK(hipGraphLaunch(graphExec, stream)); HIP_CHECK(hipStreamSynchronize(stream)); - ArrayFindIfNot(B_h.host_ptr(), - static_cast(i) * static_cast(i), N); + ArrayFindIfNot(B_h.host_ptr(), static_cast(i) * static_cast(i), N); } HIP_CHECK(hipGraphExecDestroy(graphExec)) @@ -117,16 +114,15 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Functional") { StreamGuard stream_guard(stream_type); hipStream_t stream = stream_guard.stream(); - const hipStreamCaptureMode captureMode = 
GENERATE(hipStreamCaptureModeGlobal, - hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); EventsGuard events_guard(3); StreamsGuard streams_guard(2); SECTION("Linear graph capture") { captureStreamAndLaunchGraph( - [](float* A_h, float* A_d, float* B_h, float* B_d, size_t N, - hipStream_t stream) { + [](float* A_h, float* A_d, float* B_h, float* B_d, size_t N, hipStream_t stream) { return captureSequenceLinear(A_h, A_d, B_h, B_d, N, stream); }, captureMode, stream); @@ -134,10 +130,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Functional") { SECTION("Branched graph capture") { captureStreamAndLaunchGraph( - [&streams_guard, &events_guard](float* A_h, float* A_d, float* B_h, - float* B_d, size_t N, hipStream_t stream) { - captureSequenceBranched(A_h, A_d, B_h, B_d, N, stream, - streams_guard.stream_list(), events_guard.event_list()); + [&streams_guard, &events_guard](float* A_h, float* A_d, float* B_h, float* B_d, size_t N, + hipStream_t stream) { + captureSequenceBranched(A_h, A_d, B_h, B_d, N, stream, streams_guard.stream_list(), + events_guard.event_list()); }, captureMode, stream); } @@ -173,8 +169,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_Parameters") { hipErrorIllegalState); } SECTION("Creating hipStream with invalid mode") { - HIP_CHECK_ERROR(hipStreamBeginCapture(stream, hipStreamCaptureMode(-1)), - hipErrorInvalidValue); + HIP_CHECK_ERROR(hipStreamBeginCapture(stream, hipStreamCaptureMode(-1)), hipErrorInvalidValue); } #if HT_NVIDIA // EXSWHTEC-216 SECTION("Stream capture on uninitialized stream returns error code.") { @@ -182,8 +177,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_Parameters") { StreamGuard sg(Streams::created); return sg.stream(); }; - HIP_CHECK_ERROR(hipStreamBeginCapture(InvalidStream(), - hipStreamCaptureModeGlobal), + 
HIP_CHECK_ERROR(hipStreamBeginCapture(InvalidStream(), hipStreamCaptureModeGlobal), hipErrorContextIsDestroyed); } #endif @@ -207,8 +201,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Basic") { StreamGuard stream_guard(stream_type); hipStream_t s = stream_guard.stream(); - const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal, - hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); HIP_CHECK(hipStreamBeginCapture(s, captureMode)); @@ -218,8 +212,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Basic") { /* Local function for inter stream event synchronization */ -static void interStrmEventSyncCapture(const hipStream_t& stream1, - const hipStream_t& stream2) { +static void interStrmEventSyncCapture(const hipStream_t& stream1, const hipStream_t& stream2) { hipGraph_t graph1{nullptr}, graph2{nullptr}; hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr}; @@ -266,8 +259,7 @@ static void interStrmEventSyncCapture(const hipStream_t& stream1, /* Local function for colligated stream capture */ -static void colligatedStrmCapture(const hipStream_t& stream1, - const hipStream_t& stream2) { +static void colligatedStrmCapture(const hipStream_t& stream1, const hipStream_t& stream2) { hipGraph_t graph1{nullptr}, graph2{nullptr}; hipGraphExec_t graphExec1{nullptr}, graphExec2{nullptr}; @@ -310,8 +302,7 @@ static void colligatedStrmCapture(const hipStream_t& stream1, /* Local function for colligated stream capture functionality */ -static void colligatedStrmCaptureFunc(const hipStream_t& stream1, - const hipStream_t& stream2) { +static void colligatedStrmCaptureFunc(const hipStream_t& stream1, const hipStream_t& stream2) { constexpr size_t N = 1000000; size_t Nbytes = N * sizeof(int); @@ -331,10 +322,8 @@ static void colligatedStrmCaptureFunc(const hipStream_t& stream1, // Capture 2 streams 
HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); HIP_CHECK(hipStreamBeginCapture(stream2, hipStreamCaptureModeGlobal)); - captureSequenceLinear(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), - N, stream1); - captureSequenceLinear(C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), D_d.ptr(), - N, stream2); + captureSequenceLinear(A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream1); + captureSequenceLinear(C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), D_d.ptr(), N, stream2); captureSequenceCompute(A_d.ptr(), B_h.host_ptr(), B_d.ptr(), N, stream1); captureSequenceCompute(C_d.ptr(), D_h.host_ptr(), D_d.ptr(), N, stream2); HIP_CHECK(hipStreamEndCapture(stream1, &graph1)); @@ -370,9 +359,8 @@ static void colligatedStrmCaptureFunc(const hipStream_t& stream1, /* Stream Capture thread function */ -static void threadStrmCaptureFunc(hipStream_t stream, int* A_h, int* A_d, - int* B_h, int* B_d, hipGraph_t* graph, - size_t N, hipStreamCaptureMode mode) { +static void threadStrmCaptureFunc(hipStream_t stream, int* A_h, int* A_d, int* B_h, int* B_d, + hipGraph_t* graph, size_t N, hipStreamCaptureMode mode) { // Capture stream HIP_CHECK(hipStreamBeginCapture(stream, mode)); captureSequenceLinear(A_h, A_d, B_h, B_d, N, stream); @@ -404,10 +392,10 @@ static void multithreadedTest(hipStreamCaptureMode mode) { LinearAllocGuard D_d(LinearAllocs::hipMalloc, Nbytes); // Launch 2 threads to capture the 2 streams into graphs - std::thread t1(threadStrmCaptureFunc, stream1, A_h.host_ptr(), A_d.ptr(), - B_h.host_ptr(), B_d.ptr(), &graph1, N, mode); - std::thread t2(threadStrmCaptureFunc, stream2, C_h.host_ptr(), C_d.ptr(), - D_h.host_ptr(), D_d.ptr(), &graph2, N, mode); + std::thread t1(threadStrmCaptureFunc, stream1, A_h.host_ptr(), A_d.ptr(), B_h.host_ptr(), + B_d.ptr(), &graph1, N, mode); + std::thread t2(threadStrmCaptureFunc, stream2, C_h.host_ptr(), C_d.ptr(), D_h.host_ptr(), + D_d.ptr(), &graph2, N, mode); t1.join(); t2.join(); @@ -480,11 +468,9 @@ 
TEST_CASE("Unit_hipStreamBeginCapture_Positive_InterStrmEventSync_Flags") { TEST_CASE("Unit_hipStreamBeginCapture_Positive_InterStrmEventSync_Priority") { int minPriority = 0, maxPriority = 0; HIP_CHECK(hipDeviceGetStreamPriorityRange(&minPriority, &maxPriority)); - StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, - minPriority); + StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, minPriority); hipStream_t stream1 = stream_guard1.stream(); - StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, - maxPriority); + StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, maxPriority); hipStream_t stream2 = stream_guard2.stream(); interStrmEventSyncCapture(stream1, stream2); } @@ -533,11 +519,9 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCapture_Flags") { TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCapture_Prio") { int minPriority = 0, maxPriority = 0; HIP_CHECK(hipDeviceGetStreamPriorityRange(&minPriority, &maxPriority)); - StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, - minPriority); + StreamGuard stream_guard1(Streams::withPriority, hipStreamDefault, minPriority); hipStream_t stream1 = stream_guard1.stream(); - StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, - maxPriority); + StreamGuard stream_guard2(Streams::withPriority, hipStreamDefault, maxPriority); hipStream_t stream2 = stream_guard2.stream(); colligatedStrmCapture(stream1, stream2); } @@ -578,8 +562,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_ColligatedStrmCaptureFunc") { * - HIP_VERSION >= 5.2 */ TEST_CASE("Unit_hipStreamBeginCapture_Positive_Multithreaded") { - const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal, - hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); 
multithreadedTest(captureMode); } @@ -708,8 +692,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_CapturingFromWithinStrms") { HIP_CHECK(hipEventRecord(events[2], streams[2])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[1], 0)); HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); - HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, - streams[0])); + HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, streams[0])); HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture // Reset device memory HIP_CHECK(hipMemset(devMem, 0, sizeof(int))); @@ -751,8 +734,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_DetectingInvalidCapture") { dummyKernel<<<1, 1, 0, streams[0]>>>(); // Since stream[1] is already in capture mode due to event wait // hipStreamBeginCapture on stream[1] is expected to return error. - HIP_CHECK_ERROR(hipStreamBeginCapture(streams[1], - hipStreamCaptureModeGlobal), + HIP_CHECK_ERROR(hipStreamBeginCapture(streams[1], hipStreamCaptureModeGlobal), hipErrorIllegalState); } @@ -785,8 +767,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_CapturingMultGraphsFrom1Strm") { for (int i = 0; i < 3; i++) { HIP_CHECK(hipStreamBeginCapture(stream1, hipStreamCaptureModeGlobal)); for (int j = 0; j <= i; j++) incrementKernel<<<1, 1, 0, stream1>>>(devMem); - HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, - stream1)); + HIP_CHECK(hipMemcpyAsync(hostMem, devMem, sizeof(int), hipMemcpyDefault, stream1)); HIP_CHECK(hipStreamEndCapture(stream1, &graphs[i])); } // Instantiate and execute all graphs @@ -825,22 +806,19 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_CheckingSyncDuringCapture") { EventsGuard events_guard(1); hipEvent_t e = events_guard[0]; - const hipStreamCaptureMode captureMode = GENERATE(hipStreamCaptureModeGlobal, - hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); + const hipStreamCaptureMode captureMode = GENERATE( + hipStreamCaptureModeGlobal, 
hipStreamCaptureModeThreadLocal, hipStreamCaptureModeRelaxed); HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); SECTION("Synchronize stream during capture") { - HIP_CHECK_ERROR(hipStreamSynchronize(stream), - hipErrorStreamCaptureUnsupported); + HIP_CHECK_ERROR(hipStreamSynchronize(stream), hipErrorStreamCaptureUnsupported); } SECTION("Query stream during capture") { - HIP_CHECK_ERROR(hipStreamQuery(stream), - hipErrorStreamCaptureUnsupported); + HIP_CHECK_ERROR(hipStreamQuery(stream), hipErrorStreamCaptureUnsupported); } #if HT_NVIDIA SECTION("Synchronize device during capture") { - HIP_CHECK_ERROR(hipDeviceSynchronize(), - hipErrorStreamCaptureUnsupported); + HIP_CHECK_ERROR(hipDeviceSynchronize(), hipErrorStreamCaptureUnsupported); } SECTION("Synchronize event during capture") { HIP_CHECK(hipEventRecord(e, stream)); @@ -884,17 +862,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_UnsafeCallsDuringCapture") { HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); SECTION("hipMalloc during capture") { - HIP_CHECK_ERROR(hipMalloc(&devMem2, sizeof(int)), - hipErrorStreamCaptureUnsupported); + HIP_CHECK_ERROR(hipMalloc(&devMem2, sizeof(int)), hipErrorStreamCaptureUnsupported); } SECTION("hipMemcpy during capture") { - HIP_CHECK_ERROR(hipMemcpy(devMem.ptr(), hostMem.host_ptr(), sizeof(int), - hipMemcpyHostToDevice), + HIP_CHECK_ERROR(hipMemcpy(devMem.ptr(), hostMem.host_ptr(), sizeof(int), hipMemcpyHostToDevice), hipErrorStreamCaptureImplicit); } SECTION("hipMemset during capture") { - HIP_CHECK_ERROR(hipMemset(devMem.ptr(), 0, sizeof(int)), - hipErrorStreamCaptureImplicit); + HIP_CHECK_ERROR(hipMemset(devMem.ptr(), 0, sizeof(int)), hipErrorStreamCaptureImplicit); } } #endif @@ -931,8 +906,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_EndingCapwhenCapInProg") { HIP_CHECK(hipEventRecord(e, stream1)); HIP_CHECK(hipStreamWaitEvent(stream2, e, 0)); dummyKernel<<<1, 1, 0, stream2>>>(); - HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), - 
hipErrorStreamCaptureUnjoined); + HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), hipErrorStreamCaptureUnjoined); } SECTION("End strm capture when forked strm still has operations") { EventsGuard events_guard(2); @@ -946,8 +920,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_Negative_EndingCapwhenCapInProg") { HIP_CHECK(hipEventRecord(e2, stream2)); HIP_CHECK(hipStreamWaitEvent(stream1, e2, 0)); dummyKernel<<<1, 1, 0, stream2>>>(); - HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), - hipErrorStreamCaptureUnjoined); + HIP_CHECK_ERROR(hipStreamEndCapture(stream1, &graph), hipErrorStreamCaptureUnjoined); } } /** @@ -970,19 +943,17 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_MultiGPU") { SUCCEED("skipping the testcases as numDevices < 2"); return; } - hipStream_t* stream = reinterpret_cast - (malloc(devcount * sizeof(hipStream_t))); + hipStream_t* stream = reinterpret_cast(malloc(devcount * sizeof(hipStream_t))); REQUIRE(stream != nullptr); - hipGraph_t* graph = reinterpret_cast - (malloc(devcount * sizeof(hipGraph_t))); + hipGraph_t* graph = reinterpret_cast(malloc(devcount * sizeof(hipGraph_t))); REQUIRE(graph != nullptr); int **devMem{nullptr}, **hostMem{nullptr}; hostMem = reinterpret_cast(malloc(sizeof(int*) * devcount)); REQUIRE(hostMem != nullptr); devMem = reinterpret_cast(malloc(sizeof(int*) * devcount)); REQUIRE(devMem != nullptr); - hipGraphExec_t* graphExec = reinterpret_cast - (malloc(devcount * sizeof(hipGraphExec_t))); + hipGraphExec_t* graphExec = + reinterpret_cast(malloc(devcount * sizeof(hipGraphExec_t))); // Capture stream in each device for (int dev = 0; dev < devcount; dev++) { HIP_CHECK(hipSetDevice(dev)); @@ -994,15 +965,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_MultiGPU") { for (int i = 0; i < (dev + 1); i++) { incrementKernel<<<1, 1, 0, stream[dev]>>>(devMem[dev]); } - HIP_CHECK(hipMemcpyAsync(hostMem[dev], devMem[dev], sizeof(int), - hipMemcpyDefault, stream[dev])); + HIP_CHECK( + hipMemcpyAsync(hostMem[dev], 
devMem[dev], sizeof(int), hipMemcpyDefault, stream[dev])); HIP_CHECK(hipStreamEndCapture(stream[dev], &graph[dev])); } // Launch the captured graphs in the respective device for (int dev = 0; dev < devcount; dev++) { HIP_CHECK(hipSetDevice(dev)); - HIP_CHECK(hipGraphInstantiate(&graphExec[dev], graph[dev], nullptr, - nullptr, 0)); + HIP_CHECK(hipGraphInstantiate(&graphExec[dev], graph[dev], nullptr, nullptr, 0)); HIP_CHECK(hipGraphLaunch(graphExec[dev], stream[dev])); } // Validate output @@ -1069,8 +1039,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_nestedStreamCapture") { HIP_CHECK(hipEventRecord(events[3], streams[2])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0)); HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); - HIP_CHECK(hipMemcpyAsync(hostMem_g.host_ptr(), devMem_g.ptr(), sizeof(int), - hipMemcpyDefault, streams[0])); + HIP_CHECK(hipMemcpyAsync(hostMem_g.host_ptr(), devMem_g.ptr(), sizeof(int), hipMemcpyDefault, + streams[0])); HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture // Reset device memory HIP_CHECK(hipMemset(devMem_g.ptr(), 0, sizeof(int))); @@ -1108,23 +1078,15 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") { hipGraph_t graphs[3]; StreamsGuard streams(3); EventsGuard events(4); - LinearAllocGuard hostMem_g1 = LinearAllocGuard - (LinearAllocs::malloc, sizeof(int)); - LinearAllocGuard hostMem_g2 = LinearAllocGuard - (LinearAllocs::malloc, sizeof(int)); - LinearAllocGuard hostMem_g3 = LinearAllocGuard - (LinearAllocs::malloc, sizeof(int)); - LinearAllocGuard devMem_g1 = LinearAllocGuard - (LinearAllocs::hipMalloc, sizeof(int)); - LinearAllocGuard devMem_g2 = LinearAllocGuard - (LinearAllocs::hipMalloc, sizeof(int)); - LinearAllocGuard devMem_g3 = LinearAllocGuard - (LinearAllocs::hipMalloc, sizeof(int)); + LinearAllocGuard hostMem_g1 = LinearAllocGuard(LinearAllocs::malloc, sizeof(int)); + LinearAllocGuard hostMem_g2 = LinearAllocGuard(LinearAllocs::malloc, sizeof(int)); + LinearAllocGuard 
hostMem_g3 = LinearAllocGuard(LinearAllocs::malloc, sizeof(int)); + LinearAllocGuard devMem_g1 = LinearAllocGuard(LinearAllocs::hipMalloc, sizeof(int)); + LinearAllocGuard devMem_g2 = LinearAllocGuard(LinearAllocs::hipMalloc, sizeof(int)); + LinearAllocGuard devMem_g3 = LinearAllocGuard(LinearAllocs::hipMalloc, sizeof(int)); - std::vector hostMem = {hostMem_g1.host_ptr(), hostMem_g2.host_ptr(), - hostMem_g3.host_ptr()}; - std::vector devMem = {devMem_g1.ptr(), devMem_g2.ptr(), - devMem_g3.ptr()}; + std::vector hostMem = {hostMem_g1.host_ptr(), hostMem_g2.host_ptr(), hostMem_g3.host_ptr()}; + std::vector devMem = {devMem_g1.ptr(), devMem_g2.ptr(), devMem_g3.ptr()}; // Create a device memory of size int and initialize it to 0 for (int i = 0; i < 3; i++) { memset(hostMem[i], 0, sizeof(int)); @@ -1148,16 +1110,14 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") { HIP_CHECK(hipEventRecord(events[3], streams[2])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0)); HIP_CHECK(hipStreamWaitEvent(streams[0], events[2], 0)); - HIP_CHECK(hipMemcpyAsync(hostMem[0], devMem[0], sizeof(int), - hipMemcpyDefault, streams[0])); + HIP_CHECK(hipMemcpyAsync(hostMem[0], devMem[0], sizeof(int), hipMemcpyDefault, streams[0])); HIP_CHECK(hipStreamEndCapture(streams[0], &graphs[0])); // End Capture // Start capturing graph2 from stream 2 HIP_CHECK(hipStreamBeginCapture(streams[1], hipStreamCaptureModeGlobal)); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]); incrementKernel<<<1, 1, 0, streams[1]>>>(devMem[1]); - HIP_CHECK(hipMemcpyAsync(hostMem[1], devMem[1], sizeof(int), - hipMemcpyDefault, streams[1])); + HIP_CHECK(hipMemcpyAsync(hostMem[1], devMem[1], sizeof(int), hipMemcpyDefault, streams[1])); HIP_CHECK(hipStreamEndCapture(streams[1], &graphs[1])); // End Capture // Start capturing graph3 from stream 3 HIP_CHECK(hipStreamBeginCapture(streams[2], hipStreamCaptureModeGlobal)); @@ -1166,8 +1126,7 @@ 
TEST_CASE("Unit_hipStreamBeginCapture_Positive_streamReuse") { incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); incrementKernel<<<1, 1, 0, streams[2]>>>(devMem[2]); - HIP_CHECK(hipMemcpyAsync(hostMem[2], devMem[2], sizeof(int), - hipMemcpyDefault, streams[2])); + HIP_CHECK(hipMemcpyAsync(hostMem[2], devMem[2], sizeof(int), hipMemcpyDefault, streams[2])); HIP_CHECK(hipStreamEndCapture(streams[2], &graphs[2])); // End Capture // Reset device memory HIP_CHECK(hipMemset(devMem[0], 0, sizeof(int))); @@ -1211,40 +1170,32 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") { EventsGuard events(7); // Allocate Device memory and Host memory size_t N = GRIDSIZE * BLOCKSIZE; - LinearAllocGuard Ah = LinearAllocGuard - (LinearAllocs::malloc, N * sizeof(int)); - LinearAllocGuard Bh = LinearAllocGuard - (LinearAllocs::malloc, N * sizeof(int)); - LinearAllocGuard Ch = LinearAllocGuard - (LinearAllocs::malloc, N * sizeof(int)); - LinearAllocGuard Ad = LinearAllocGuard - (LinearAllocs::hipMalloc, N * sizeof(int)); - LinearAllocGuard Bd = LinearAllocGuard - (LinearAllocs::hipMalloc, N * sizeof(int)); + LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, N * sizeof(int)); + LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, N * sizeof(int)); + LinearAllocGuard Ch = LinearAllocGuard(LinearAllocs::malloc, N * sizeof(int)); + LinearAllocGuard Ad = LinearAllocGuard(LinearAllocs::hipMalloc, N * sizeof(int)); + LinearAllocGuard Bd = LinearAllocGuard(LinearAllocs::hipMalloc, N * sizeof(int)); // Capture streams into graph HIP_CHECK(hipStreamBeginCapture(streams[0], hipStreamCaptureModeGlobal)); HIP_CHECK(hipEventRecord(events[0], streams[0])); HIP_CHECK(hipStreamWaitEvent(streams[3], events[0], 0)); HIP_CHECK(hipStreamWaitEvent(streams[4], events[0], 0)); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), (N * sizeof(int)), - hipMemcpyDefault, streams[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), 
Bh.host_ptr(), (N * sizeof(int)), - hipMemcpyDefault, streams[4])); + HIP_CHECK( + hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[0])); + HIP_CHECK( + hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[4])); hipHostFn_t fn = hostNodeCallback; HIPCHECK(hipLaunchHostFunc(streams[3], fn, nullptr)); HIP_CHECK(hipEventRecord(events[1], streams[0])); HIP_CHECK(hipStreamWaitEvent(streams[1], events[1], 0)); int* Ad_2nd_half = Ad.ptr() + N / 2; int* Ad_1st_half = Ad.ptr(); - mymul<<>>(Ad_2nd_half, - CONST_KER2_VAL); - mymul<<>>(Ad_1st_half, - CONST_KER1_VAL); + mymul<<>>(Ad_2nd_half, CONST_KER2_VAL); + mymul<<>>(Ad_1st_half, CONST_KER1_VAL); HIP_CHECK(hipEventRecord(events[2], streams[1])); HIP_CHECK(hipStreamWaitEvent(streams[2], events[2], 0)); - mymul<<>>(Ad_1st_half, - CONST_KER3_VAL); + mymul<<>>(Ad_1st_half, CONST_KER3_VAL); HIPCHECK(hipLaunchHostFunc(streams[2], fn, nullptr)); HIP_CHECK(hipEventRecord(events[6], streams[1])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[6], 0)); @@ -1255,8 +1206,8 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") { HIP_CHECK(hipStreamWaitEvent(streams[0], events[3], 0)); HIP_CHECK(hipEventRecord(events[4], streams[3])); HIP_CHECK(hipStreamWaitEvent(streams[0], events[4], 0)); - HIP_CHECK(hipMemcpyAsync(Ch.host_ptr(), Ad.ptr(), (N * sizeof(int)), - hipMemcpyDefault, streams[0])); + HIP_CHECK( + hipMemcpyAsync(Ch.host_ptr(), Ad.ptr(), (N * sizeof(int)), hipMemcpyDefault, streams[0])); HIP_CHECK(hipStreamEndCapture(streams[0], &graph)); // End Capture // Execute and test the graph hipGraphExec_t graphExec{nullptr}; @@ -1269,11 +1220,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_Positive_captureComplexGraph") { HIP_CHECK(hipStreamSynchronize(streams[0])); for (size_t i = 0; i < N; i++) { if (i > (N / 2 - 1)) { - REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] + - Ah.host_ptr()[i] * CONST_KER2_VAL)); + REQUIRE(Ch.host_ptr()[i] == 
(Bh.host_ptr()[i] + Ah.host_ptr()[i] * CONST_KER2_VAL)); } else { - REQUIRE(Ch.host_ptr()[i] == (Bh.host_ptr()[i] + - Ah.host_ptr()[i] * CONST_KER1_VAL * CONST_KER3_VAL)); + REQUIRE(Ch.host_ptr()[i] == + (Bh.host_ptr()[i] + Ah.host_ptr()[i] * CONST_KER1_VAL * CONST_KER3_VAL)); } } } @@ -1340,14 +1290,12 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { hipGraph_t graph{nullptr}; hipGraphExec_t graphExec{nullptr}; // Allocate device memory - LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Ad = LinearAllocGuard(LinearAllocs::hipMalloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Bd = LinearAllocGuard(LinearAllocs::hipMalloc, - BLOCKSIZE * sizeof(int)); + LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Ad = + LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Bd = + LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); // Fill input data std::fill_n(Ah.host_ptr(), BLOCKSIZE, VALUE1); std::fill_n(Bh.host_ptr(), BLOCKSIZE, VALUE2); @@ -1357,10 +1305,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { SECTION("Stream Creation Before Capture") { StreamsGuard stream1(1); HIP_CHECK(hipStreamBeginCapture(stream0[0], flag)); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); 
HIP_CHECK(hipStreamSynchronize(stream1[0])); myadd<<>>(Ad.ptr(), Bd.ptr()); HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture @@ -1368,10 +1316,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { SECTION("Synchronizing multiple streams during Capture") { StreamsGuard stream1(1), stream2(1); HIP_CHECK(hipStreamBeginCapture(stream0[0], flag)); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream2[0])); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream2[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); HIP_CHECK(hipStreamSynchronize(stream2[0])); myadd<<>>(Ad.ptr(), Bd.ptr()); @@ -1380,20 +1328,20 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { SECTION("Stream Creation After Capture") { HIP_CHECK(hipStreamBeginCapture(stream0[0], flag)); StreamsGuard stream1(1); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); myadd<<>>(Ad.ptr(), Bd.ptr()); HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture } SECTION("Stream Synchronize Before Capture") { StreamsGuard stream1(1); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * 
sizeof(int), - hipMemcpyDefault, stream1[0])); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); HIP_CHECK(hipStreamBeginCapture(stream0[0], flag)); myadd<<>>(Ad.ptr(), Bd.ptr()); @@ -1404,10 +1352,10 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { myadd<<>>(Ad.ptr(), Bd.ptr()); HIP_CHECK(hipStreamEndCapture(stream0[0], &graph)); // End Capture StreamsGuard stream1(1); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream1[0])); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream1[0])); HIP_CHECK(hipStreamSynchronize(stream1[0])); } // Execute and test the graph @@ -1415,8 +1363,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { HIP_CHECK(hipGraphLaunch(graphExec, stream0[0])); HIP_CHECK(hipStreamSynchronize(stream0[0])); // Check output - HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDeviceToHost)); for (int idx = 0; idx < BLOCKSIZE; idx++) { REQUIRE(Ah.host_ptr()[idx] == (VALUE1 + VALUE2)); } @@ -1437,20 +1384,16 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture") { * - HIP_VERSION >= 5.6 */ // Local function executed as thread -static void strmSyncThread(int *Ah, int *Ad, int *Bh, int *Bd, - int BLOCKSIZE, hipError_t *error) { +static void strmSyncThread(int* Ah, int* Ad, int* Bh, int* Bd, int BLOCKSIZE, hipError_t* error) { StreamsGuard 
stream(1); - HIP_CHECK(hipMemcpyAsync(Ad, Ah, BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream[0])); - HIP_CHECK(hipMemcpyAsync(Bd, Bh, BLOCKSIZE * sizeof(int), - hipMemcpyDefault, stream[0])); + HIP_CHECK(hipMemcpyAsync(Ad, Ah, BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[0])); + HIP_CHECK(hipMemcpyAsync(Bd, Bh, BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[0])); *error = hipStreamSynchronize(stream[0]); } // Local function executed as thread -static void captureStrmThread(hipGraph_t *graph, int *Ah, int *Ad, - int *Bh, int *Bd, int BLOCKSIZE, int GRIDSIZE, - hipStreamCaptureMode flag, hipError_t *error) { +static void captureStrmThread(hipGraph_t* graph, int* Ah, int* Ad, int* Bh, int* Bd, int BLOCKSIZE, + int GRIDSIZE, hipStreamCaptureMode flag, hipError_t* error) { StreamsGuard stream(1); // Capture streams into graph HIP_CHECK(hipStreamBeginCapture(stream[0], flag)); @@ -1466,14 +1409,12 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") { constexpr int VALUE1 = 7, VALUE2 = 11; hipGraph_t graph{nullptr}; // Allocate device memory - LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Ad = LinearAllocGuard(LinearAllocs::hipMalloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, - BLOCKSIZE * sizeof(int)); - LinearAllocGuard Bd = LinearAllocGuard(LinearAllocs::hipMalloc, - BLOCKSIZE * sizeof(int)); + LinearAllocGuard Ah = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Ad = + LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Bh = LinearAllocGuard(LinearAllocs::malloc, BLOCKSIZE * sizeof(int)); + LinearAllocGuard Bd = + LinearAllocGuard(LinearAllocs::hipMalloc, BLOCKSIZE * sizeof(int)); // Fill input data std::fill_n(Ah.host_ptr(), BLOCKSIZE, VALUE1); std::fill_n(Bh.host_ptr(), BLOCKSIZE, VALUE2); @@ -1483,10 +1424,10 @@ 
TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") { StreamsGuard stream(2); // Capture streams into graph HIP_CHECK(hipStreamBeginCapture(stream[0], hipStreamCaptureModeGlobal)); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), - BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), - BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1])); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream[1])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream[1])); error = hipStreamSynchronize(stream[1]); REQUIRE(error == hipErrorStreamCaptureUnsupported); } @@ -1494,34 +1435,30 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") { SECTION("Capture Flag = hipStreamCaptureModeThreadLocal Single Threaded") { StreamsGuard stream(2); // Capture streams into graph - HIP_CHECK(hipStreamBeginCapture(stream[0], - hipStreamCaptureModeThreadLocal)); - HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), - BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1])); - HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), - BLOCKSIZE * sizeof(int), hipMemcpyDefault, stream[1])); + HIP_CHECK(hipStreamBeginCapture(stream[0], hipStreamCaptureModeThreadLocal)); + HIP_CHECK(hipMemcpyAsync(Ad.ptr(), Ah.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream[1])); + HIP_CHECK(hipMemcpyAsync(Bd.ptr(), Bh.host_ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDefault, + stream[1])); error = hipStreamSynchronize(stream[1]); REQUIRE(error == hipErrorStreamCaptureUnsupported); } #endif #if HT_AMD SECTION("Capture Flag = hipStreamCaptureModeGlobal Multithreaded") { - captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), - Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, - hipStreamCaptureModeGlobal, &error); + captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, + 
hipStreamCaptureModeGlobal, &error); REQUIRE(error == hipErrorStreamCaptureUnsupported); } #endif SECTION("Capture Flag = hipStreamCaptureModeThreadLocal Multithreaded") { - captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), - Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, - hipStreamCaptureModeThreadLocal, &error); + captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, + hipStreamCaptureModeThreadLocal, &error); REQUIRE(error == hipSuccess); } SECTION("Capture Flag = hipStreamCaptureModeRelaxed Multithreaded") { - captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), - Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, - hipStreamCaptureModeRelaxed, &error); + captureStrmThread(&graph, Ah.host_ptr(), Ad.ptr(), Bh.host_ptr(), Bd.ptr(), BLOCKSIZE, GRIDSIZE, + hipStreamCaptureModeRelaxed, &error); REQUIRE(error == hipSuccess); } if (graph != nullptr) { @@ -1532,8 +1469,7 @@ TEST_CASE("Unit_hipStreamBeginCapture_StreamSync_OngoingCapture_MThread") { HIP_CHECK(hipGraphLaunch(graphExec, stream[0])); HIP_CHECK(hipStreamSynchronize(stream[0])); // Check output - HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), - hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy(Ah.host_ptr(), Ad.ptr(), BLOCKSIZE * sizeof(int), hipMemcpyDeviceToHost)); for (int idx = 0; idx < BLOCKSIZE; idx++) { REQUIRE(Ah.host_ptr()[idx] == (VALUE1 + VALUE2)); } diff --git a/catch/unit/graph/hipStreamEndCapture.cc b/catch/unit/graph/hipStreamEndCapture.cc index 39f25ce2a6..7ec1ea88a6 100644 --- a/catch/unit/graph/hipStreamEndCapture.cc +++ b/catch/unit/graph/hipStreamEndCapture.cc @@ -19,7 +19,6 @@ THE SOFTWARE. #include #include -#include #include "stream_capture_common.hh" diff --git a/catch/unit/graph/hipStreamGetCaptureInfo.cc b/catch/unit/graph/hipStreamGetCaptureInfo.cc index d8f8cb5d55..9c3317ed85 100644 --- a/catch/unit/graph/hipStreamGetCaptureInfo.cc +++ b/catch/unit/graph/hipStreamGetCaptureInfo.cc @@ -18,7 +18,6 @@ THE SOFTWARE. 
*/ #include -#include #include #include "stream_capture_common.hh" diff --git a/catch/unit/graph/hipStreamGetCaptureInfo_v2.cc b/catch/unit/graph/hipStreamGetCaptureInfo_v2.cc index ea67318ef9..0dde7247b1 100644 --- a/catch/unit/graph/hipStreamGetCaptureInfo_v2.cc +++ b/catch/unit/graph/hipStreamGetCaptureInfo_v2.cc @@ -19,7 +19,6 @@ THE SOFTWARE. #include #include -#include #include "stream_capture_common.hh" diff --git a/catch/unit/graph/hipStreamIsCapturing.cc b/catch/unit/graph/hipStreamIsCapturing.cc index c6a77c316e..256d20f21d 100644 --- a/catch/unit/graph/hipStreamIsCapturing.cc +++ b/catch/unit/graph/hipStreamIsCapturing.cc @@ -18,7 +18,6 @@ THE SOFTWARE. */ #include -#include #include #include "stream_capture_common.hh" diff --git a/catch/unit/graph/hipStreamUpdateCaptureDependencies.cc b/catch/unit/graph/hipStreamUpdateCaptureDependencies.cc index e35dd317d6..e11e1c3e24 100644 --- a/catch/unit/graph/hipStreamUpdateCaptureDependencies.cc +++ b/catch/unit/graph/hipStreamUpdateCaptureDependencies.cc @@ -20,7 +20,6 @@ THE SOFTWARE. #include #include #include -#include #include "stream_capture_common.hh" @@ -367,7 +366,7 @@ TEST_CASE("Unit_hipStreamUpdateCaptureDependencies_Positive_Parameters") { const hipStreamUpdateCaptureDependenciesFlags flag = GENERATE(hipStreamAddCaptureDependencies, hipStreamSetCaptureDependencies); - HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); //hipStreamCaptureModeGlobal)); + HIP_CHECK(hipStreamBeginCapture(stream, captureMode)); // hipStreamCaptureModeGlobal)); HIP_CHECK(hipStreamUpdateCaptureDependencies(stream, nullptr, 0, flag)); diff --git a/catch/unit/graph/hipThreadExchangeStreamCaptureMode.cc b/catch/unit/graph/hipThreadExchangeStreamCaptureMode.cc index c35fc18900..5ac784bc79 100644 --- a/catch/unit/graph/hipThreadExchangeStreamCaptureMode.cc +++ b/catch/unit/graph/hipThreadExchangeStreamCaptureMode.cc @@ -20,7 +20,6 @@ THE SOFTWARE. 
#include #include #include -#include #include "stream_capture_common.hh" diff --git a/catch/unit/kernel/hipShflTests.cc b/catch/unit/kernel/hipShflTests.cc index 89c529c16b..3525602bd0 100644 --- a/catch/unit/kernel/hipShflTests.cc +++ b/catch/unit/kernel/hipShflTests.cc @@ -21,7 +21,6 @@ THE SOFTWARE. #include #include #include -#include #define WIDTH 4 @@ -32,20 +31,17 @@ THE SOFTWARE. #define THREADS_PER_BLOCK_Z 1 // Device (Kernel) function, it must be void -template -__global__ void matrixTranspose(T* out, T* in, const int width) { +template __global__ void matrixTranspose(T* out, T* in, const int width) { int x = blockDim.x * blockIdx.x + threadIdx.x; T val = in[x]; for (int i = 0; i < width; i++) { - for (int j = 0; j < width; j++) - out[i * width + j] = __shfl(val, j * width + i); + for (int j = 0; j < width; j++) out[i * width + j] = __shfl(val, j * width + i); } } // CPU implementation of matrix transpose template -void matrixTransposeCPUReference(T* output, - T* input, const unsigned int width) { +void matrixTransposeCPUReference(T* output, T* input, const unsigned int width) { for (unsigned int j = 0; j < width; j++) { for (unsigned int i = 0; i < width; i++) { output[i * width + j] = input[j * width + i]; @@ -54,61 +50,52 @@ void matrixTransposeCPUReference(T* output, } static void getFactor(int* fact) { *fact = 101; } -static void getFactor(unsigned int* fact) { - *fact = static_cast(INT32_MAX)+1; -} +static void getFactor(unsigned int* fact) { *fact = static_cast(INT32_MAX) + 1; } static void getFactor(float* fact) { *fact = 2.5; } static void getFactor(__half* fact) { *fact = 2.5; } static void getFactor(double* fact) { *fact = 2.5; } static void getFactor(int64_t* fact) { *fact = 303; } -static void getFactor(uint64_t* fact) { - *fact = static_cast(__LONG_LONG_MAX__)+1; -} +static void getFactor(uint64_t* fact) { *fact = static_cast(__LONG_LONG_MAX__) + 1; } -template -int compare(T* TransposeMatrix, T* cpuTransposeMatrix) { +template int 
compare(T* TransposeMatrix, T* cpuTransposeMatrix) { int errors = 0; for (int i = 0; i < NUM; i++) { - if (TransposeMatrix[i] != cpuTransposeMatrix[i]) { - errors++; - } - } - return errors; -} - -template <> -int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) { - int errors = 0; - for (int i = 0; i < NUM; i++) { - if (__half2float(TransposeMatrix[i]) != __half2float(cpuTransposeMatrix[i])) { // NOLINT + if (TransposeMatrix[i] != cpuTransposeMatrix[i]) { errors++; } } return errors; } -template -void init(T* Matrix) { +template <> int compare<__half>(__half* TransposeMatrix, __half* cpuTransposeMatrix) { + int errors = 0; + for (int i = 0; i < NUM; i++) { + if (__half2float(TransposeMatrix[i]) != __half2float(cpuTransposeMatrix[i])) { // NOLINT + errors++; + } + } + return errors; +} + +template void init(T* Matrix) { // initialize the input data T factor; getFactor(&factor); for (int i = 0; i < NUM; i++) { - Matrix[i] = (T)i + factor; + Matrix[i] = (T)i + factor; } } -template <> -void init(__half* Matrix) { +template <> void init(__half* Matrix) { // initialize the input data __half factor; getFactor(&factor); for (int i = 0; i < NUM; i++) { - Matrix[i] = i + __half2float(factor); + Matrix[i] = i + __half2float(factor); } } -template -static void runTest() { +template static void runTest() { T* Matrix; T* TransposeMatrix; T* cpuTransposeMatrix; @@ -129,21 +116,17 @@ static void runTest() { // allocate the memory on the device side HIP_CHECK(hipMalloc(reinterpret_cast(&gpuMatrix), NUM * sizeof(T))); - HIP_CHECK(hipMalloc(reinterpret_cast(&gpuTransposeMatrix), - NUM * sizeof(T))); + HIP_CHECK(hipMalloc(reinterpret_cast(&gpuTransposeMatrix), NUM * sizeof(T))); // Memory transfer from host to device - HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), - hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(T), hipMemcpyHostToDevice)); // Lauching kernel from host - hipLaunchKernelGGL(matrixTranspose, dim3(1), - 
dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y), 0, 0, - gpuTransposeMatrix, gpuMatrix, WIDTH); + hipLaunchKernelGGL(matrixTranspose, dim3(1), dim3(THREADS_PER_BLOCK_X * THREADS_PER_BLOCK_Y), + 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); // Memory transfer from device to host - HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, - NUM * sizeof(T), hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(T), hipMemcpyDeviceToHost)); // CPU MatrixTranspose computation matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); @@ -183,26 +166,12 @@ static void runTest() { */ TEST_CASE("Unit_hipShflTests") { - SECTION("run test for int") { - runTest(); - } - SECTION("run test for float") { - runTest(); - } - SECTION("run test for double") { - runTest(); - } + SECTION("run test for int") { runTest(); } + SECTION("run test for float") { runTest(); } + SECTION("run test for double") { runTest(); } // Test added to support half datatype. - SECTION("run test for __half") { - runTest<__half>(); - } - SECTION("run test for int64_t") { - runTest(); - } - SECTION("run test for unsigned int") { - runTest(); - } - SECTION("run test for uint64_t") { - runTest(); - } + SECTION("run test for __half") { runTest<__half>(); } + SECTION("run test for int64_t") { runTest(); } + SECTION("run test for unsigned int") { runTest(); } + SECTION("run test for uint64_t") { runTest(); } } diff --git a/catch/unit/kernel/hipShflUpDownTest.cc b/catch/unit/kernel/hipShflUpDownTest.cc index ab80dd51b1..a06216f03d 100644 --- a/catch/unit/kernel/hipShflUpDownTest.cc +++ b/catch/unit/kernel/hipShflUpDownTest.cc @@ -21,12 +21,10 @@ THE SOFTWARE. 
#include #include #include -#include const int size = 32; -template -__global__ void shflDownSum(T* a, int size) { +template __global__ void shflDownSum(T* a, int size) { T val = a[threadIdx.x]; for (int i = size / 2; i > 0; i /= 2) { val += __shfl_down(val, i, size); @@ -34,8 +32,7 @@ __global__ void shflDownSum(T* a, int size) { a[threadIdx.x] = val; } -template -__global__ void shflUpSum(T* a, int size) { +template __global__ void shflUpSum(T* a, int size) { T val = a[threadIdx.x]; for (int i = size / 2; i > 0; i /= 2) { val += __shfl_up(val, i, size); @@ -43,34 +40,29 @@ __global__ void shflUpSum(T* a, int size) { a[threadIdx.x] = val; } -template -__global__ void shflXorSum(T* a, int size) { +template __global__ void shflXorSum(T* a, int size) { T val = a[threadIdx.x]; - for (int i = size/2; i > 0; i /= 2) { + for (int i = size / 2; i > 0; i /= 2) { val += __shfl_xor(val, i, size); } a[threadIdx.x] = val; } static void getFactor(int* fact) { *fact = 101; } -static void getFactor(unsigned int* fact) { - *fact = static_cast(INT32_MAX)+1; -} +static void getFactor(unsigned int* fact) { *fact = static_cast(INT32_MAX) + 1; } static void getFactor(float* fact) { *fact = 2.5; } static void getFactor(double* fact) { *fact = 2.5; } static void getFactor(__half* fact) { *fact = 2.5; } static void getFactor(int64_t* fact) { *fact = 303; } -static void getFactor(uint64_t* fact) { - *fact = static_cast(__LONG_LONG_MAX__)+1; -} +static void getFactor(uint64_t* fact) { *fact = static_cast(__LONG_LONG_MAX__) + 1; } template T sum(T* a) { T cpuSum = 0; T factor; getFactor(&factor); for (int i = 0; i < size; i++) { - a[i] = i + factor; - cpuSum += a[i]; + a[i] = i + factor; + cpuSum += a[i]; } return cpuSum; } @@ -80,8 +72,8 @@ template <> __half sum(__half* a) { __half factor; getFactor(&factor); for (int i = 0; i < size; i++) { - a[i] = i + __half2float(factor); - cpuSum = __half2float(cpuSum) + __half2float(a[i]); + a[i] = i + __half2float(factor); + cpuSum = 
__half2float(cpuSum) + __half2float(a[i]); } return cpuSum; } @@ -100,8 +92,7 @@ template <> bool compare(__half gpuSum, __half cpuSum) { return false; } -template -static void runTestShflUp() { +template static void runTestShflUp() { const int size = 32; T a[size]; T cpuSum = sum(a); @@ -114,8 +105,7 @@ static void runTestShflUp() { HIP_CHECK(hipFree(d_a)); } -template -static void runTestShflDown() { +template static void runTestShflDown() { T a[size]; T cpuSum = sum(a); T* d_a; @@ -127,8 +117,7 @@ static void runTestShflDown() { HIP_CHECK(hipFree(d_a)); } -template -static void runTestShflXor() { +template static void runTestShflXor() { T a[size]; T cpuSum = sum(a); T* d_a; @@ -141,12 +130,12 @@ static void runTestShflXor() { } /** -* @addtogroup __shfl __shfl -* @{ -* @ingroup ShflTest -* `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` - -* Contains warp __shfl_up function -*/ + * @addtogroup __shfl __shfl + * @{ + * @ingroup ShflTest + * `T __shfl_up(T var, unsigned int lane_delta, int width = warpSize)` - + * Contains warp __shfl_up function + */ /** * Test Description @@ -164,27 +153,13 @@ static void runTestShflXor() { */ TEST_CASE("Unit_runTestShfl_up") { - SECTION("runTestShflUp for int") { - runTestShflUp(); - } - SECTION("runTestShflUp for float") { - runTestShflUp(); - } - SECTION("runTestShflUp for double") { - runTestShflUp(); - } - SECTION("runTestShflUp for __half") { - runTestShflUp<__half>(); - } - SECTION("runTestShflUp for int64_t") { - runTestShflUp(); - } - SECTION("runTestShflUp for unsigned int") { - runTestShflUp(); - } - SECTION("runTestShflUp for uint64_t") { - runTestShflUp(); - } + SECTION("runTestShflUp for int") { runTestShflUp(); } + SECTION("runTestShflUp for float") { runTestShflUp(); } + SECTION("runTestShflUp for double") { runTestShflUp(); } + SECTION("runTestShflUp for __half") { runTestShflUp<__half>(); } + SECTION("runTestShflUp for int64_t") { runTestShflUp(); } + SECTION("runTestShflUp for unsigned 
int") { runTestShflUp(); } + SECTION("runTestShflUp for uint64_t") { runTestShflUp(); } } /** * End doxygen group __shfl. @@ -192,12 +167,12 @@ TEST_CASE("Unit_runTestShfl_up") { */ /** -* @addtogroup __shfl __shfl -* @{ -* @ingroup ShflTest -* `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` - -* Contains warp __shfl_down function -*/ + * @addtogroup __shfl __shfl + * @{ + * @ingroup ShflTest + * `T __shfl_down(T var, unsigned int lane_delta, int width = warpSize)` - + * Contains warp __shfl_down function + */ /** * Test Description @@ -215,27 +190,13 @@ TEST_CASE("Unit_runTestShfl_up") { */ TEST_CASE("Unit_runTestShfl_Down") { - SECTION("runTestShflDown for int") { - runTestShflDown(); - } - SECTION("runTestShflDown for float") { - runTestShflDown(); - } - SECTION("runTestShflDown for double") { - runTestShflDown(); - } - SECTION("runTestShflDown for __half") { - runTestShflDown<__half>(); - } - SECTION("runTestShflDown for int64_t") { - runTestShflDown(); - } - SECTION("runTestShflDown for unsigned int") { - runTestShflDown(); - } - SECTION("runTestShflDown for uint64_t") { - runTestShflDown(); - } + SECTION("runTestShflDown for int") { runTestShflDown(); } + SECTION("runTestShflDown for float") { runTestShflDown(); } + SECTION("runTestShflDown for double") { runTestShflDown(); } + SECTION("runTestShflDown for __half") { runTestShflDown<__half>(); } + SECTION("runTestShflDown for int64_t") { runTestShflDown(); } + SECTION("runTestShflDown for unsigned int") { runTestShflDown(); } + SECTION("runTestShflDown for uint64_t") { runTestShflDown(); } } /** * End doxygen group __shfl. 
@@ -243,12 +204,12 @@ TEST_CASE("Unit_runTestShfl_Down") { */ /** -* @addtogroup __shfl __shfl -* @{ -* @ingroup ShflTest -* `T __shfl_xor(T var, int laneMask, int width=warpSize)` - -* Contains warp __shfl_xor function -*/ + * @addtogroup __shfl __shfl + * @{ + * @ingroup ShflTest + * `T __shfl_xor(T var, int laneMask, int width=warpSize)` - + * Contains warp __shfl_xor function + */ /** * Test Description @@ -266,27 +227,13 @@ TEST_CASE("Unit_runTestShfl_Down") { */ TEST_CASE("Unit_runTestShfl_Xor") { - SECTION("runTestShflXor for int") { - runTestShflXor(); - } - SECTION("runTestShflXor for float") { - runTestShflXor(); - } - SECTION("runTestShflXor for double") { - runTestShflXor(); - } - SECTION("runTestShflXor for __half") { - runTestShflXor<__half>(); - } - SECTION("runTestShflXor for int64_t") { - runTestShflXor(); - } - SECTION("runTestShflXor for unsigned int") { - runTestShflXor(); - } - SECTION("runTestShflXor for uint64_t") { - runTestShflXor(); - } + SECTION("runTestShflXor for int") { runTestShflXor(); } + SECTION("runTestShflXor for float") { runTestShflXor(); } + SECTION("runTestShflXor for double") { runTestShflXor(); } + SECTION("runTestShflXor for __half") { runTestShflXor<__half>(); } + SECTION("runTestShflXor for int64_t") { runTestShflXor(); } + SECTION("runTestShflXor for unsigned int") { runTestShflXor(); } + SECTION("runTestShflXor for uint64_t") { runTestShflXor(); } } /** * End doxygen group __shfl. diff --git a/catch/unit/stream/hipStreamGetDevice.cc b/catch/unit/stream/hipStreamGetDevice.cc index 9f2eef521e..1fe87eaeeb 100644 --- a/catch/unit/stream/hipStreamGetDevice.cc +++ b/catch/unit/stream/hipStreamGetDevice.cc @@ -20,7 +20,6 @@ THE SOFTWARE. 
#include #include #include -#include #define NUMBER_OF_THREADS 10 static bool thread_results[NUMBER_OF_THREADS]; @@ -54,8 +53,7 @@ TEST_CASE("Unit_hipStreamGetDevice_Negative") { HIP_CHECK(hipStreamCreate(&stream)); HIP_CHECK_ERROR(hipStreamGetDevice(nullptr, nullptr), hipErrorInvalidValue); - HIP_CHECK_ERROR(hipStreamGetDevice(hipStreamPerThread, nullptr), - hipErrorInvalidValue); + HIP_CHECK_ERROR(hipStreamGetDevice(hipStreamPerThread, nullptr), hipErrorInvalidValue); HIP_CHECK_ERROR(hipStreamGetDevice(stream, nullptr), hipErrorInvalidValue); HIP_CHECK(hipStreamDestroy(stream)); } @@ -145,9 +143,7 @@ static bool validateStreamGetDevice() { return true; } -static void thread_Test(int threadNum) { - thread_results[threadNum] = validateStreamGetDevice(); -} +static void thread_Test(int threadNum) { thread_results[threadNum] = validateStreamGetDevice(); } static bool test_hipStreamGetDevice_MThread() { std::vector tests; @@ -158,7 +154,7 @@ static bool test_hipStreamGetDevice_MThread() { tests.push_back(std::thread(thread_Test, idx)); } // Wait for all threads to complete - for (std::thread &t : tests) { + for (std::thread& t : tests) { t.join(); } // Wait for thread @@ -169,9 +165,7 @@ static bool test_hipStreamGetDevice_MThread() { return status; } -TEST_CASE("Unit_hipStreamGetDevice_MThread") { - REQUIRE(true == test_hipStreamGetDevice_MThread()); -} +TEST_CASE("Unit_hipStreamGetDevice_MThread") { REQUIRE(true == test_hipStreamGetDevice_MThread()); } /** * Test Description From 122403f464365213e22bfd11cd8cb19c232b5468 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 14:36:01 +0100 Subject: [PATCH 07/71] EXSWHTEC-283 - Introduce build dependencies for numerical accuracy tests #233 Change-Id: I5381beb47347c24a161113b986fa7f252057ffdb --- catch/CMakeLists.txt | 4 ++++ catch/hipTestMain/main.cc | 6 ++++++ catch/include/cmd_options.hh | 5 +++++ 
catch/include/hip_test_defgroups.hh | 3 +++ catch/unit/CMakeLists.txt | 1 + catch/unit/compileAndCaptureOutput.py | 2 +- catch/unit/math/CMakeLists.txt | 26 ++++++++++++++++++++++++++ utils/coverage/hipAPICoverageUtils.cpp | 16 ++++++++++++++-- 8 files changed, 60 insertions(+), 3 deletions(-) create mode 100644 catch/unit/math/CMakeLists.txt diff --git a/catch/CMakeLists.txt b/catch/CMakeLists.txt index e73460d0dd..f31db5f10f 100644 --- a/catch/CMakeLists.txt +++ b/catch/CMakeLists.txt @@ -89,6 +89,9 @@ else() set(HIP_PACKAGING_VERSION_PATCH ${HIP_VERSION_PATCH}-${HIP_VERSION_GITHASH}) endif() + +find_package(Boost 1.70.0) + if(NOT DEFINED CATCH2_PATH) if(DEFINED ENV{CATCH2_PATH}) set(CATCH2_PATH $ENV{CATCH2_PATH} CACHE STRING "Catch2 Path") @@ -126,6 +129,7 @@ include_directories( "./kernels" ${HIP_PATH}/include ${JSON_PARSER} + ${Boost_INCLUDE_DIRS} ) option(RTC_TESTING "Run tests using HIP RTC to compile the kernels" OFF) diff --git a/catch/hipTestMain/main.cc b/catch/hipTestMain/main.cc index 109b0593fc..e869695572 100644 --- a/catch/hipTestMain/main.cc +++ b/catch/hipTestMain/main.cc @@ -36,6 +36,12 @@ int main(int argc, char** argv) { | Opt(cmd_options.cg_iterations, "cg_iterations") ["-C"]["--cg-iterations"] ("Number of iterations used for cooperative groups sync tests (default: 5)") + | Opt(cmd_options.accuracy_iterations, "accuracy_iterations") + ["-A"]["--accuracy-iterations"] + ("Number of iterations used for math accuracy tests with randomly generated inputs (default: 2^32)") + | Opt(cmd_options.accuracy_max_memory, "accuracy_max_memory") + ["-M"]["--accuracy-max-memory"] + ("Percentage of global device memory allowed for math accuracy tests (default: 80%)") ; // clang-format on diff --git a/catch/include/cmd_options.hh b/catch/include/cmd_options.hh index 6caf7a0f48..666f34ea82 100644 --- a/catch/include/cmd_options.hh +++ b/catch/include/cmd_options.hh @@ -22,6 +22,9 @@ THE SOFTWARE. 
#pragma once +#include +#include + struct CmdOptions { int iterations = 10; int warmups = 100; @@ -29,6 +32,8 @@ struct CmdOptions { int cg_iterations = 5; bool no_display = false; bool progress = false; + uint64_t accuracy_iterations = std::numeric_limits::max() + 1ull; + int accuracy_max_memory = 80; }; extern CmdOptions cmd_options; diff --git a/catch/include/hip_test_defgroups.hh b/catch/include/hip_test_defgroups.hh index c1f72bdf6c..0a6d4dde90 100644 --- a/catch/include/hip_test_defgroups.hh +++ b/catch/include/hip_test_defgroups.hh @@ -284,6 +284,9 @@ TEST_CASE("Unit_atomicDec_Negative_Parameters") {} /** * End doxygen group AtomicsTest. + * @defgroup MathTest Math Device Functions + * @{ + * This section describes tests for device math functions of HIP runtime API. * @} */ diff --git a/catch/unit/CMakeLists.txt b/catch/unit/CMakeLists.txt index 46f79b4d1d..f1c8faa011 100644 --- a/catch/unit/CMakeLists.txt +++ b/catch/unit/CMakeLists.txt @@ -43,6 +43,7 @@ add_subdirectory(g++) add_subdirectory(module) add_subdirectory(channelDescriptor) add_subdirectory(executionControl) +add_subdirectory(math) add_subdirectory(vector_types) add_subdirectory(atomics) add_subdirectory(complex) diff --git a/catch/unit/compileAndCaptureOutput.py b/catch/unit/compileAndCaptureOutput.py index a8a7fb506a..b5bb925cc4 100644 --- a/catch/unit/compileAndCaptureOutput.py +++ b/catch/unit/compileAndCaptureOutput.py @@ -52,7 +52,7 @@ class CompileAndCapture(unittest.TestCase): # HIP compiler on AMD platforms has limit of 20 errors, and some negative # test cases expect that more errors are detected. if (self.platform == 'amd'): - compiler_args.append('-ferror-limit=100') + compiler_args.append('-ferror-limit=200') compiler_output = subprocess.run(compiler_args, stderr=subprocess.PIPE) # Get the compiler output in the stdout if -V flag is raised during ctest invocation. 
compiler_stderr = compiler_output.stderr.decode('UTF-8') diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt new file mode 100644 index 0000000000..844cbfa8d3 --- /dev/null +++ b/catch/unit/math/CMakeLists.txt @@ -0,0 +1,26 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +set(TEST_SRC +) + +hip_add_exe_to_target(NAME MathsTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests COMMON_SHARED_SRC ${COMMON_SHARED_SRC}) diff --git a/utils/coverage/hipAPICoverageUtils.cpp b/utils/coverage/hipAPICoverageUtils.cpp index aaf19dd906..30242fbcb9 100644 --- a/utils/coverage/hipAPICoverageUtils.cpp +++ b/utils/coverage/hipAPICoverageUtils.cpp @@ -47,6 +47,7 @@ void findAPICallInFile(HipAPI& hip_api, std::string test_module_file) { std::string api_member{"." 
+ hip_api.getName() + "("}; std::string api_newline{" " + hip_api.getName() + "("}; std::string api_templated{" " + hip_api.getName() + "<"}; + std::string api_kernel_def_macro{"_KERNEL_DEF(" + hip_api.getName() + ")"}; std::string api_restriction{hip_api.getFileRestriction()}; bool found_restriction{false}; @@ -66,7 +67,8 @@ void findAPICallInFile(HipAPI& hip_api, std::string test_module_file) { (line.find(api_member) != std::string::npos) || (line.find(api_newline) != std::string::npos) || (line.find(hip_api.getName() + "(") == 0) || - (line.find(api_templated) != std::string::npos)) { + (line.find(api_templated) != std::string::npos) || + (line.find(api_kernel_def_macro) != std::string::npos)) { if (api_restriction == "" || found_restriction) { hip_api.addFileOccurrence(FileOccurrence(test_module_file, line_number)); } @@ -135,6 +137,8 @@ void findAPITestCaseInFileByAPIName(HipAPI& hip_api, std::string test_module_fil std::string line; std::string test_case_definition{"TEST_CASE("}; + std::string test_def_macro{"_TEST_DEF("}; + std::string test_def_impl_macro{"_TEST_DEF_IMPL("}; std::string test_case{"None"}; while (std::getline(test_module_file_handler, line)) { @@ -146,6 +150,14 @@ void findAPITestCaseInFileByAPIName(HipAPI& hip_api, std::string test_module_fil if (test_case.find("_" + hip_api.getName() + "_") != std::string::npos) { hip_api.addTestCase(TestCaseOccurrence{test_case, test_module_file, line_number}); } + } else if ((line.find(test_def_macro) != std::string::npos) || + (line.find(test_def_impl_macro) != std::string::npos)) { + test_case = line.substr(line.find("(") + 1); + test_case = test_case.substr(0, test_case.find(",")); + if (test_case == hip_api.getName() || test_case == hip_api.getName() + "_wrapper") { + hip_api.addTestCase(TestCaseOccurrence{"Unit_Device_" + test_case + "_Accuracy_Positive", + test_module_file, line_number}); + } } } @@ -403,4 +415,4 @@ std::vector extractTestModuleFiles(std::string& tests_root_director std::string 
findAbsolutePathOfFile(std::string file_path) { return std::filesystem::canonical(std::filesystem::absolute(file_path)); -} +} \ No newline at end of file From 36620358e64226d908669567b186db70a6f28ae7 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Mon, 22 Jan 2024 18:59:43 +0530 Subject: [PATCH 08/71] EXSWHTEC-287 - Implement tests for trigonometric device math functions #231 Change-Id: I44188fa2f60f2492e05596464c914b8c739be482 --- catch/unit/math/CMakeLists.txt | 11 + catch/unit/math/binary_common.hh | 136 ++++++++ catch/unit/math/math_common.hh | 240 +++++++++++++ catch/unit/math/math_special_values.hh | 287 ++++++++++++++++ catch/unit/math/quaternary_common.hh | 246 ++++++++++++++ catch/unit/math/ternary_common.hh | 142 ++++++++ catch/unit/math/thread_pool.hh | 64 ++++ .../trig_double_precision_negative_kernels.cc | 108 ++++++ catch/unit/math/trig_funcs.cc | 137 ++++++++ catch/unit/math/trig_negative_kernels_rtc.hh | 320 ++++++++++++++++++ .../trig_single_precision_negative_kernels.cc | 118 +++++++ catch/unit/math/unary_common.hh | 198 +++++++++++ catch/unit/math/validators.hh | 152 +++++++++ 13 files changed, 2159 insertions(+) create mode 100644 catch/unit/math/binary_common.hh create mode 100644 catch/unit/math/math_common.hh create mode 100644 catch/unit/math/math_special_values.hh create mode 100644 catch/unit/math/quaternary_common.hh create mode 100644 catch/unit/math/ternary_common.hh create mode 100644 catch/unit/math/thread_pool.hh create mode 100644 catch/unit/math/trig_double_precision_negative_kernels.cc create mode 100644 catch/unit/math/trig_funcs.cc create mode 100644 catch/unit/math/trig_negative_kernels_rtc.hh create mode 100644 catch/unit/math/trig_single_precision_negative_kernels.cc create mode 100644 catch/unit/math/unary_common.hh create mode 100644 catch/unit/math/validators.hh diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt index 844cbfa8d3..2ebf11063d 100644 --- a/catch/unit/math/CMakeLists.txt +++ 
b/catch/unit/math/CMakeLists.txt @@ -19,8 +19,19 @@ # THE SOFTWARE. set(TEST_SRC + trig_funcs.cc ) hip_add_exe_to_target(NAME MathsTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests COMMON_SHARED_SRC ${COMMON_SHARED_SRC}) + +add_test(NAME Unit_Device_Single_Precision_Trig_Functions_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + trig_single_precision_negative_kernels.cc 66) + +add_test(NAME Unit_Device_Double_Precision_Trig_Functions_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + trig_double_precision_negative_kernels.cc 66) \ No newline at end of file diff --git a/catch/unit/math/binary_common.hh b/catch/unit/math/binary_common.hh new file mode 100644 index 0000000000..72de23096e --- /dev/null +++ b/catch/unit/math/binary_common.hh @@ -0,0 +1,136 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "math_common.hh" +#include "math_special_values.hh" + +#include + +namespace cg = cooperative_groups; + +#define MATH_BINARY_KERNEL_DEF(func_name) \ + template \ + __global__ void func_name##_kernel(RT* const ys, const size_t num_xs, T* const x1s, \ + T* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + if constexpr (std::is_same_v) { \ + ys[i] = func_name##f(x1s[i], x2s[i]); \ + } else if constexpr (std::is_same_v) { \ + ys[i] = func_name(x1s[i], x2s[i]); \ + } \ + } \ + } + +template +void BinaryFloatingPointBruteForceTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder, + const TArg a = std::numeric_limits::lowest(), + const TArg b = std::numeric_limits::max()) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const uint64_t num_iterations = GetTestIterationCount(); + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(TArg) * 2 + sizeof(T)), num_iterations); + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + for (uint64_t i = 0ul; i < num_iterations; i += batch_size) { + batch_size = std::min(max_batch_size, num_iterations - i); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) 
{ + const auto sub_batch_size = min_sub_batch_size + (i < tail); + thread_pool.Post([=, &x1s, &x2s] { + const auto generator = [=] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_real_distribution> unif_dist(a, b); + return static_cast(unif_dist(rng)); + }; + std::generate(x1s.ptr() + base_idx, x1s.ptr() + base_idx + sub_batch_size, generator); + std::generate(x2s.ptr() + base_idx, x2s.ptr() + base_idx + sub_batch_size, generator); + }); + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, x1s.ptr(), + x2s.ptr()); + } +} + +template +void BinaryFloatingPointSpecialValuesTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto values = std::get>(kSpecialValRegistry); + + const auto size = values.size * values.size; + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; + + for (auto i = 0u; i < values.size; ++i) { + for (auto j = 0u; j < values.size; ++j) { + x1s.ptr()[i * values.size + j] = values.data[i]; + x2s.ptr()[i * values.size + j] = values.data[j]; + } + } + + MathTest math_test(kernel, size); + math_test.template Run(validator_builder, grid_size, block_size, ref_func, size, x1s.ptr(), + x2s.ptr()); +} + +template +void BinaryFloatingPointTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + SECTION("Special values") { + BinaryFloatingPointSpecialValuesTest(kernel, ref_func, validator_builder); + } + + SECTION("Brute force") { BinaryFloatingPointBruteForceTest(kernel, ref_func, validator_builder); } +} + + +#define MATH_BINARY_WITHIN_ULP_TEST_DEF(kern_name, ref_func, sp_ulp, dp_ulp) \ + MATH_BINARY_KERNEL_DEF(kern_name) \ + \ + TEMPLATE_TEST_CASE("Unit_Device_" #kern_name 
"_Accuracy_Positive", "", float, double) { \ + using RT = RefType_t; \ + RT (*ref)(RT, RT) = ref_func; \ + const auto ulp = std::is_same_v ? sp_ulp : dp_ulp; \ + \ + BinaryFloatingPointTest(kern_name##_kernel, ref, \ + ULPValidatorBuilderFactory(ulp)); \ + } \ No newline at end of file diff --git a/catch/unit/math/math_common.hh b/catch/unit/math/math_common.hh new file mode 100644 index 0000000000..8b59558389 --- /dev/null +++ b/catch/unit/math/math_common.hh @@ -0,0 +1,240 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include +#include +#include + +#include + +#include "thread_pool.hh" +#include "validators.hh" + +namespace cg = cooperative_groups; + +template +std::enable_if_t, std::is_arithmetic>, std::ostream&> +operator<<(std::ostream& os, const std::pair& p) { + const auto default_prec = os.precision(); + return os << "<" << std::setprecision(std::numeric_limits::max_digits10 - 1) << p.first << ", " + << std::setprecision(std::numeric_limits::max_digits10 - 1) << p.second << ">" + << std::setprecision(default_prec); +} + +// This class represents a generic numerical accuracy math test. Template parameter T is the output +// type of the function being tested, and template parameter pack Ts represents the input types. The +// constructor takes a kernel with the signature void(T*, const size_t, Ts*...). The first kernel +// parameter is the output array, the second parameter is the number of outputs, and the rest of the +// parameters are arrays containing input values. The number of input arrays depends on the arity of +// the function being tested e.g. one input array for unary functions, two input arrays for binary +// functions, etc. The kernel threads take one element from each input array at the index +// corresponding to that thread, feed the input elements to the testee function, and store the +// result in the output array at the corresponding index. +// +// E.g. for a binary function the kernel would have the following signature: +// void kernel(float* y, const size_t n, float* x1, float* x2) +// +// The outputs would be calculated in parallel the following way: +// y[0] = testee(x1[0], x2[0]) +// y[1] = testee(x1[1], x2[1]) +// y[2] = testee(x1[2], x2[2]) +// ... +// +// The constructor also takes max_num_args, which represents the maximum number of input values used +// for one kernel launch. The device memory for the input and output arrays is allocated based on +// that number. 
+template class MathTest { + public: + MathTest(void (*kernel)(T*, const size_t, Ts*...), const size_t max_num_args) + : kernel_{kernel}, + xss_dev_(LinearAllocGuard(LinearAllocs::hipMalloc, max_num_args * sizeof(Ts))...), + y_dev_{LinearAllocs::hipMalloc, max_num_args * sizeof(T)}, + y_{LinearAllocs::hipHostMalloc, max_num_args * sizeof(T)} {} + + // This method runs the test with the following steps: + // 1. Copy the values from the input arrays provided in the parameter pack xss to device memory + // 2. Launch the kernel using the configuration provided in grid_dims and block_dims + // 3. Copy the outputs back to host memory + // 4. Generate the reference values using ref_func and compare against the outputs using the + // validator provided by validator_builder + // 5. If non-type template parameter parallel is true, then step 4 is broken up into chunks of + // work that are done in parallel on the host. + template + void Run(const ValidatorBuilder& validator_builder, const size_t grid_dims, + const size_t block_dims, RT (*const ref_func)(RTs...), const size_t num_args, + const Ts*... xss) { + fail_flag_.store(false); + error_info_.clear(); + RunImpl(validator_builder, grid_dims, block_dims, ref_func, num_args, + std::index_sequence_for{}, xss...); + } + + private: + void (*kernel_)(T*, const size_t, Ts*...); + std::tuple...> xss_dev_; + LinearAllocGuard y_dev_; + LinearAllocGuard y_; + std::atomic fail_flag_{false}; + std::mutex mtx_; + std::string error_info_; + + template + void RunImpl(const ValidatorBuilder& validator_builder, const size_t grid_dim, + const size_t block_dim, RT (*const ref_func)(RTs...), const size_t num_args, + std::index_sequence is, const Ts*... 
xss) { + const auto xss_tup = std::make_tuple(xss...); + + constexpr auto f = [](auto dst, auto src, size_t size) { + HIP_CHECK(hipMemcpy(dst, src, size, hipMemcpyHostToDevice)) + }; + + ((f(std::get(xss_dev_).ptr(), std::get(xss_tup), + num_args * sizeof(*std::get(xss_tup)))), + ...); + + kernel_<<>>(y_dev_.ptr(), num_args, std::get(xss_dev_).ptr()...); + HIP_CHECK(hipGetLastError()); + + HIP_CHECK(hipMemcpy(y_.ptr(), y_dev_.ptr(), num_args * sizeof(T), hipMemcpyDeviceToHost)); + HIP_CHECK(hipStreamSynchronize(nullptr)); + + if constexpr (!parallel) { + for (auto i = 0u; i < num_args; ++i) { + const auto actual_val = y_.ptr()[i]; + const auto ref_val = static_cast(ref_func(xss[i]...)); + const auto validator = validator_builder(ref_val, xss[i]...); + + if (!validator->match(actual_val)) { + const auto log = MakeLogMessage(actual_val, xss[i]...) + validator->describe() + "\n"; + INFO(log); + REQUIRE(false); + } + } + + return; + } + + const auto task = [&, this](size_t iters, size_t base_idx) { + for (auto i = 0u; i < iters; ++i) { + if (fail_flag_.load(std::memory_order_relaxed)) return; + + const auto actual_val = y_.ptr()[base_idx + i]; + const auto ref_val = static_cast(ref_func(xss[base_idx + i]...)); + const auto validator = validator_builder(ref_val, xss[base_idx + i]...); + + if (!validator->match(actual_val)) { + fail_flag_.store(true, std::memory_order_relaxed); + // Several threads might have passed the first check, but failed validation. On the + // chance of this happening, access to the string stream must be serialized. + const auto log = + MakeLogMessage(actual_val, xss[base_idx + i]...) 
+ validator->describe() + "\n"; + { + std::lock_guard lg{mtx_}; + error_info_ += log; + } + return; + } + } + }; + + const auto task_count = thread_pool.thread_count(); + const auto chunk_size = num_args / task_count; + const auto tail = num_args % task_count; + + auto base_idx = 0u; + for (auto i = 0u; i < task_count; ++i) { + const auto iters = chunk_size + (i < tail); + thread_pool.Post([=, &task] { task(iters, base_idx); }); + base_idx += iters; + } + + thread_pool.Wait(); + + INFO(error_info_); + REQUIRE(!fail_flag_); + } + + template std::string MakeLogMessage(T actual_val, Args... args) { + std::stringstream ss; + ss << "Input value(s): " << std::scientific + << std::setprecision(std::numeric_limits::max_digits10 - 1); + ((ss << " " << args), ...) << "\n" << actual_val << " "; + + return ss.str(); + } +}; + +template struct RefType {}; + +template <> struct RefType { using type = double; }; + +template <> struct RefType { using type = long double; }; + +template using RefType_t = typename RefType::type; + +template auto GetOccupancyMaxPotentialBlockSize(F kernel) { + int grid_size = 0, block_size = 0; + HIP_CHECK(hipOccupancyMaxPotentialBlockSize(&grid_size, &block_size, kernel, 0, 0)); + return std::make_tuple(grid_size, block_size); +} + +inline size_t GetMaxAllowedDeviceMemoryUsage() { + hipDeviceProp_t props; + HIP_CHECK(hipGetDeviceProperties(&props, 0)); + return props.totalGlobalMem * (cmd_options.accuracy_max_memory * 0.01f); +} + +inline uint64_t GetTestIterationCount() { return cmd_options.accuracy_iterations; } + +template using kernel_sig = void (*)(T*, const size_t, Ts*...); + +template using ref_sig = T (*)(Ts...); + +template void NegativeTestRTCWrapper(const char* program_source) { + hiprtcProgram program{}; + + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "math_test_rtc.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + 
size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{error_num}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/catch/unit/math/math_special_values.hh b/catch/unit/math/math_special_values.hh new file mode 100644 index 0000000000..bc5488fc31 --- /dev/null +++ b/catch/unit/math/math_special_values.hh @@ -0,0 +1,287 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Disclaimer: +// This code is based on the work found in OpenCL-CTS authored by The Khronos Group. +// The original code can be found at https://github.com/KhronosGroup/OpenCL-CTS. +// We acknowledge the contributions of The Khronos Group to the development of this code. + +#pragma once + +#include +#include + +/*----------------------------------------------------------------------------- + HEX_FLT, HEXT_DBL, HEX_LDBL -- Create hex floating point literal of type + float, double, long double respectively. 
Arguments: + + sm -- sign of number, + int -- integer part of mantissa (without `0x' prefix), + fract -- fractional part of mantissa (without decimal point and `L' or + `LL' suffixes), + se -- sign of exponent, + exp -- absolute value of (binary) exponent. + + Example: + + double yhi = HEX_DBL(+, 1, 5555555555555, -, 2); // 0x1.5555555555555p-2 + + Note: + + We have to pass signs as separate arguments because gcc pass negative + integer values (e. g. `-2') into a macro as two separate tokens, so + `HEX_FLT(1, 0, -2)' produces result `0x1.0p- 2' (note a space between minus + and two) which is not a correct floating point literal. +-----------------------------------------------------------------------------*/ +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +// If compiler does not support hex floating point literals: +#define HEX_FLT(sm, int, fract, se, exp) \ + sm ldexpf((float)(0x##int##fract##UL), \ + se exp + ilogbf((float)0x##int) - ilogbf((float)(0x##int##fract##UL))) +#define HEX_DBL(sm, int, fract, se, exp) \ + sm ldexp((double)(0x##int##fract##ULL), \ + se exp + ilogb((double)0x##int) - ilogb((double)(0x##int##fract##ULL))) +#define HEX_LDBL(sm, int, fract, se, exp) \ + sm ldexpl((long double)(0x##int##fract##ULL), \ + se exp + ilogbl((long double)0x##int) - ilogbl((long double)(0x##int##fract##ULL))) +#else +// If compiler supports hex floating point literals: just concatenate all the +// parts into a literal. 
+#define HEX_FLT(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp##F +#define HEX_DBL(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp +#define HEX_LDBL(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp##L +#endif + +inline constexpr std::array kSpecialValuesDouble{ + -std::numeric_limits::quiet_NaN(), + -std::numeric_limits::infinity(), + -std::numeric_limits::max(), + HEX_DBL(-, 1, 0000000000001, +, 64), + HEX_DBL(-, 1, 0, +, 64), + HEX_DBL(-, 1, fffffffffffff, +, 63), + HEX_DBL(-, 1, 0000000000001, +, 63), + HEX_DBL(-, 1, 0, +, 63), + HEX_DBL(-, 1, fffffffffffff, +, 62), + HEX_DBL(-, 1, 000002, +, 32), + HEX_DBL(-, 1, 0, +, 32), + HEX_DBL(-, 1, fffffffffffff, +, 31), + HEX_DBL(-, 1, 0000000000001, +, 31), + HEX_DBL(-, 1, 0, +, 31), + HEX_DBL(-, 1, fffffffffffff, +, 30), + -1000.0, + -100.0, + -4.0, + -3.5, + -3.0, + HEX_DBL(-, 1, 8000000000001, +, 1), + -2.5, + HEX_DBL(-, 1, 7ffffffffffff, +, 1), + -2.0, + HEX_DBL(-, 1, 8000000000001, +, 0), + -1.5, + HEX_DBL(-, 1, 7ffffffffffff, +, 0), + HEX_DBL(-, 1, 0000000000001, +, 0), + -1.0, + HEX_DBL(-, 1, fffffffffffff, -, 1), + HEX_DBL(-, 1, 0000000000001, -, 1), + -0.5, + HEX_DBL(-, 1, fffffffffffff, -, 2), + HEX_DBL(-, 1, 0000000000001, -, 2), + -0.25, + HEX_DBL(-, 1, fffffffffffff, -, 3), + HEX_DBL(-, 1, 0000000000001, -, 1022), + -std::numeric_limits::min(), + HEX_DBL(-, 0, fffffffffffff, -, 1022), + HEX_DBL(-, 0, 0000000000fff, -, 1022), + HEX_DBL(-, 0, 00000000000fe, -, 1022), + HEX_DBL(-, 0, 000000000000e, -, 1022), + HEX_DBL(-, 0, 000000000000c, -, 1022), + HEX_DBL(-, 0, 000000000000a, -, 1022), + HEX_DBL(-, 0, 0000000000008, -, 1022), + HEX_DBL(-, 0, 0000000000007, -, 1022), + HEX_DBL(-, 0, 0000000000006, -, 1022), + HEX_DBL(-, 0, 0000000000005, -, 1022), + HEX_DBL(-, 0, 0000000000004, -, 1022), + HEX_DBL(-, 0, 0000000000003, -, 1022), + HEX_DBL(-, 0, 0000000000002, -, 1022), + HEX_DBL(-, 0, 0000000000001, -, 1022), + -0.0, + + std::numeric_limits::quiet_NaN(), + 
std::numeric_limits::infinity(), + std::numeric_limits::max(), + HEX_DBL(+, 1, 0000000000001, +, 64), + HEX_DBL(+, 1, 0, +, 64), + HEX_DBL(+, 1, fffffffffffff, +, 63), + HEX_DBL(+, 1, 0000000000001, +, 63), + HEX_DBL(+, 1, 0, +, 63), + HEX_DBL(+, 1, fffffffffffff, +, 62), + HEX_DBL(+, 1, 000002, +, 32), + HEX_DBL(+, 1, 0, +, 32), + HEX_DBL(+, 1, fffffffffffff, +, 31), + HEX_DBL(+, 1, 0000000000001, +, 31), + HEX_DBL(+, 1, 0, +, 31), + HEX_DBL(+, 1, fffffffffffff, +, 30), + +1000.0, + +100.0, + +4.0, + +3.5, + +3.0, + HEX_DBL(+, 1, 8000000000001, +, 1), + +2.5, + HEX_DBL(+, 1, 7ffffffffffff, +, 1), + +2.0, + HEX_DBL(+, 1, 8000000000001, +, 0), + +1.5, + HEX_DBL(+, 1, 7ffffffffffff, +, 0), + HEX_DBL(+, 1, 0000000000001, +, 0), + +1.0, + HEX_DBL(+, 1, fffffffffffff, -, 1), + HEX_DBL(+, 1, 0000000000001, -, 1), + +0.5, + HEX_DBL(+, 1, fffffffffffff, -, 2), + HEX_DBL(+, 1, 0000000000001, -, 2), + +0.25, + HEX_DBL(+, 1, fffffffffffff, -, 3), + HEX_DBL(+, 1, 0000000000001, -, 1022), + +std::numeric_limits::min(), + HEX_DBL(+, 0, fffffffffffff, -, 1022), + HEX_DBL(+, 0, 0000000000fff, -, 1022), + HEX_DBL(+, 0, 00000000000fe, -, 1022), + HEX_DBL(+, 0, 000000000000e, -, 1022), + HEX_DBL(+, 0, 000000000000c, -, 1022), + HEX_DBL(+, 0, 000000000000a, -, 1022), + HEX_DBL(+, 0, 0000000000008, -, 1022), + HEX_DBL(+, 0, 0000000000007, -, 1022), + HEX_DBL(+, 0, 0000000000006, -, 1022), + HEX_DBL(+, 0, 0000000000005, -, 1022), + HEX_DBL(+, 0, 0000000000004, -, 1022), + HEX_DBL(+, 0, 0000000000003, -, 1022), + HEX_DBL(+, 0, 0000000000002, -, 1022), + HEX_DBL(+, 0, 0000000000001, -, 1022), + +0.0, +}; + +inline constexpr std::array kSpecialValuesFloat{ + -std::numeric_limits::quiet_NaN(), + -std::numeric_limits::infinity(), + -std::numeric_limits::max(), + HEX_FLT(-, 1, 000002, +, 64), + HEX_FLT(-, 1, 0, +, 64), + HEX_FLT(-, 1, fffffe, +, 63), + HEX_FLT(-, 1, 000002, +, 63), + HEX_FLT(-, 1, 0, +, 63), + HEX_FLT(-, 1, fffffe, +, 62), + HEX_FLT(-, 1, 000002, +, 32), + HEX_FLT(-, 1, 0, +, 
32), + HEX_FLT(-, 1, fffffe, +, 31), + HEX_FLT(-, 1, 000002, +, 31), + HEX_FLT(-, 1, 0, +, 31), + HEX_FLT(-, 1, fffffe, +, 30), + -1000.f, + -100.f, + -4.0f, + -3.5f, + -3.0f, + HEX_FLT(-, 1, 800002, +, 1), + -2.5f, + HEX_FLT(-, 1, 7ffffe, +, 1), + -2.0f, + HEX_FLT(-, 1, 800002, +, 0), + -1.5f, + HEX_FLT(-, 1, 7ffffe, +, 0), + HEX_FLT(-, 1, 000002, +, 0), + -1.0f, + HEX_FLT(-, 1, fffffe, -, 1), + HEX_FLT(-, 1, 000002, -, 1), + -0.5f, + HEX_FLT(-, 1, fffffe, -, 2), + HEX_FLT(-, 1, 000002, -, 2), + -0.25f, + HEX_FLT(-, 1, fffffe, -, 3), + HEX_FLT(-, 1, 000002, -, 126), + -std::numeric_limits::min(), + HEX_FLT(-, 0, fffffe, -, 126), + HEX_FLT(-, 0, 000ffe, -, 126), + HEX_FLT(-, 0, 0000fe, -, 126), + HEX_FLT(-, 0, 00000e, -, 126), + HEX_FLT(-, 0, 00000c, -, 126), + HEX_FLT(-, 0, 00000a, -, 126), + HEX_FLT(-, 0, 000008, -, 126), + HEX_FLT(-, 0, 000006, -, 126), + HEX_FLT(-, 0, 000004, -, 126), + HEX_FLT(-, 0, 000002, -, 126), + -0.0f, + + std::numeric_limits::quiet_NaN(), + std::numeric_limits::infinity(), + std::numeric_limits::max(), + HEX_FLT(+, 1, 000002, +, 64), + HEX_FLT(+, 1, 0, +, 64), + HEX_FLT(+, 1, fffffe, +, 63), + HEX_FLT(+, 1, 000002, +, 63), + HEX_FLT(+, 1, 0, +, 63), + HEX_FLT(+, 1, fffffe, +, 62), + HEX_FLT(+, 1, 000002, +, 32), + HEX_FLT(+, 1, 0, +, 32), + HEX_FLT(+, 1, fffffe, +, 31), + HEX_FLT(+, 1, 000002, +, 31), + HEX_FLT(+, 1, 0, +, 31), + HEX_FLT(+, 1, fffffe, +, 30), + +1000.f, + +100.f, + +4.0f, + +3.5f, + +3.0f, + HEX_FLT(+, 1, 800002, +, 1), + 2.5f, + HEX_FLT(+, 1, 7ffffe, +, 1), + +2.0f, + HEX_FLT(+, 1, 800002, +, 0), + 1.5f, + HEX_FLT(+, 1, 7ffffe, +, 0), + HEX_FLT(+, 1, 000002, +, 0), + +1.0f, + HEX_FLT(+, 1, fffffe, -, 1), + HEX_FLT(+, 1, 000002, -, 1), + +0.5f, + HEX_FLT(+, 1, fffffe, -, 2), + HEX_FLT(+, 1, 000002, -, 2), + +0.25f, + HEX_FLT(+, 1, fffffe, -, 3), + HEX_FLT(+, 1, 000002, -, 126), + +std::numeric_limits::min(), + HEX_FLT(+, 0, fffffe, -, 126), + HEX_FLT(+, 0, 000ffe, -, 126), + HEX_FLT(+, 0, 0000fe, -, 126), + HEX_FLT(+, 
0, 00000e, -, 126), + HEX_FLT(+, 0, 00000c, -, 126), + HEX_FLT(+, 0, 00000a, -, 126), + HEX_FLT(+, 0, 000008, -, 126), + HEX_FLT(+, 0, 000006, -, 126), + HEX_FLT(+, 0, 000004, -, 126), + HEX_FLT(+, 0, 000002, -, 126), + +0.0f, +}; + +template struct SpecialVals { + const T* const data; + const size_t size; +}; + +inline constexpr auto kSpecialValRegistry = + std::make_tuple(SpecialVals{kSpecialValuesFloat.data(), kSpecialValuesFloat.size()}, + SpecialVals{kSpecialValuesDouble.data(), kSpecialValuesDouble.size()}); diff --git a/catch/unit/math/quaternary_common.hh b/catch/unit/math/quaternary_common.hh new file mode 100644 index 0000000000..b29eb52ef4 --- /dev/null +++ b/catch/unit/math/quaternary_common.hh @@ -0,0 +1,246 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#pragma once
+
+#include "math_common.hh"
+#include "math_special_values.hh"
+
+#include
+
+namespace cg = cooperative_groups;
+
+// NOTE(review): every angle-bracket list in this file (the #include target above, template
+// parameter lists, template arguments) is missing from this copy of the patch. Restore them from
+// the original commit before applying.
+
+// Defines the device kernel `<func_name>_kernel` for a four-argument math function. Each thread
+// starts at its rank within the grid and advances by the grid size, writing one result to ys[i]
+// for every visited index i < num_xs. The first `if constexpr` branch calls the `f`-suffixed
+// function (func_name##f) and the second calls func_name itself; presumably these are the float
+// and double cases -- TODO confirm once the is_same_v arguments are restored.
+#define MATH_QUATERNARY_KERNEL_DEF(func_name) \
+  template \
+  __global__ void func_name##_kernel(T* const ys, const size_t num_xs, T* const x1s, T* const x2s, \
+                                     T* const x3s, T* const x4s) { \
+    const auto tid = cg::this_grid().thread_rank(); \
+    const auto stride = cg::this_grid().size(); \
+    \
+    for (auto i = tid; i < num_xs; i += stride) { \
+      if constexpr (std::is_same_v) { \
+        ys[i] = func_name##f(x1s[i], x2s[i], x3s[i], x4s[i]); \
+      } else if constexpr (std::is_same_v) { \
+        ys[i] = func_name(x1s[i], x2s[i], x3s[i], x4s[i]); \
+      } \
+    } \
+  }
+
+// Shorter list of double special values. QuaternaryFloatingPointSpecialValuesTest below tests
+// every 4-tuple of these values, so the number of test points is the fourth power of this
+// table's length.
+inline constexpr std::array kSpecialValuesReducedDouble{
+    -std::numeric_limits::quiet_NaN(),
+    -std::numeric_limits::infinity(),
+    -std::numeric_limits::max(),
+    HEX_DBL(-, 1, 0000000000001, +, 64),
+    HEX_DBL(-, 1, fffffffffffff, +, 63),
+    HEX_DBL(-, 1, fffffffffffff, +, 62),
+    HEX_DBL(-, 1, 0, +, 32),
+    HEX_DBL(-, 1, 0000000000001, +, 31),
+    HEX_DBL(-, 1, fffffffffffff, +, 30),
+    -1000.0,
+    -3.5,
+    HEX_DBL(-, 1, 8000000000001, +, 1),
+    -2.5,
+    HEX_DBL(-, 1, 8000000000001, +, 0),
+    -1.5,
+    -0.5,
+    -0.25,
+    HEX_DBL(-, 1, fffffffffffff, -, 3),
+    -std::numeric_limits::min(),
+    HEX_DBL(-, 0, fffffffffffff, -, 1022),
+    HEX_DBL(-, 0, 0000000000001, -, 1022),
+    -0.0,
+
+    std::numeric_limits::quiet_NaN(),
+    std::numeric_limits::infinity(),
+    std::numeric_limits::max(),
+    HEX_DBL(+, 1, 0, +, 64),
+    HEX_DBL(+, 1, 0000000000001, +, 63),
+    HEX_DBL(+, 1, 000002, +, 32),
+    HEX_DBL(+, 1, fffffffffffff, +, 31),
+    HEX_DBL(+, 1, 0, +, 31),
+    HEX_DBL(+, 1, fffffffffffff, +, 30),
+    +100.0,
+    +3.0,
+    HEX_DBL(+, 1, 7ffffffffffff, +, 1),
+    +2.0,
+    HEX_DBL(+, 1, 7ffffffffffff, +, 0),
+    +1.0,
+    HEX_DBL(+, 1, fffffffffffff, -, 2),
+    +std::numeric_limits::min(),
+    HEX_DBL(+, 0, 0000000000fff, -, 1022),
+    HEX_DBL(+, 0, 0000000000007, -, 1022),
+    +0.0,
+};
+
+// Single-precision counterpart of kSpecialValuesReducedDouble.
+inline constexpr std::array kSpecialValuesReducedFloat{
+    -std::numeric_limits::quiet_NaN(),
+    -std::numeric_limits::infinity(),
+    -std::numeric_limits::max(),
+    HEX_FLT(-, 1, 000002, +, 64),
+    HEX_FLT(-, 1, fffffe, +, 63),
+    HEX_FLT(-, 1, fffffe, +, 62),
+    HEX_FLT(-, 1, 0, +, 32),
+    HEX_FLT(-, 1, fffffe, +, 31),
+    HEX_FLT(-, 1, fffffe, +, 30),
+    -1000.f,
+    -3.5f,
+    HEX_FLT(-, 1, 800002, +, 1),
+    -2.5f,
+    HEX_FLT(-, 1, 800002, +, 0),
+    -1.5f,
+    -0.5f,
+    -0.25f,
+    HEX_FLT(-, 1, fffffe, -, 3),
+    -std::numeric_limits::min(),
+    HEX_FLT(-, 0, fffffe, -, 126),
+    HEX_FLT(-, 0, 000002, -, 126),
+    -0.0f,
+
+    std::numeric_limits::quiet_NaN(),
+    std::numeric_limits::infinity(),
+    std::numeric_limits::max(),
+    HEX_FLT(+, 1, 0, +, 64),
+    HEX_FLT(+, 1, 000002, +, 63),
+    HEX_FLT(+, 1, 000002, +, 32),
+    HEX_FLT(+, 1, 000002, +, 31),
+    HEX_FLT(+, 1, fffffe, +, 30),
+    +100.f,
+    +4.0f,
+    HEX_FLT(+, 1, 7ffffe, +, 1),
+    +2.0f,
+    HEX_FLT(+, 1, 7ffffe, +, 0),
+    +1.0f,
+    HEX_FLT(+, 1, fffffe, -, 2),
+    +std::numeric_limits::min(),
+    HEX_FLT(+, 0, 000ffe, -, 126),
+    HEX_FLT(+, 0, 000006, -, 126),
+    +0.0f,
+};
+
+// Tuple holding one (pointer, size) view per reduced table: float first, then double.
+// QuaternaryFloatingPointSpecialValuesTest selects an element from it with std::get.
+inline constexpr auto kSpecialValReducedRegistry = std::make_tuple(
+    SpecialVals{kSpecialValuesReducedFloat.data(), kSpecialValuesReducedFloat.size()},
+    SpecialVals{kSpecialValuesReducedDouble.data(), kSpecialValuesReducedDouble.size()});
+
+// Feeds `kernel` randomly generated 4-tuples of arguments and passes them, together with
+// `ref_func` and `validator_builder`, to MathTest::Run.
+//
+// GetTestIterationCount() tuples are processed in total, in batches of at most max_batch_size.
+// Within a batch the host buffers are filled by tasks posted to the global thread_pool, one
+// contiguous sub-range per pool thread. Values are drawn from
+// std::uniform_real_distribution(a, b); the defaults span the whole finite range of TArg.
+template
+void QuaternaryFloatingPointBruteForceTest(kernel_sig kernel,
+                                           ref_sig ref_func,
+                                           const ValidatorBuilder& validator_builder,
+                                           const TArg a = std::numeric_limits::lowest(),
+                                           const TArg b = std::numeric_limits::max()) {
+  const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
+  const uint64_t num_iterations = GetTestIterationCount();
+  // The divisor is the per-element footprint: four arguments plus one result.
+  const auto max_batch_size =
+      std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(TArg) * 4 + sizeof(T)), num_iterations);
+  LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)};
+  LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)};
+  LinearAllocGuard x3s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)};
+  LinearAllocGuard x4s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)};
+
+  MathTest math_test(kernel, max_batch_size);
+
+  auto batch_size = max_batch_size;
+  const auto num_threads = thread_pool.thread_count();
+  for (uint64_t i = 0ul; i < num_iterations; i += batch_size) {
+    // The final batch may be shorter than max_batch_size.
+    batch_size = std::min(max_batch_size, num_iterations - i);
+
+    // Split the batch evenly; the first `tail` tasks take one extra element each.
+    const auto min_sub_batch_size = batch_size / num_threads;
+    const auto tail = batch_size % num_threads;
+
+    // NOTE(review): `0u` makes base_idx an unsigned int although sub_batch_size derives from
+    // 64-bit values, and the loop variable below shadows the outer `i` -- confirm both are
+    // intended.
+    auto base_idx = 0u;
+    for (auto i = 0u; i < num_threads; ++i) {
+      const auto sub_batch_size = min_sub_batch_size + (i < tail);
+      thread_pool.Post([=, &x1s, &x2s, &x3s, &x4s] {
+        const auto generator = [=] {
+          // One engine per worker thread, seeded from std::random_device, so the generated
+          // inputs differ from run to run.
+          static thread_local std::mt19937 rng(std::random_device{}());
+          std::uniform_real_distribution> unif_dist(a, b);
+          return static_cast(unif_dist(rng));
+        };
+        std::generate(x1s.ptr() + base_idx, x1s.ptr() + base_idx + sub_batch_size, generator);
+        std::generate(x2s.ptr() + base_idx, x2s.ptr() + base_idx + sub_batch_size, generator);
+        std::generate(x3s.ptr() + base_idx, x3s.ptr() + base_idx + sub_batch_size, generator);
+        std::generate(x4s.ptr() + base_idx, x4s.ptr() + base_idx + sub_batch_size, generator);
+      });
+      base_idx += sub_batch_size;
+    }
+
+    // All sub-ranges must be filled before the buffers are handed to the test.
+    thread_pool.Wait();
+
+    math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, x1s.ptr(),
+                  x2s.ptr(), x3s.ptr(), x4s.ptr());
+  }
+}
+
+// Runs `kernel` over every 4-tuple that can be formed from the reduced special-value table
+// selected out of kSpecialValReducedRegistry, then passes the inputs, `ref_func` and
+// `validator_builder` to MathTest::Run in a single call.
+template
+void QuaternaryFloatingPointSpecialValuesTest(kernel_sig kernel,
+                                              ref_sig ref_func,
+                                              const ValidatorBuilder& validator_builder) {
+  const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
+  const auto values = std::get>(kSpecialValReducedRegistry);
+
+  // One slot per element of the four-fold Cartesian product.
+  const auto size = values.size * values.size * values.size * values.size;
+  LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)};
+  LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)};
+  LinearAllocGuard x3s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)};
+  LinearAllocGuard x4s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)};
+
+  // (i, j, k, l) is flattened with l varying fastest; xNs receives the N-th coordinate's value.
+  for (auto i = 0u; i < values.size; ++i) {
+    for (auto j = 0u; j < values.size; ++j) {
+      for (auto k = 0u; k < values.size; ++k) {
+        for (auto l = 0u; l < values.size; ++l) {
+          x1s.ptr()[((i * values.size + j) * values.size + k) * values.size + l] = values.data[i];
+          x2s.ptr()[((i * values.size + j) * values.size + k) * values.size + l] = values.data[j];
+          x3s.ptr()[((i * values.size + j) * values.size + k) * values.size + l] = values.data[k];
+          x4s.ptr()[((i * values.size + j) * values.size + k) * values.size + l] = values.data[l];
+        }
+      }
+    }
+  }
+
+  MathTest math_test(kernel, size);
+  math_test.template Run(validator_builder, grid_size, block_size, ref_func, size, x1s.ptr(),
+                         x2s.ptr(), x3s.ptr(), x4s.ptr());
+}
+
+// Entry point for four-argument functions: runs the special-values test and the brute-force test
+// as two separate Catch2 sections.
+template
+void QuaternaryFloatingPointTest(kernel_sig kernel,
+                                 ref_sig ref_func,
+                                 const ValidatorBuilder& validator_builder) {
+  SECTION("Special values") {
+    QuaternaryFloatingPointSpecialValuesTest(kernel, ref_func, validator_builder);
+  }
+
+  SECTION("Brute force") {
+    QuaternaryFloatingPointBruteForceTest(kernel, ref_func, validator_builder);
+  }
+}
+
+
+// Defines the kernel for `kern_name` plus a templated test case (instantiated for float and
+// double) that compares it against `ref_func` with a validator built by
+// ULPValidatorBuilderFactory. One of sp_ulp / dp_ulp is chosen by the is_same_v test; presumably
+// sp_ulp applies to float and dp_ulp to double -- TODO confirm once the template arguments are
+// restored.
+#define MATH_QUATERNARY_WITHIN_ULP_TEST_DEF(kern_name, ref_func, sp_ulp, dp_ulp) \
+  MATH_QUATERNARY_KERNEL_DEF(kern_name) \
+  \
+  TEMPLATE_TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive", "", float, double) { \
+    using RT = RefType_t; \
+    RT (*ref)(RT, RT, RT, RT) = ref_func; \
+    const auto ulp = std::is_same_v ? sp_ulp : dp_ulp; \
+    \
+    QuaternaryFloatingPointTest(kern_name##_kernel, ref, \
+                                ULPValidatorBuilderFactory(ulp)); \
+  }
\ No newline at end of file
diff --git a/catch/unit/math/ternary_common.hh b/catch/unit/math/ternary_common.hh
new file mode 100644
index 0000000000..53b28c6b5a
--- /dev/null
+++ b/catch/unit/math/ternary_common.hh
@@ -0,0 +1,142 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#pragma once
+
+#include "math_common.hh"
+#include "math_special_values.hh"
+
+#include
+
+namespace cg = cooperative_groups;
+
+// NOTE(review): every angle-bracket list in this file (the #include target above, template
+// parameter lists, template arguments) is missing from this copy of the patch. Restore them from
+// the original commit before applying.
+
+// Defines the device kernel `<func_name>_kernel` for a three-argument math function. Each thread
+// starts at its rank within the grid and advances by the grid size, writing one result to ys[i]
+// for every visited index i < num_xs. The first `if constexpr` branch calls the `f`-suffixed
+// function (func_name##f) and the second calls func_name itself; presumably these are the float
+// and double cases -- TODO confirm once the is_same_v arguments are restored.
+#define MATH_TERNARY_KERNEL_DEF(func_name) \
+  template \
+  __global__ void func_name##_kernel(T* const ys, const size_t num_xs, T* const x1s, T* const x2s, \
+                                     T* const x3s) { \
+    const auto tid = cg::this_grid().thread_rank(); \
+    const auto stride = cg::this_grid().size(); \
+    \
+    for (auto i = tid; i < num_xs; i += stride) { \
+      if constexpr (std::is_same_v) { \
+        ys[i] = func_name##f(x1s[i], x2s[i], x3s[i]); \
+      } else if constexpr (std::is_same_v) { \
+        ys[i] = func_name(x1s[i], x2s[i], x3s[i]); \
+      } \
+    } \
+  }
+
+// Feeds `kernel` randomly generated argument triples and passes them, together with `ref_func`
+// and `validator_builder`, to MathTest::Run.
+//
+// GetTestIterationCount() triples are processed in total, in batches of at most max_batch_size.
+// Within a batch the host buffers are filled by tasks posted to the global thread_pool, one
+// contiguous sub-range per pool thread. Values are drawn from
+// std::uniform_real_distribution(a, b); the defaults span the whole finite range of TArg.
+template
+void TernaryFloatingPointBruteForceTest(kernel_sig kernel,
+                                        ref_sig ref_func,
+                                        const ValidatorBuilder& validator_builder,
+                                        const TArg a = std::numeric_limits::lowest(),
+                                        const TArg b = std::numeric_limits::max()) {
+  const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
+  const uint64_t num_iterations = GetTestIterationCount();
+  // The divisor is the per-element footprint: three arguments plus one result.
+  const auto max_batch_size =
+      std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(TArg) * 3 + sizeof(T)), num_iterations);
+  LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)};
+  LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)};
+  LinearAllocGuard x3s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)};
+
+  MathTest math_test(kernel, max_batch_size);
+
+  auto batch_size = max_batch_size;
+  const auto num_threads = thread_pool.thread_count();
+  for (uint64_t i = 0ul; i < num_iterations; i += batch_size) {
+    // The final batch may be shorter than max_batch_size.
+    batch_size = std::min(max_batch_size, num_iterations - i);
+
+    // Split the batch evenly; the first `tail` tasks take one extra element each.
+    const auto min_sub_batch_size = batch_size / num_threads;
+    const auto tail = batch_size % num_threads;
+
+    // NOTE(review): `0u` makes base_idx an unsigned int although sub_batch_size derives from
+    // 64-bit values, and the loop variable below shadows the outer `i` -- confirm both are
+    // intended.
+    auto base_idx = 0u;
+    for (auto i = 0u; i < num_threads; ++i) {
+      const auto sub_batch_size = min_sub_batch_size + (i < tail);
+      thread_pool.Post([=, &x1s, &x2s, &x3s] {
+        const auto generator = [=] {
+          // One engine per worker thread, seeded from std::random_device, so the generated
+          // inputs differ from run to run.
+          static thread_local std::mt19937 rng(std::random_device{}());
+          std::uniform_real_distribution> unif_dist(a, b);
+          return static_cast(unif_dist(rng));
+        };
+        std::generate(x1s.ptr() + base_idx, x1s.ptr() + base_idx + sub_batch_size, generator);
+        std::generate(x2s.ptr() + base_idx, x2s.ptr() + base_idx + sub_batch_size, generator);
+        std::generate(x3s.ptr() + base_idx, x3s.ptr() + base_idx + sub_batch_size, generator);
+      });
+      base_idx += sub_batch_size;
+    }
+
+    // All sub-ranges must be filled before the buffers are handed to the test.
+    thread_pool.Wait();
+
+    math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, x1s.ptr(),
+                  x2s.ptr(), x3s.ptr());
+  }
+}
+
+// Runs `kernel` over every triple that can be formed from the special-value table selected out
+// of kSpecialValRegistry, then passes the inputs, `ref_func` and `validator_builder` to
+// MathTest::Run in a single call.
+template
+void TernaryFloatingPointSpecialValuesTest(kernel_sig kernel,
+                                           ref_sig ref_func,
+                                           const ValidatorBuilder& validator_builder) {
+  const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel);
+  const auto values = std::get>(kSpecialValRegistry);
+
+  // One slot per element of the three-fold Cartesian product.
+  const auto size = values.size * values.size * values.size;
+  LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)};
+  LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)};
+  LinearAllocGuard x3s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)};
+
+  // (i, j, k) is flattened with k varying fastest; xNs receives the N-th coordinate's value.
+  for (auto i = 0u; i < values.size; ++i) {
+    for (auto j = 0u; j < values.size; ++j) {
+      for (auto k = 0u; k < values.size; ++k) {
+        x1s.ptr()[(i * values.size + j) * values.size + k] = values.data[i];
+        x2s.ptr()[(i * values.size + j) * values.size + k] = values.data[j];
+        x3s.ptr()[(i * values.size + j) * values.size + k] = values.data[k];
+      }
+    }
+  }
+
+  MathTest math_test(kernel, size);
+  math_test.template Run(validator_builder, grid_size, block_size, ref_func, size, x1s.ptr(),
+                         x2s.ptr(), x3s.ptr());
+}
+
+// Entry point for three-argument functions: runs the special-values test and the brute-force
+// test as two separate Catch2 sections.
+template
+void TernaryFloatingPointTest(kernel_sig kernel, ref_sig ref_func,
+                              const ValidatorBuilder& validator_builder) {
+  SECTION("Special values") {
+    TernaryFloatingPointSpecialValuesTest(kernel, ref_func, validator_builder);
+  }
+
+  SECTION("Brute force") { TernaryFloatingPointBruteForceTest(kernel, ref_func, validator_builder); }
+}
+
+
+// Defines the kernel for `kern_name` plus a templated test case (instantiated for float and
+// double) that compares it against `ref_func` with a validator built by
+// ULPValidatorBuilderFactory. One of sp_ulp / dp_ulp is chosen by the is_same_v test; presumably
+// sp_ulp applies to float and dp_ulp to double -- TODO confirm once the template arguments are
+// restored.
+#define MATH_TERNARY_WITHIN_ULP_TEST_DEF(kern_name, ref_func, sp_ulp, dp_ulp) \
+  MATH_TERNARY_KERNEL_DEF(kern_name) \
+  \
+  TEMPLATE_TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive", "", float, double) { \
+    using RT = RefType_t; \
+    RT (*ref)(RT, RT, RT) = ref_func; \
+    const auto ulp = std::is_same_v ? sp_ulp : dp_ulp; \
+    \
+    TernaryFloatingPointTest(kern_name##_kernel, ref, \
+                             ULPValidatorBuilderFactory(ulp)); \
+  }
\ No newline at end of file
diff --git a/catch/unit/math/thread_pool.hh b/catch/unit/math/thread_pool.hh
new file mode 100644
index 0000000000..d45e5e8b1b
--- /dev/null
+++ b/catch/unit/math/thread_pool.hh
@@ -0,0 +1,64 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+
+#include <atomic>
+#include <thread>
+
+#include <boost/asio/post.hpp>
+#include <boost/asio/thread_pool.hpp>
+
+// Thin wrapper around boost::asio::thread_pool that counts submitted-but-unfinished tasks in an
+// atomic counter, so that a caller can block until everything it posted has completed.
+class ThreadPool {
+ public:
+  // Creates a pool with `thread_count` worker threads.
+  //
+  // std::thread::hardware_concurrency() is allowed to return 0 when the value cannot be
+  // determined. A pool without workers would never run a task (Wait() would spin forever) and
+  // callers divide work by thread_count(), so fall back to a single worker in that case.
+  ThreadPool(size_t thread_count = std::thread::hardware_concurrency())
+      : thread_count_(thread_count != 0 ? thread_count : 1) {}
+
+  // Blocks until the worker threads have finished.
+  ~ThreadPool() { thread_pool_.join(); }
+
+  // Submits a task to the thread pool and increments the number of active tasks. The task is
+  // copied into a wrapper lambda that decrements the counter once the task has returned.
+  template <typename T> void Post(T&& task) {
+    ++active_tasks_;
+    auto&& task_wrapper = [task, this] {
+      task();
+      // Default (sequentially consistent) decrement: acts as the release that publishes the
+      // task's writes to a thread returning from Wait().
+      --active_tasks_;
+    };
+    boost::asio::post(thread_pool_, task_wrapper);
+  }
+
+  // Spins until the number of active tasks reaches zero.
+  //
+  // The load uses acquire ordering so that, once zero is observed, everything written by the
+  // finished tasks is visible to the caller; a relaxed load gives no such guarantee. yield()
+  // keeps the waiting thread from competing with the workers for a core.
+  void Wait() const {
+    while (active_tasks_.load(std::memory_order_acquire) != 0) {
+      std::this_thread::yield();
+    }
+  }
+
+  // Number of worker threads in the pool; always at least 1.
+  size_t thread_count() const { return thread_count_; }
+
+ private:
+  // Declared before thread_pool_ because thread_pool_ is initialized from it.
+  const size_t thread_count_;
+  boost::asio::thread_pool thread_pool_{thread_count_};
+  // Explicitly zero-initialized: before C++20 a default-constructed std::atomic holds an
+  // indeterminate value unless the object has static storage duration.
+  std::atomic<size_t> active_tasks_{0};
+};
+
+// Process-wide pool shared by the math tests.
+inline ThreadPool thread_pool{};
diff --git a/catch/unit/math/trig_double_precision_negative_kernels.cc b/catch/unit/math/trig_double_precision_negative_kernels.cc
new file mode 100644
index 0000000000..2008837fd4
--- /dev/null
+++ b/catch/unit/math/trig_double_precision_negative_kernels.cc
@@ -0,0 +1,108 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include
+
+// NOTE(review): the #include target above is missing from this copy of the patch; restore it
+// from the original commit before applying.
+
+// Negative compilation tests for the double-precision trigonometric device functions. Every
+// kernel below passes one argument of a type the function must reject, so each kernel is expected
+// to produce exactly one compiler error. The expected totals are stated per section.
+
+// Class type with no conversion to an arithmetic or pointer type; used as an invalid argument.
+class Dummy {
+ public:
+  __device__ Dummy() {}
+  __device__ ~Dummy() {}
+};
+
+// Emits two invalid kernels for a unary function: v1 passes a pointer, v2 passes a Dummy.
+#define TRIG_DP_UNARY_NEGATIVE_KERNELS(func_name) \
+  __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \
+  __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); }
+
+/*Expecting 2 errors per macro invocation - 26 total*/
+TRIG_DP_UNARY_NEGATIVE_KERNELS(sin)
+TRIG_DP_UNARY_NEGATIVE_KERNELS(cos)
+TRIG_DP_UNARY_NEGATIVE_KERNELS(tan)
+TRIG_DP_UNARY_NEGATIVE_KERNELS(asin)
+TRIG_DP_UNARY_NEGATIVE_KERNELS(acos)
+TRIG_DP_UNARY_NEGATIVE_KERNELS(atan)
+TRIG_DP_UNARY_NEGATIVE_KERNELS(sinh)
+TRIG_DP_UNARY_NEGATIVE_KERNELS(cosh)
+TRIG_DP_UNARY_NEGATIVE_KERNELS(tanh)
+TRIG_DP_UNARY_NEGATIVE_KERNELS(asinh)
+TRIG_DP_UNARY_NEGATIVE_KERNELS(atanh)
+TRIG_DP_UNARY_NEGATIVE_KERNELS(sinpi)
+TRIG_DP_UNARY_NEGATIVE_KERNELS(cospi)
+
+/*Expecting 4 errors*/
+__global__ void atan2_kernel_v1(double* x, double y) { double result = atan2(x, y); }
+__global__ void atan2_kernel_v2(double x, double* y) { double result = atan2(x, y); }
+__global__ void atan2_kernel_v3(Dummy x, double y) { double result = atan2(x, y); }
+__global__ void atan2_kernel_v4(double x, Dummy y) { double result = atan2(x, y); }
+
+/*Expecting 18 errors*/
+// v1-v2: invalid x; v3-v10: invalid sptr; v11-v18: invalid cptr.
+__global__ void sincos_kernel_v1(double* x, double* sptr, double* cptr) { sincos(x, sptr, cptr); }
+__global__ void sincos_kernel_v2(Dummy x, double* sptr, double* cptr) { sincos(x, sptr, cptr); }
+__global__ void sincos_kernel_v3(double x, char* sptr, double* cptr) { sincos(x, sptr, cptr); }
+__global__ void sincos_kernel_v4(double x, short* sptr, double* cptr) { sincos(x, sptr, cptr); }
+__global__ void sincos_kernel_v5(double x, int* sptr, double* cptr) { sincos(x, sptr, cptr); }
+__global__ void sincos_kernel_v6(double x, long* sptr, double* cptr) { sincos(x, sptr, cptr); }
+__global__ void sincos_kernel_v7(double x, long long* sptr, double* cptr) { sincos(x, sptr, cptr); }
+__global__ void sincos_kernel_v8(double x, float* sptr, double* cptr) { sincos(x, sptr, cptr); }
+__global__ void sincos_kernel_v9(double x, Dummy* sptr, double* cptr) { sincos(x, sptr, cptr); }
+__global__ void sincos_kernel_v10(double x, const double* sptr, double* cptr) {
+  sincos(x, sptr, cptr);
+}
+__global__ void sincos_kernel_v11(double x, double* sptr, char* cptr) { sincos(x, sptr, cptr); }
+__global__ void sincos_kernel_v12(double x, double* sptr, short* cptr) { sincos(x, sptr, cptr); }
+__global__ void sincos_kernel_v13(double x, double* sptr, int* cptr) { sincos(x, sptr, cptr); }
+__global__ void sincos_kernel_v14(double x, double* sptr, long* cptr) { sincos(x, sptr, cptr); }
+__global__ void sincos_kernel_v15(double x, double* sptr, long long* cptr) {
+  sincos(x, sptr, cptr);
+}
+__global__ void sincos_kernel_v16(double x, double* sptr, float* cptr) { sincos(x, sptr, cptr); }
+__global__ void sincos_kernel_v17(double x, double* sptr, Dummy* cptr) { sincos(x, sptr, cptr); }
+__global__ void sincos_kernel_v18(double x, double* sptr, const double* cptr) {
+  sincos(x, sptr, cptr);
+}
+
+/*Expecting 18 errors*/
+// sincospi is the double-precision function (sincospif is the float one), so the valid parameter
+// types here are double / double*, exactly as in the sincos section above. Using float for the
+// "valid" parameters would make kernels such as v8 and v16 fail because of a parameter other
+// than the one under test. v1-v2: invalid x; v3-v10: invalid sptr; v11-v18: invalid cptr.
+__global__ void sincospi_kernel_v1(double* x, double* sptr, double* cptr) {
+  sincospi(x, sptr, cptr);
+}
+__global__ void sincospi_kernel_v2(Dummy x, double* sptr, double* cptr) {
+  sincospi(x, sptr, cptr);
+}
+__global__ void sincospi_kernel_v3(double x, char* sptr, double* cptr) {
+  sincospi(x, sptr, cptr);
+}
+__global__ void sincospi_kernel_v4(double x, short* sptr, double* cptr) {
+  sincospi(x, sptr, cptr);
+}
+__global__ void sincospi_kernel_v5(double x, int* sptr, double* cptr) {
+  sincospi(x, sptr, cptr);
+}
+__global__ void sincospi_kernel_v6(double x, long* sptr, double* cptr) {
+  sincospi(x, sptr, cptr);
+}
+__global__ void sincospi_kernel_v7(double x, long long* sptr, double* cptr) {
+  sincospi(x, sptr, cptr);
+}
+__global__ void sincospi_kernel_v8(double x, float* sptr, double* cptr) {
+  sincospi(x, sptr, cptr);
+}
+__global__ void sincospi_kernel_v9(double x, Dummy* sptr, double* cptr) {
+  sincospi(x, sptr, cptr);
+}
+__global__ void sincospi_kernel_v10(double x, const double* sptr, double* cptr) {
+  sincospi(x, sptr, cptr);
+}
+__global__ void sincospi_kernel_v11(double x, double* sptr, char* cptr) {
+  sincospi(x, sptr, cptr);
+}
+__global__ void sincospi_kernel_v12(double x, double* sptr, short* cptr) {
+  sincospi(x, sptr, cptr);
+}
+__global__ void sincospi_kernel_v13(double x, double* sptr, int* cptr) {
+  sincospi(x, sptr, cptr);
+}
+__global__ void sincospi_kernel_v14(double x, double* sptr, long* cptr) {
+  sincospi(x, sptr, cptr);
+}
+__global__ void sincospi_kernel_v15(double x, double* sptr, long long* cptr) {
+  sincospi(x, sptr, cptr);
+}
+__global__ void sincospi_kernel_v16(double x, double* sptr, float* cptr) {
+  sincospi(x, sptr, cptr);
+}
+__global__ void sincospi_kernel_v17(double x, double* sptr, Dummy* cptr) {
+  sincospi(x, sptr, cptr);
+}
+__global__ void sincospi_kernel_v18(double x, double* sptr, const double* cptr) {
+  sincospi(x, sptr, cptr);
+}
\ No newline at end of file
diff --git a/catch/unit/math/trig_funcs.cc b/catch/unit/math/trig_funcs.cc
new file mode 100644
index 0000000000..9671b94ab9
--- /dev/null
+++ b/catch/unit/math/trig_funcs.cc
@@ -0,0 +1,137 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "trig_negative_kernels_rtc.hh" + +#include "unary_common.hh" +#include "binary_common.hh" + +#include + + +MATH_UNARY_WITHIN_ULP_TEST_DEF(sin, std::sin, 2, 2); +TEST_CASE("Unit_Device_sin_sinf_Negative_RTC") { NegativeTestRTCWrapper<4>(kSin); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(cos, std::cos, 2, 2) +TEST_CASE("Unit_Device_cos_cosf_Negative_RTC") { NegativeTestRTCWrapper<4>(kCos); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(tan, std::tan, 4, 2) +TEST_CASE("Unit_Device_tan_tanf_Negative_RTC") { NegativeTestRTCWrapper<4>(kTan); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(asin, std::asin, 2, 2) +TEST_CASE("Unit_Device_asin_asinf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAsin); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(acos, std::acos, 2, 2) +TEST_CASE("Unit_Device_acos_acosf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAcos); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(atan, std::atan, 2, 2) +TEST_CASE("Unit_Device_atan_atanf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAtan); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(sinh, std::sinh, 3, 2) +TEST_CASE("Unit_Device_sinh_sinhf_Negative_RTC") { NegativeTestRTCWrapper<4>(kSinh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(cosh, std::cosh, 2, 1) +TEST_CASE("Unit_Device_cosh_coshf_Negative_RTC") { NegativeTestRTCWrapper<4>(kCosh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(tanh, std::tanh, 2, 1) +TEST_CASE("Unit_Device_tanh_tanhf_Negative_RTC") { NegativeTestRTCWrapper<4>(kTanh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(asinh, std::asinh, 3, 2) +TEST_CASE("Unit_Device_asinh_asinhf_Negative_RTC") { 
NegativeTestRTCWrapper<4>(kAsinh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(acosh, std::acosh, 4, 2) +TEST_CASE("Unit_Device_acosh_acoshf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAcosh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(atanh, std::atanh, 3, 2) +TEST_CASE("Unit_Device_atanh_atanhf_Negative_RTC") { NegativeTestRTCWrapper<4>(kAtanh); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(sinpi, boost::math::sin_pi, 2, 2); +TEST_CASE("Unit_Device_sinpi_sinpif_Negative_RTC") { NegativeTestRTCWrapper<4>(kSinpi); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(cospi, boost::math::cos_pi, 2, 2); +TEST_CASE("Unit_Device_cospi_cospif_Negative_RTC") { NegativeTestRTCWrapper<4>(kCospi); } + +MATH_BINARY_WITHIN_ULP_TEST_DEF(atan2, std::atan2, 3, 2); +TEST_CASE("Unit_Device_atan2_atan2f_Negative_RTC") { NegativeTestRTCWrapper<8>(kAtan2); } + + +template +__global__ void sincos_kernel(std::pair* const ys, const size_t num_xs, T* const xs) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + if constexpr (std::is_same_v) { + sincosf(xs[i], &ys[i].first, &ys[i].second); + } else if constexpr (std::is_same_v) { + sincos(xs[i], &ys[i].first, &ys[i].second); + } + } +} + +template std::pair sincos(T x) { return {std::sin(x), std::cos(x)}; } + +TEST_CASE("Unit_Device_sincos_Accuracy_Positive - float") { + UnarySinglePrecisionTest( + sincos_kernel, sincos, + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(2))); +} + +TEST_CASE("Unit_Device_sincos_Accuracy_Positive - double") { + const auto validator_builder = + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(2)); + UnaryDoublePrecisionTest(sincos_kernel, sincos, validator_builder); +} + +TEST_CASE("Unit_Device_sincos_sincosf_Negative_RTC") { NegativeTestRTCWrapper<36>(kSincos); } + + +template +__global__ void sincospi_kernel(std::pair* const ys, const size_t num_xs, T* const xs) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = 
cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + if constexpr (std::is_same_v) { + sincospif(xs[i], &ys[i].first, &ys[i].second); + } else if constexpr (std::is_same_v) { + sincospi(xs[i], &ys[i].first, &ys[i].second); + } + } +} + +template std::pair sincospi(T x) { + return {boost::math::sin_pi(x), boost::math::cos_pi(x)}; +} + +TEST_CASE("Unit_Device_sincospi_Accuracy_Positive - float") { + UnarySinglePrecisionTest( + sincospi_kernel, sincospi, + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(2))); +} + +TEST_CASE("Unit_Device_sincospi_Accuracy_Positive - double") { + const auto validator_builder = + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(2)); + UnaryDoublePrecisionTest(sincospi_kernel, sincospi, validator_builder); +} + +TEST_CASE("Unit_Device_sincospi_sincospif_Negative_RTC") { NegativeTestRTCWrapper<36>(kSincospi); } \ No newline at end of file diff --git a/catch/unit/math/trig_negative_kernels_rtc.hh b/catch/unit/math/trig_negative_kernels_rtc.hh new file mode 100644 index 0000000000..1c855d755e --- /dev/null +++ b/catch/unit/math/trig_negative_kernels_rtc.hh @@ -0,0 +1,320 @@ +// #define TRIG_UNARY_NEGATIVE_KERNELS(func_name) +// class Dummy { +// public: +// __device__ Dummy() {} +// __device__ ~Dummy() {} +// }; +// __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } +// __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } +// __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } +// __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } + +static constexpr auto kSin{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sinf_kernel_v1(float* x) { float result = sinf(x); } + __global__ void sinf_kernel_v2(Dummy x) { float result = sinf(x); } + __global__ void sin_kernel_v1(double* x) { double result = sin(x); } + __global__ void 
sin_kernel_v2(Dummy x) { double result = sin(x); } + )"}; + +static constexpr auto kCos{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void cosf_kernel_v1(float* x) { float result = cosf(x); } + __global__ void cosf_kernel_v2(Dummy x) { float result = cosf(x); } + __global__ void cos_kernel_v1(double* x) { double result = cos(x); } + __global__ void cos_kernel_v2(Dummy x) { double result = cos(x); } + )"}; + +static constexpr auto kTan{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void tanf_kernel_v1(float* x) { float result = tanf(x); } + __global__ void tanf_kernel_v2(Dummy x) { float result = tanf(x); } + __global__ void tan_kernel_v1(double* x) { double result = tan(x); } + __global__ void tan_kernel_v2(Dummy x) { double result = tan(x); } + )"}; + +static constexpr auto kAsin{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void asinf_kernel_v1(float* x) { float result = asinf(x); } + __global__ void asinf_kernel_v2(Dummy x) { float result = asinf(x); } + __global__ void asin_kernel_v1(double* x) { double result = asin(x); } + __global__ void asin_kernel_v2(Dummy x) { double result = asin(x); } + )"}; + +static constexpr auto kAcos{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void acosf_kernel_v1(float* x) { float result = acosf(x); } + __global__ void acosf_kernel_v2(Dummy x) { float result = acosf(x); } + __global__ void acos_kernel_v1(double* x) { double result = acos(x); } + __global__ void acos_kernel_v2(Dummy x) { double result = acos(x); } + )"}; + +static constexpr auto kAtan{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void atanf_kernel_v1(float* x) { float result = atanf(x); } + __global__ void atanf_kernel_v2(Dummy x) { float result = atanf(x); } + __global__ void atan_kernel_v1(double* x) { double 
result = atan(x); } + __global__ void atan_kernel_v2(Dummy x) { double result = atan(x); } + )"}; + +static constexpr auto kSinh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sinhf_kernel_v1(float* x) { float result = sinhf(x); } + __global__ void sinhf_kernel_v2(Dummy x) { float result = sinhf(x); } + __global__ void sinh_kernel_v1(double* x) { double result = sinh(x); } + __global__ void sinh_kernel_v2(Dummy x) { double result = sinh(x); } + )"}; + +static constexpr auto kCosh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void coshf_kernel_v1(float* x) { float result = coshf(x); } + __global__ void coshf_kernel_v2(Dummy x) { float result = coshf(x); } + __global__ void cosh_kernel_v1(double* x) { double result = cosh(x); } + __global__ void cosh_kernel_v2(Dummy x) { double result = cosh(x); } + )"}; + +static constexpr auto kTanh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void tanhf_kernel_v1(float* x) { float result = tanhf(x); } + __global__ void tanhf_kernel_v2(Dummy x) { float result = tanhf(x); } + __global__ void tanh_kernel_v1(double* x) { double result = tanh(x); } + __global__ void tanh_kernel_v2(Dummy x) { double result = tanh(x); } + )"}; + +static constexpr auto kAsinh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void asinhf_kernel_v1(float* x) { float result = asinhf(x); } + __global__ void asinhf_kernel_v2(Dummy x) { float result = asinhf(x); } + __global__ void asinh_kernel_v1(double* x) { double result = asinh(x); } + __global__ void asinh_kernel_v2(Dummy x) { double result = asinh(x); } + )"}; + +static constexpr auto kAcosh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void acoshf_kernel_v1(float* x) { float result = acoshf(x); } + __global__ void acoshf_kernel_v2(Dummy x) { float 
result = acoshf(x); } + __global__ void acosh_kernel_v1(double* x) { double result = acosh(x); } + __global__ void acosh_kernel_v2(Dummy x) { double result = acosh(x); } + )"}; + +static constexpr auto kAtanh{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void atanhf_kernel_v1(float* x) { float result = atanhf(x); } + __global__ void atanhf_kernel_v2(Dummy x) { float result = atanhf(x); } + __global__ void atanh_kernel_v1(double* x) { double result = atanh(x); } + __global__ void atanh_kernel_v2(Dummy x) { double result = atanh(x); } + )"}; + +static constexpr auto kSinpi{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sinpif_kernel_v1(float* x) { float result = sinpif(x); } + __global__ void sinpif_kernel_v2(Dummy x) { float result = sinpif(x); } + __global__ void sinpi_kernel_v1(double* x) { double result = sinpi(x); } + __global__ void sinpi_kernel_v2(Dummy x) { double result = sinpi(x); } + )"}; + +static constexpr auto kCospi{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void cospif_kernel_v1(float* x) { float result = cospif(x); } + __global__ void cospif_kernel_v2(Dummy x) { float result = cospif(x); } + __global__ void cospi_kernel_v1(double* x) { double result = cospi(x); } + __global__ void cospi_kernel_v2(Dummy x) { double result = cospi(x); } + )"}; + +static constexpr auto kAtan2{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void atan2f_kernel_v1(float* x, float y) { float result = atan2f(x, y); } + __global__ void atan2f_kernel_v2(float x, float* y) { float result = atan2f(x, y); } + __global__ void atan2f_kernel_v3(Dummy x, float y) { float result = atan2f(x, y); } + __global__ void atan2f_kernel_v4(float x, Dummy y) { float result = atan2f(x, y); } + __global__ void atan2_kernel_v1(double* x, double y) { double result = atan2(x, y); } + __global__ 
void atan2_kernel_v2(double x, double* y) { double result = atan2(x, y); } + __global__ void atan2_kernel_v3(Dummy x, double y) { double result = atan2(x, y); } + __global__ void atan2_kernel_v4(double x, Dummy y) { double result = atan2(x, y); } + )"}; + +static constexpr auto kSincos{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sincosf_kernel_v1(float* x, float* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v2(Dummy x, float* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v3(float x, char* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v4(float x, short* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v5(float x, int* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v6(float x, long* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v7(float x, long long* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v8(float x, double* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v9(float x, Dummy* sptr, float* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v10(float x, const float* sptr, float* cptr) { + sincosf(x, sptr, cptr); + } + __global__ void sincosf_kernel_v11(float x, float* sptr, char* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v12(float x, float* sptr, short* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v13(float x, float* sptr, int* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v14(float x, float* sptr, long* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v15(float x, float* sptr, long long* cptr) { + sincosf(x, sptr, cptr); + } + __global__ void sincosf_kernel_v16(float x, float* sptr, double* cptr) { sincosf(x, sptr, cptr); } + __global__ void 
sincosf_kernel_v17(float x, float* sptr, Dummy* cptr) { sincosf(x, sptr, cptr); } + __global__ void sincosf_kernel_v18(float x, float* sptr, const float* cptr) { + sincosf(x, sptr, cptr); + } + __global__ void sincos_kernel_v1(double* x, double* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v2(Dummy x, double* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v3(double x, char* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v4(double x, short* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v5(double x, int* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v6(double x, long* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v7(double x, long long* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v8(double x, float* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v9(double x, Dummy* sptr, double* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v10(double x, const double* sptr, double* cptr) { + sincos(x, sptr, cptr); + } + __global__ void sincos_kernel_v11(double x, double* sptr, char* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v12(double x, double* sptr, short* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v13(double x, double* sptr, int* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v14(double x, double* sptr, long* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v15(double x, double* sptr, long long* cptr) { + sincos(x, sptr, cptr); + } + __global__ void sincos_kernel_v16(double x, double* sptr, float* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v17(double x, double* sptr, Dummy* cptr) { sincos(x, sptr, cptr); } + __global__ void sincos_kernel_v18(double x, double* sptr, const double* cptr) { + sincos(x, sptr, 
cptr); + } + )"}; + +static constexpr auto kSincospi{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sincospif_kernel_v1(float* x, float* sptr, float* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v2(Dummy x, float* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v3(float x, char* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v4(float x, short* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v5(float x, int* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v6(float x, long* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v7(float x, long long* sptr, float* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v8(float x, double* sptr, float* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v9(float x, Dummy* sptr, float* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v10(float x, const float* sptr, float* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v11(float x, float* sptr, char* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v12(float x, float* sptr, short* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v13(float x, float* sptr, int* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v14(float x, float* sptr, long* cptr) { sincospif(x, sptr, cptr); } + __global__ void sincospif_kernel_v15(float x, float* sptr, long long* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v16(float x, float* sptr, double* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v17(float x, float* sptr, Dummy* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospif_kernel_v18(float x, float* sptr, 
const float* cptr) { + sincospif(x, sptr, cptr); + } + __global__ void sincospi_kernel_v1(float* x, float* sptr, float* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v2(Dummy x, float* sptr, float* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v3(float x, char* sptr, float* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v4(float x, short* sptr, float* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v5(float x, int* sptr, float* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v6(float x, long* sptr, float* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v7(float x, long long* sptr, float* cptr) { + sincospi(x, sptr, cptr); + } + __global__ void sincospi_kernel_v8(float x, double* sptr, float* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v9(float x, Dummy* sptr, float* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v10(float x, const float* sptr, float* cptr) { + sincospi(x, sptr, cptr); + } + __global__ void sincospi_kernel_v11(float x, float* sptr, char* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v12(float x, float* sptr, short* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v13(float x, float* sptr, int* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v14(float x, float* sptr, long* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v15(float x, float* sptr, long long* cptr) { + sincospi(x, sptr, cptr); + } + __global__ void sincospi_kernel_v16(float x, float* sptr, double* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v17(float x, float* sptr, Dummy* cptr) { sincospi(x, sptr, cptr); } + __global__ void sincospi_kernel_v18(float x, float* sptr, const float* cptr) { + sincospi(x, sptr, cptr); + } + )"}; \ No newline at end of file diff --git 
a/catch/unit/math/trig_single_precision_negative_kernels.cc b/catch/unit/math/trig_single_precision_negative_kernels.cc new file mode 100644 index 0000000000..5e66d386be --- /dev/null +++ b/catch/unit/math/trig_single_precision_negative_kernels.cc @@ -0,0 +1,118 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define TRIG_SP_UNARY_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } \ + __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } + +/*Expecting 2 errors per macro invocation - 26 total*/ +TRIG_SP_UNARY_NEGATIVE_KERNELS(sin) +TRIG_SP_UNARY_NEGATIVE_KERNELS(cos) +TRIG_SP_UNARY_NEGATIVE_KERNELS(tan) +TRIG_SP_UNARY_NEGATIVE_KERNELS(asin) +TRIG_SP_UNARY_NEGATIVE_KERNELS(acos) +TRIG_SP_UNARY_NEGATIVE_KERNELS(atan) +TRIG_SP_UNARY_NEGATIVE_KERNELS(sinh) +TRIG_SP_UNARY_NEGATIVE_KERNELS(cosh) +TRIG_SP_UNARY_NEGATIVE_KERNELS(tanh) +TRIG_SP_UNARY_NEGATIVE_KERNELS(asinh) +TRIG_SP_UNARY_NEGATIVE_KERNELS(atanh) +TRIG_SP_UNARY_NEGATIVE_KERNELS(sinpi) +TRIG_SP_UNARY_NEGATIVE_KERNELS(cospi) + +/*Expecting 4 errors*/ +__global__ void atan2f_kernel_v1(float* x, float y) { float result = atan2f(x, y); } +__global__ void atan2f_kernel_v2(float x, float* y) { float result = atan2f(x, y); } +__global__ void atan2f_kernel_v3(Dummy x, float y) { float result = atan2f(x, y); } +__global__ void atan2f_kernel_v4(float x, Dummy y) { float result = atan2f(x, y); } + +/*Expecting 18 errors*/ +__global__ void sincosf_kernel_v1(float* x, float* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v2(Dummy x, float* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v3(float x, char* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v4(float x, short* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v5(float x, int* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v6(float x, long* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v7(float x, long long* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void 
sincosf_kernel_v8(float x, double* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v9(float x, Dummy* sptr, float* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v10(float x, const float* sptr, float* cptr) { + sincosf(x, sptr, cptr); +} +__global__ void sincosf_kernel_v11(float x, float* sptr, char* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v12(float x, float* sptr, short* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v13(float x, float* sptr, int* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v14(float x, float* sptr, long* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v15(float x, float* sptr, long long* cptr) { + sincosf(x, sptr, cptr); +} +__global__ void sincosf_kernel_v16(float x, float* sptr, double* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v17(float x, float* sptr, Dummy* cptr) { sincosf(x, sptr, cptr); } +__global__ void sincosf_kernel_v18(float x, float* sptr, const float* cptr) { + sincosf(x, sptr, cptr); +} + +/*Expecting 18 errors*/ +__global__ void sincospif_kernel_v1(float* x, float* sptr, float* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v2(Dummy x, float* sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v3(float x, char* sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v4(float x, short* sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v5(float x, int* sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v6(float x, long* sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v7(float x, long long* sptr, float* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v8(float x, double* sptr, float* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v9(float x, Dummy* 
sptr, float* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v10(float x, const float* sptr, float* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v11(float x, float* sptr, char* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v12(float x, float* sptr, short* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v13(float x, float* sptr, int* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v14(float x, float* sptr, long* cptr) { sincospif(x, sptr, cptr); } +__global__ void sincospif_kernel_v15(float x, float* sptr, long long* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v16(float x, float* sptr, double* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v17(float x, float* sptr, Dummy* cptr) { + sincospif(x, sptr, cptr); +} +__global__ void sincospif_kernel_v18(float x, float* sptr, const float* cptr) { + sincospif(x, sptr, cptr); +} \ No newline at end of file diff --git a/catch/unit/math/unary_common.hh b/catch/unit/math/unary_common.hh new file mode 100644 index 0000000000..d80ffd1bbc --- /dev/null +++ b/catch/unit/math/unary_common.hh @@ -0,0 +1,198 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "math_common.hh" +#include "math_special_values.hh" + +#include + +namespace cg = cooperative_groups; + +#define MATH_UNARY_KERNEL_DEF(func_name) \ + template \ + __global__ void func_name##_kernel(RT* const ys, const size_t num_xs, T* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + if constexpr (std::is_same_v) { \ + ys[i] = func_name##f(xs[i]); \ + } else if constexpr (std::is_same_v) { \ + ys[i] = func_name(xs[i]); \ + } \ + } \ + } + +template +void UnarySinglePrecisionBruteForceTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + uint64_t stop = std::numeric_limits::max() + 1ul; + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(float) + sizeof(T)), stop); + LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(float)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + + for (uint64_t v = 0u; v < stop;) { + batch_size = std::min(max_batch_size, stop - v); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < 
tail); + + thread_pool.Post([=, &values] { + auto t = v; + uint32_t val; + for (auto j = 0u; j < sub_batch_size; ++j) { + val = static_cast(t++); + values.ptr()[base_idx + j] = *reinterpret_cast(&val); + } + }); + + v += sub_batch_size; + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, values.ptr()); + } +} + +template +void UnarySinglePrecisionRangeTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder, const float a, + const float b) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + uint64_t stop = std::numeric_limits::max() + 1ul; + const auto max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(float) + sizeof(T)); + LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(float)}; + + MathTest math_test(kernel, max_batch_size); + + uint32_t val = 0u; + const auto num_threads = thread_pool.thread_count(); + + size_t inserted = 0u; + for (float v = a; v != b; v = std::nextafter(v, b)) { + values.ptr()[inserted++] = v; + if (inserted < max_batch_size) continue; + + math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, values.ptr()); + inserted = 0u; + } +} + +template +void UnaryDoublePrecisionBruteForceTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder, + const double a = std::numeric_limits::lowest(), + const double b = std::numeric_limits::max()) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const uint64_t num_iterations = GetTestIterationCount(); + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(double) + sizeof(T)), num_iterations); + LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(double)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = 
thread_pool.thread_count(); + for (uint64_t i = 0ul; i < num_iterations; i += batch_size) { + batch_size = std::min(max_batch_size, num_iterations - i); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + thread_pool.Post([=, &values] { + const auto generator = [=] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_real_distribution unif_dist(a, b); + return static_cast(unif_dist(rng)); + }; + std::generate(values.ptr() + base_idx, values.ptr() + base_idx + sub_batch_size, generator); + }); + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, values.ptr()); + } +} + +template +void UnaryDoublePrecisionSpecialValuesTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto values = std::get>(kSpecialValRegistry); + + MathTest math_test(kernel, values.size); + math_test.template Run(validator_builder, grid_size, block_size, ref_func, values.size, + values.data); +} + +template +void UnarySinglePrecisionTest(kernel_sig kernel, ref_sig ref, + const ValidatorBuilder& validator_builder) { + SECTION("Brute force") { UnarySinglePrecisionBruteForceTest(kernel, ref, validator_builder); } +} + +template +void UnaryDoublePrecisionTest(kernel_sig kernel, ref_sig ref, + const ValidatorBuilder& validator_builder) { + SECTION("Special values") { + UnaryDoublePrecisionSpecialValuesTest(kernel, ref, validator_builder); + } + + SECTION("Brute force") { UnaryDoublePrecisionBruteForceTest(kernel, ref, validator_builder); } +} + +#define MATH_UNARY_WITHIN_ULP_TEST_DEF(kern_name, ref_func, sp_ulp, dp_ulp) \ + MATH_UNARY_KERNEL_DEF(kern_name) \ + \ + 
TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive - float") { \ + double (*ref)(double) = ref_func; \ + UnarySinglePrecisionTest(kern_name##_kernel, ref, \ + ULPValidatorBuilderFactory(sp_ulp)); \ + } \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive - double") { \ + long double (*ref)(long double) = ref_func; \ + UnaryDoublePrecisionTest(kern_name##_kernel, ref, \ + ULPValidatorBuilderFactory(dp_ulp)); \ + } + +#define MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(func_name, sp_ulp, dp_ulp) \ + MATH_UNARY_WITHIN_ULP_TEST_DEF(func_name, std::func_name, sp_ulp, dp_ulp) diff --git a/catch/unit/math/validators.hh b/catch/unit/math/validators.hh new file mode 100644 index 0000000000..b732f79354 --- /dev/null +++ b/catch/unit/math/validators.hh @@ -0,0 +1,152 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include + +// Define a new MatcherBase class with a public 'describe' member function because +// Catch::MatcherBase::describe is protected and thus can't be used via a pointer to +// Catch::MatcherBase. +template class MatcherBase : public Catch::MatcherBase { + public: + virtual std::string describe() const = 0; + virtual ~MatcherBase() = default; +}; + +template class ValidatorBase : public MatcherBase { + public: + template + ValidatorBase(T target, Ts&&... args) : matcher_{std::forward(args)...}, target_{target} {} + + bool match(const T& val) const override { + if (std::isnan(target_)) { + return std::isnan(val); + } + + return matcher_.match(val); + } + + std::string describe() const override { + if (std::isnan(target_)) { + return "is not NaN"; + } + + return matcher_.describe(); + } + + private: + Matcher matcher_; + T target_; + bool nan = false; +}; + +template auto ULPValidatorBuilderFactory(int64_t ulps) { + return [=](T target, auto&&... args) { + return std::make_unique>( + target, Catch::WithinULP(target, ulps)); + }; +}; + +template auto AbsValidatorBuilderFactory(double margin) { + return [=](T target, auto&&... args) { + return std::make_unique>( + target, Catch::WithinAbs(target, margin)); + }; +} + +template auto RelValidatorBuilderFactory(T margin) { + return [=](T target, auto&&... args) { + return std::make_unique>( + target, Catch::WithinRel(target, margin)); + }; +} + +template class EqValidator : public MatcherBase { + public: + EqValidator(T target) : target_{target} {} + + bool match(const T& val) const override { + if (std::isnan(target_)) { + return std::isnan(val); + } + + return target_ == val; + } + + std::string describe() const override { + std::stringstream ss; + ss << " is not equal to " << target_; + return ss.str(); + } + + private: + T target_; +}; + +template auto EqValidatorBuilderFactory() { + return [](T val, auto&&... 
args) { return std::make_unique>(val); }; +} + +template +class PairValidator : public MatcherBase> { + public: + PairValidator(const std::pair& target, const VBF& vbf, const VBS& vbs) + : first_matcher_{vbf(target.first)}, second_matcher_{vbs(target.second)} {} + + bool match(const std::pair& val) const override { + return first_matcher_->match(val.first) && second_matcher_->match(val.second); + } + + std::string describe() const override { + return "<" + first_matcher_->describe() + ", " + second_matcher_->describe() + ">"; + } + + private: + decltype(std::declval()(std::declval())) first_matcher_; + decltype(std::declval()(std::declval())) second_matcher_; +}; + +template +auto PairValidatorBuilderFactory(const ValidatorBuilder& vb) { + return [=](const std::pair& t, auto&&... args) { + return std::make_unique>(t, vb, vb); + }; +} + +template +auto PairValidatorBuilderFactory(const VBF& vbf, const VBS& vbs) { + return [=](const std::pair& t, auto&&... args) { + return std::make_unique>(t, vbf, vbs); + }; +} + +template class NopValidator : public MatcherBase { + public: + bool match(const T& val) const override { return true; } + + std::string describe() const override { return ""; } +}; + +template auto NopValidatorBuilderFactory() { + return [](auto&&... 
args) { return std::make_unique>(); }; +} From 34b25ab7ace2757c3df1d2856651fc402ce648c8 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Mon, 22 Jan 2024 19:52:22 +0530 Subject: [PATCH 09/71] EXSWHTEC-290 - Implement tests for misc device math functions #232 Change-Id: Iab3d16bf9e0ae30ad6d7488a5fbbc7326c3befb7 --- catch/unit/math/CMakeLists.txt | 7 +- catch/unit/math/misc_funcs.cc | 96 ++++++++++ catch/unit/math/misc_negative_kernels.cc | 87 +++++++++ catch/unit/math/misc_negative_kernels_rtc.hh | 177 +++++++++++++++++++ 4 files changed, 366 insertions(+), 1 deletion(-) create mode 100644 catch/unit/math/misc_funcs.cc create mode 100644 catch/unit/math/misc_negative_kernels.cc create mode 100644 catch/unit/math/misc_negative_kernels_rtc.hh diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt index 2ebf11063d..380e1c0d4a 100644 --- a/catch/unit/math/CMakeLists.txt +++ b/catch/unit/math/CMakeLists.txt @@ -20,6 +20,7 @@ set(TEST_SRC trig_funcs.cc + misc_funcs.cc ) hip_add_exe_to_target(NAME MathsTest @@ -34,4 +35,8 @@ add_test(NAME Unit_Device_Single_Precision_Trig_Functions_Negative add_test(NAME Unit_Device_Double_Precision_Trig_Functions_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} - trig_double_precision_negative_kernels.cc 66) \ No newline at end of file + trig_double_precision_negative_kernels.cc 66) +add_test(NAME Unit_Device_Misc_Functions_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + misc_negative_kernels.cc 76) diff --git a/catch/unit/math/misc_funcs.cc b/catch/unit/math/misc_funcs.cc new file mode 100644 index 0000000000..35e21fb26e --- /dev/null +++ b/catch/unit/math/misc_funcs.cc @@ -0,0 +1,96 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "misc_negative_kernels_rtc.hh" + +#include "unary_common.hh" +#include "binary_common.hh" +#include "ternary_common.hh" + +MATH_UNARY_WITHIN_ULP_TEST_DEF(fabs, std::fabs, 0, 0) +TEST_CASE("Unit_Device_fabs_fabsf_Negative_RTC") { NegativeTestRTCWrapper<4>(kFabs); } + +MATH_BINARY_WITHIN_ULP_TEST_DEF(copysign, std::copysign, 0, 0) +TEST_CASE("Unit_Device_copysign_copysignf_Negative_RTC") { NegativeTestRTCWrapper<8>(kCopySign); } + +MATH_BINARY_WITHIN_ULP_TEST_DEF(fmax, std::fmax, 0, 0) +TEST_CASE("Unit_Device_fmax_fmaxf_Negative_RTC") { NegativeTestRTCWrapper<8>(kFmax); } + +MATH_BINARY_WITHIN_ULP_TEST_DEF(fmin, std::fmin, 0, 0) +TEST_CASE("Unit_Device_fmin_fminf_Negative_RTC") { NegativeTestRTCWrapper<8>(kFmin); } + +MATH_BINARY_WITHIN_ULP_TEST_DEF(nextafter, std::nextafter, 0, 0) +TEST_CASE("Unit_Device_nextafter_nextafterf_Negative_RTC") { + NegativeTestRTCWrapper<8>(kNextAfter); +} + +MATH_TERNARY_WITHIN_ULP_TEST_DEF(fma, std::fma, 0, 0) +TEST_CASE("Unit_Device_fma_fmaf_Negative_RTC") { NegativeTestRTCWrapper<12>(kFma); } + +__global__ void fdividef_kernel(float* const ys, const size_t num_xs, float* const x1s, + float* const x2s) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + ys[i] = fdividef(x1s[i], x2s[i]); + } +} + +TEST_CASE("Unit_Device_fdividef_Accuracy_Positive") { + double (*ref)(double, double) = [](double x1, double x2) { return x1 / x2; }; + BinaryFloatingPointTest(fdividef_kernel, ref, ULPValidatorBuilderFactory(0)); +} + +TEST_CASE("Unit_Device_fdividef_Negative_RTC") { NegativeTestRTCWrapper<4>(kFdividef); } + +#define MATH_BOOL_RETURNING_FUNCTION_TEST_DEF(kern_name, ref_func) \ + template \ + __global__ void kern_name##_kernel(bool* const ys, const size_t num_xs, T* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ 
+ ys[i] = kern_name(xs[i]); \ + } \ + } \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive - float") { \ + bool (*ref)(double) = ref_func; \ + UnarySinglePrecisionTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive - double") { \ + bool (*ref)(long double) = ref_func; \ + UnaryDoublePrecisionTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +MATH_BOOL_RETURNING_FUNCTION_TEST_DEF(isfinite, std::isfinite) +TEST_CASE("Unit_Device_isfinite_Negative_RTC") { NegativeTestRTCWrapper<4>(kIsFinite); } + +MATH_BOOL_RETURNING_FUNCTION_TEST_DEF(isinf, std::isinf) +TEST_CASE("Unit_Device_isinf_Negative_RTC") { NegativeTestRTCWrapper<4>(kIsInf); } + +MATH_BOOL_RETURNING_FUNCTION_TEST_DEF(isnan, std::isnan) +TEST_CASE("Unit_Device_isnan_Negative_RTC") { NegativeTestRTCWrapper<4>(kIsNan); } + +MATH_BOOL_RETURNING_FUNCTION_TEST_DEF(signbit, std::signbit) +TEST_CASE("Unit_Device_signbit_Negative_RTC") { NegativeTestRTCWrapper<4>(kSignBit); } \ No newline at end of file diff --git a/catch/unit/math/misc_negative_kernels.cc b/catch/unit/math/misc_negative_kernels.cc new file mode 100644 index 0000000000..761bd9cebf --- /dev/null +++ b/catch/unit/math/misc_negative_kernels.cc @@ -0,0 +1,87 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define MISC_UNARY_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } \ + __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } \ + __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } + +#define MISC_UNARY_BOOL_RET_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(float* x) { bool result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { bool result = func_name(x); } \ + __global__ void func_name##_kernel_v3(double* x) { bool result = func_name(x); } \ + __global__ void func_name##_kernel_v4(Dummy x) { bool result = func_name(x); } + +#define MISC_BINARY_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##f_kernel_v1(float* x, float y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v2(Dummy x, float y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v3(float x, float* y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v4(float x, Dummy y) { float result = func_name##f(x, y); } \ + __global__ void func_name##_kernel_v1(double* x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(Dummy x, double y) { double result = 
func_name(x, y); } \ + __global__ void func_name##_kernel_v3(double x, double* y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(double x, Dummy y) { double result = func_name(x, y); } + +/*Expecting 4 errors*/ +MISC_UNARY_NEGATIVE_KERNELS(fabs) + +/*Expecting 8 errors per macro invocation - 40 total*/ +MISC_BINARY_NEGATIVE_KERNELS(copysign) +MISC_BINARY_NEGATIVE_KERNELS(fmax) +MISC_BINARY_NEGATIVE_KERNELS(fmin) +MISC_BINARY_NEGATIVE_KERNELS(nextafter) +MISC_BINARY_NEGATIVE_KERNELS(fma) + +/*Expecting 4 errors - each kernel passes exactly one invalid argument to fdividef*/ +__global__ void fdividef_kernel_v1(float* x, float y) { float result = fdividef(x, y); } +__global__ void fdividef_kernel_v2(Dummy x, float y) { float result = fdividef(x, y); } +__global__ void fdividef_kernel_v3(float x, float* y) { float result = fdividef(x, y); } +__global__ void fdividef_kernel_v4(float x, Dummy y) { float result = fdividef(x, y); } + +/*Expecting 4 errors per macro invocation - 16 total*/ +MISC_UNARY_BOOL_RET_NEGATIVE_KERNELS(isfinite) +MISC_UNARY_BOOL_RET_NEGATIVE_KERNELS(isinf) +MISC_UNARY_BOOL_RET_NEGATIVE_KERNELS(isnan) +MISC_UNARY_BOOL_RET_NEGATIVE_KERNELS(signbit) + +/*Expecting 12 errors - float kernels call fmaf, double kernels call fma*/ +__global__ void fmaf_kernel_v1(float* x, float y, float z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v2(Dummy x, float y, float z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v3(float x, float* y, float z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v4(float x, Dummy y, float z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v5(float x, float y, float* z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v6(float x, float y, Dummy z) { float result = fmaf(x, y, z); } +__global__ void fma_kernel_v1(double* x, double y, double z) { double result = fma(x, y, z); } +__global__ void fma_kernel_v2(Dummy x, double y, double z) { double result = fma(x, y, z); } +__global__ void fma_kernel_v3(double x, double* y, double z) { double result =
fma(x, y, z); } +__global__ void fma_kernel_v4(double x, Dummy y, double z) { double result = fma(x, y, z); } +__global__ void fma_kernel_v5(double x, double y, double* z) { double result = fma(x, y, z); } +__global__ void fma_kernel_v6(double x, double y, Dummy z) { double result = fma(x, y, z); } \ No newline at end of file diff --git a/catch/unit/math/misc_negative_kernels_rtc.hh b/catch/unit/math/misc_negative_kernels_rtc.hh new file mode 100644 index 0000000000..66521da090 --- /dev/null +++ b/catch/unit/math/misc_negative_kernels_rtc.hh @@ -0,0 +1,177 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
+*/ + + +static constexpr auto kFabs{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void fabsf_kernel_v1(float* x) { float result = fabsf(x); } +__global__ void fabsf_kernel_v2(Dummy x) { float result = fabsf(x); } +__global__ void fabs_kernel_v1(double* x) { double result = fabs(x); } +__global__ void fabs_kernel_v2(Dummy x) { double result = fabs(x); } +)"}; + +static constexpr auto kCopySign{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void copysignf_kernel_v1(float* x, float y) { float result = copysignf(x, y); } +__global__ void copysignf_kernel_v2(Dummy x, float y) { float result = copysignf(x, y); } +__global__ void copysignf_kernel_v3(float x, float* y) { float result = copysignf(x, y); } +__global__ void copysignf_kernel_v4(float x, Dummy y) { float result = copysignf(x, y); } +__global__ void copysign_kernel_v1(double* x, double y) { double result = copysign(x, y); } +__global__ void copysign_kernel_v2(Dummy x, double y) { double result = copysign(x, y); } +__global__ void copysign_kernel_v3(double x, double* y) { double result = copysign(x, y); } +__global__ void copysign_kernel_v4(double x, Dummy y) { double result = copysign(x, y); } +)"}; + +static constexpr auto kFmax{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void fmaxf_kernel_v1(float* x, float y) { float result = fmaxf(x, y); } +__global__ void fmaxf_kernel_v2(Dummy x, float y) { float result = fmaxf(x, y); } +__global__ void fmaxf_kernel_v3(float x, float* y) { float result = fmaxf(x, y); } +__global__ void fmaxf_kernel_v4(float x, Dummy y) { float result = fmaxf(x, y); } +__global__ void fmax_kernel_v1(double* x, double y) { double result = fmax(x, y); } +__global__ void fmax_kernel_v2(Dummy x, double y) { double result = fmax(x, y); } +__global__ void fmax_kernel_v3(double x, double* y) { double result = fmax(x, y); } +__global__ void 
fmax_kernel_v4(double x, Dummy y) { double result = fmax(x, y); } +)"}; + +static constexpr auto kFmin{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void fminf_kernel_v1(float* x, float y) { float result = fminf(x, y); } +__global__ void fminf_kernel_v2(Dummy x, float y) { float result = fminf(x, y); } +__global__ void fminf_kernel_v3(float x, float* y) { float result = fminf(x, y); } +__global__ void fminf_kernel_v4(float x, Dummy y) { float result = fminf(x, y); } +__global__ void fmin_kernel_v1(double* x, double y) { double result = fmin(x, y); } +__global__ void fmin_kernel_v2(Dummy x, double y) { double result = fmin(x, y); } +__global__ void fmin_kernel_v3(double x, double* y) { double result = fmin(x, y); } +__global__ void fmin_kernel_v4(double x, Dummy y) { double result = fmin(x, y); } +)"}; + +static constexpr auto kNextAfter{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void nextafterf_kernel_v1(float* x, float y) { float result = nextafterf(x, y); } +__global__ void nextafterf_kernel_v2(Dummy x, float y) { float result = nextafterf(x, y); } +__global__ void nextafterf_kernel_v3(float x, float* y) { float result = nextafterf(x, y); } +__global__ void nextafterf_kernel_v4(float x, Dummy y) { float result = nextafterf(x, y); } +__global__ void nextafter_kernel_v1(double* x, double y) { double result = nextafter(x, y); } +__global__ void nextafter_kernel_v2(Dummy x, double y) { double result = nextafter(x, y); } +__global__ void nextafter_kernel_v3(double x, double* y) { double result = nextafter(x, y); } +__global__ void nextafter_kernel_v4(double x, Dummy y) { double result = nextafter(x, y); } +)"}; + +static constexpr auto kFma{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void fmaf_kernel_v1(float* x, float y, float z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v2(Dummy x, float y, float z) 
{ float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v3(float x, float* y, float z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v4(float x, Dummy y, float z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v5(float x, float y, float* z) { float result = fmaf(x, y, z); } +__global__ void fmaf_kernel_v6(float x, float y, Dummy z) { float result = fmaf(x, y, z); } +__global__ void fma_kernel_v1(double* x, double y, double z) { double result = fmaf(x, y, z); } +__global__ void fma_kernel_v2(Dummy x, double y, double z) { double result = fmaf(x, y, z); } +__global__ void fma_kernel_v3(double x, double* y, double z) { double result = fmaf(x, y, z); } +__global__ void fma_kernel_v4(double x, Dummy y, double z) { double result = fmaf(x, y, z); } +__global__ void fma_kernel_v5(double x, double y, double* z) { double result = fmaf(x, y, z); } +__global__ void fma_kernel_v6(double x, double y, Dummy z) { double result = fmaf(x, y, z); } +)"}; + +static constexpr auto kFdividef{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void fdividef_kernel_v1(float* x, float y) { float result = fdividef(x, y); } +__global__ void fdividef_kernel_v2(Dummy x, float y) { float result = fdivide(x); } +__global__ void fdividef_kernel_v3(float x, float* y) { float result = fdivide(x); } +__global__ void fdividef_kernel_v4(float x, Dummy y) { float result = fdivide(x); } +)"}; + +static constexpr auto kIsFinite{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void isfinite_kernel_v1(float* x) { bool result = isfinite(x); } +__global__ void isfinite_kernel_v2(Dummy x) { bool result = isfinite(x); } +__global__ void isfinite_kernel_v3(double* x) { bool result = isfinite(x); } +__global__ void isfinite_kernel_v4(Dummy x) { bool result = isfinite(x); } +)"}; + +static constexpr auto kIsInf{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} 
+}; +__global__ void isinf_kernel_v1(float* x) { bool result = isinf(x); } +__global__ void isinf_kernel_v2(Dummy x) { bool result = isinf(x); } +__global__ void isinf_kernel_v3(double* x) { bool result = isinf(x); } +__global__ void isinf_kernel_v4(Dummy x) { bool result = isinf(x); } +)"}; + +static constexpr auto kIsNan{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void isnan_kernel_v1(float* x) { bool result = isnan(x); } +__global__ void isnan_kernel_v2(Dummy x) { bool result = isnan(x); } +__global__ void isnan_kernel_v3(double* x) { bool result = isnan(x); } +__global__ void isnan_kernel_v4(Dummy x) { bool result = isnan(x); } +)"}; + +static constexpr auto kSignBit{R"( +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +__global__ void signbit_kernel_v1(float* x) { bool result = signbit(x); } +__global__ void signbit_kernel_v2(Dummy x) { bool result = signbit(x); } +__global__ void signbit_kernel_v3(double* x) { bool result = signbit(x); } +__global__ void signbit_kernel_v4(Dummy x) { bool result = signbit(x); } +)"}; From 111497351d4c87116fc54f143f3444668bda1199 Mon Sep 17 00:00:00 2001 From: Dino Music Date: Mon, 22 Jan 2024 20:09:00 +0530 Subject: [PATCH 10/71] EXSWHTEC-289 - Implement tests for remainder and rounding device math functions #234 Change-Id: I8413cfeb0cbf32e2e8369c5b1527c9794a595688 --- catch/include/hip_test_common.hh | 6 +- catch/unit/math/CMakeLists.txt | 20 +- catch/unit/math/math_common.hh | 6 + .../math/math_remainder_negative_kernels.cc | 113 +++++++ ...remainder_rounding_negative_kernels_rtc.hh | 276 ++++++++++++++++++ .../math/math_rounding_negative_kernels.cc | 43 +++ .../unit/math/remainder_and_rounding_funcs.cc | 153 ++++++++++ catch/unit/memory/CMakeLists.txt | 60 +--- 8 files changed, 616 insertions(+), 61 deletions(-) create mode 100644 catch/unit/math/math_remainder_negative_kernels.cc create mode 100644 
catch/unit/math/math_remainder_rounding_negative_kernels_rtc.hh create mode 100644 catch/unit/math/math_rounding_negative_kernels.cc create mode 100644 catch/unit/math/remainder_and_rounding_funcs.cc diff --git a/catch/include/hip_test_common.hh b/catch/include/hip_test_common.hh index c2d19650bf..21707f7615 100644 --- a/catch/include/hip_test_common.hh +++ b/catch/include/hip_test_common.hh @@ -178,7 +178,7 @@ static inline bool IsGfx11() { hipDeviceProp_t props{}; HIP_CHECK(hipGetDevice(&device)); HIP_CHECK(hipGetDeviceProperties(&props, device)); - // Get GCN Arch Name and compare to check if it is gfx11 + // Get GCN Arch Name and compare to check if it is gfx11 std::string arch = std::string(props.gcnArchName); auto pos = arch.find("gfx11"); if (pos != std::string::npos) @@ -186,7 +186,7 @@ static inline bool IsGfx11() { else return false; #else - std::cout<<"Have to be either Nvidia or AMD platform, asserting"<(kernel, numBlocks, numThreads, memPerBlock, stream, std::forward(packedArgs)...); #endif -HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipGetLastError()); } //--- diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt index 380e1c0d4a..b0a2b5d00c 100644 --- a/catch/unit/math/CMakeLists.txt +++ b/catch/unit/math/CMakeLists.txt @@ -21,11 +21,19 @@ set(TEST_SRC trig_funcs.cc misc_funcs.cc + remainder_and_rounding_funcs.cc ) +if(HIP_PLATFORM MATCHES "nvidia") + set(LINKER_LIBS nvrtc) +elseif(HIP_PLATFORM MATCHES "amd") + set(LINKER_LIBS hiprtc) +endif() + hip_add_exe_to_target(NAME MathsTest TEST_SRC ${TEST_SRC} - TEST_TARGET_NAME build_tests COMMON_SHARED_SRC ${COMMON_SHARED_SRC}) + TEST_TARGET_NAME build_tests COMMON_SHARED_SRC ${COMMON_SHARED_SRC} + LINKER_LIBS ${LINKER_LIBS}) add_test(NAME Unit_Device_Single_Precision_Trig_Functions_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py @@ -40,3 +48,13 @@ add_test(NAME Unit_Device_Misc_Functions_Negative COMMAND python3 
${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} misc_negative_kernels.cc 76) + +add_test(NAME Unit_Device_remainder_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + math_remainder_negative_kernels.cc 68) + +add_test(NAME Unit_Device_rounding_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + math_rounding_negative_kernels.cc 40) diff --git a/catch/unit/math/math_common.hh b/catch/unit/math/math_common.hh index 8b59558389..7ebc9b8f5d 100644 --- a/catch/unit/math/math_common.hh +++ b/catch/unit/math/math_common.hh @@ -216,7 +216,13 @@ template void NegativeTestRTCWrapper(const char* program_source) HIPRTC_CHECK( hiprtcCreateProgram(&program, program_source, "math_test_rtc.cc", 0, nullptr, nullptr)); +#if HT_AMD + std::string args = std::string("-ferror-limit=100"); + const char* options[] = {args.c_str()}; + hiprtcResult result{hiprtcCompileProgram(program, 1, options)}; +#else hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; +#endif // Get the compile log and count compiler error messages size_t log_size{}; diff --git a/catch/unit/math/math_remainder_negative_kernels.cc b/catch/unit/math/math_remainder_negative_kernels.cc new file mode 100644 index 0000000000..2ebd26516e --- /dev/null +++ b/catch/unit/math/math_remainder_negative_kernels.cc @@ -0,0 +1,113 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL(func_name) \ + __global__ void func_name##_kernel_v1(double* x, double y) { auto result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(double x, double* y) { auto result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, double y) { auto result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(double x, Dummy y) { auto result = func_name(x, y); } \ + __global__ void func_name##f_kernel_v1(float* x, float y) { auto result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v2(float x, float* y) { auto result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v3(Dummy x, float y) { auto result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v4(float x, Dummy y) { auto result = func_name##f(x, y); } + +NEGATIVE_KERNELS_SHELL(fmod) +NEGATIVE_KERNELS_SHELL(remainder) + +__global__ void remquo_kernel_v1(double* x, double y, int* quo) { auto result = remquo(x, y, quo); } +__global__ void remquo_kernel_v2(Dummy x, double y, int* quo) { auto result = remquo(x, y, quo); } +__global__ void remquo_kernel_v3(double x, double* y, int* quo) { auto result = remquo(x, y, quo); } +__global__ void remquo_kernel_v4(double x, Dummy y, int* quo) { auto result = remquo(x, y, quo); } +__global__ void remquo_kernel_v5(double x, double y, char* quo) { auto result = remquo(x, y, quo); } +__global__ void remquo_kernel_v6(double x, double y, short* quo) { + auto result = remquo(x, y, quo); +} +__global__ void remquo_kernel_v7(double x, double y, long* quo) { auto result = remquo(x, y, quo); } +__global__ void remquo_kernel_v8(double x, double y, long long* quo) { + auto result = remquo(x, y, quo); +} +__global__ void remquo_kernel_v9(double x, double y, float* quo) { + auto result = remquo(x, y, quo); +} +__global__ void remquo_kernel_v10(double x, double y, double* quo) { + 
auto result = remquo(x, y, quo); +} +__global__ void remquo_kernel_v11(double x, double y, Dummy* quo) { + auto result = remquo(x, y, quo); +} +__global__ void remquo_kernel_v12(double x, double y, const int* quo) { + auto result = remquo(x, y, quo); +} + +__global__ void remquof_kernel_v1(float* x, float y, int* quo) { auto result = remquof(x, y, quo); } +__global__ void remquof_kernel_v2(Dummy x, float y, int* quo) { auto result = remquof(x, y, quo); } +__global__ void remquof_kernel_v3(float x, float* y, int* quo) { auto result = remquof(x, y, quo); } +__global__ void remquof_kernel_v4(float x, Dummy y, int* quo) { auto result = remquof(x, y, quo); } +__global__ void remquof_kernel_v5(float x, float y, char* quo) { auto result = remquof(x, y, quo); } +__global__ void remquof_kernel_v6(float x, float y, short* quo) { + auto result = remquof(x, y, quo); +} +__global__ void remquof_kernel_v7(float x, float y, long* quo) { auto result = remquof(x, y, quo); } +__global__ void remquof_kernel_v8(float x, float y, long long* quo) { + auto result = remquof(x, y, quo); +} +__global__ void remquof_kernel_v9(float x, float y, float* quo) { + auto result = remquof(x, y, quo); +} +__global__ void remquof_kernel_v10(float x, float y, double* quo) { + auto result = remquof(x, y, quo); +} +__global__ void remquof_kernel_v11(float x, float y, Dummy* quo) { + auto result = remquof(x, y, quo); +} +__global__ void remquof_kernel_v12(float x, float y, const int* quo) { + auto result = remquof(x, y, quo); +} + +__global__ void modf_kernel_v1(double* x, double* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v2(Dummy x, double* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v3(double x, int* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v4(double x, char* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v5(double x, short* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v6(double x, 
long* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v7(double x, long long* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v8(double x, float* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v9(double x, Dummy* iptr) { auto result = modf(x, iptr); } +__global__ void modf_kernel_v10(double x, const double* iptr) { auto result = modf(x, iptr); } + +__global__ void modff_kernel_v1(float* x, float* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v2(Dummy x, float* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v3(float x, int* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v4(float x, char* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v5(float x, short* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v6(float x, long* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v7(float x, long long* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v8(float x, double* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v9(float x, Dummy* iptr) { auto result = modff(x, iptr); } +__global__ void modff_kernel_v10(float x, const float* iptr) { auto result = modff(x, iptr); } + +NEGATIVE_KERNELS_SHELL(fdim) diff --git a/catch/unit/math/math_remainder_rounding_negative_kernels_rtc.hh b/catch/unit/math/math_remainder_rounding_negative_kernels_rtc.hh new file mode 100644 index 0000000000..e67a6d0092 --- /dev/null +++ b/catch/unit/math/math_remainder_rounding_negative_kernels_rtc.hh @@ -0,0 +1,276 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the math remainder and rounding negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kTrunc{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void trunc_kernel_v1(double* x) { auto result = trunc(x); } + __global__ void trunc_kernel_v2(Dummy x) { auto result = trunc(x); } + __global__ void truncf_kernel_v1(float* x) { auto result = truncf(x); } + __global__ void truncf_kernel_v2(Dummy x) { auto result = truncf(x); } +)"}; + +static constexpr auto kRound{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void round_kernel_v1(double* x) { auto result = round(x); } + __global__ void round_kernel_v2(Dummy x) { auto result = round(x); } + __global__ void roundf_kernel_v1(float* x) { auto result = roundf(x); } + __global__ void roundf_kernel_v2(Dummy x) { auto result = roundf(x); } +)"}; + +static constexpr auto kRint{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rint_kernel_v1(double* x) { auto result = rint(x); } + __global__ void rint_kernel_v2(Dummy x) { auto result = rint(x); } + __global__ void rintf_kernel_v1(float* x) { auto result = rintf(x); } + __global__ void rintf_kernel_v2(Dummy x) { auto result = rintf(x); } +)"}; + +static constexpr auto kNearbyint{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void nearbyint_kernel_v1(double* x) { auto result = nearbyint(x); } + __global__ void nearbyint_kernel_v2(Dummy x) { auto result = nearbyint(x); } + __global__ void nearbyintf_kernel_v1(float* x) { auto result = nearbyintf(x); } + __global__ void nearbyintf_kernel_v2(Dummy x) { auto result = nearbyintf(x); } +)"}; + +static constexpr auto kCeil{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void ceil_kernel_v1(double* x) { auto result = ceil(x); } + __global__ void ceil_kernel_v2(Dummy x) { auto result = ceil(x); } + __global__ void ceilf_kernel_v1(float* x) { auto result = 
ceilf(x); } + __global__ void ceilf_kernel_v2(Dummy x) { auto result = ceilf(x); } +)"}; + +static constexpr auto kFloor{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void floor_kernel_v1(double* x) { auto result = floor(x); } + __global__ void floor_kernel_v2(Dummy x) { auto result = floor(x); } + __global__ void floorf_kernel_v1(float* x) { auto result = floorf(x); } + __global__ void floorf_kernel_v2(Dummy x) { auto result = floorf(x); } +)"}; + +static constexpr auto kLrint{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void lrint_kernel_v1(double* x) { auto result = lrint(x); } + __global__ void lrint_kernel_v2(Dummy x) { auto result = lrint(x); } + __global__ void lrintf_kernel_v1(float* x) { auto result = lrintf(x); } + __global__ void lrintf_kernel_v2(Dummy x) { auto result = lrintf(x); } +)"}; + +static constexpr auto kLround{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void lround_kernel_v1(double* x) { auto result = lround(x); } + __global__ void lround_kernel_v2(Dummy x) { auto result = lround(x); } + __global__ void lroundf_kernel_v1(float* x) { auto result = lroundf(x); } + __global__ void lroundf_kernel_v2(Dummy x) { auto result = lroundf(x); } +)"}; + +static constexpr auto kLlrint{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void llrint_kernel_v1(double* x) { auto result = llrint(x); } + __global__ void llrint_kernel_v2(Dummy x) { auto result = llrint(x); } + __global__ void llrintf_kernel_v1(float* x) { auto result = llrintf(x); } + __global__ void llrintf_kernel_v2(Dummy x) { auto result = llrintf(x); } +)"}; + +static constexpr auto kLlround{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void llround_kernel_v1(double* x) { auto result = llround(x); } + __global__ void llround_kernel_v2(Dummy x) 
{ auto result = llround(x); } + __global__ void llroundf_kernel_v1(float* x) { auto result = llroundf(x); } + __global__ void llroundf_kernel_v2(Dummy x) { auto result = llroundf(x); } +)"}; + +static constexpr auto kFmod{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void fmod_kernel_v1(double* x, double y) { auto result = fmod(x, y); } + __global__ void fmod_kernel_v2(double x, double* y) { auto result = fmod(x, y); } + __global__ void fmod_kernel_v3(Dummy x, double y) { auto result = fmod(x, y); } + __global__ void fmod_kernel_v4(double x, Dummy y) { auto result = fmod(x, y); } + __global__ void fmodf_kernel_v1(float* x, float y) { auto result = fmodf(x, y); } + __global__ void fmodf_kernel_v2(float x, float* y) { auto result = fmodf(x, y); } + __global__ void fmodf_kernel_v3(Dummy x, float y) { auto result = fmodf(x, y); } + __global__ void fmodf_kernel_v4(float x, Dummy y) { auto result = fmodf(x, y); } +)"}; + +static constexpr auto kRemainder{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void remainder_kernel_v1(double* x, double y) { auto result = remainder(x, y); } + __global__ void remainder_kernel_v2(double x, double* y) { auto result = remainder(x, y); } + __global__ void remainder_kernel_v3(Dummy x, double y) { auto result = remainder(x, y); } + __global__ void remainder_kernel_v4(double x, Dummy y) { auto result = remainder(x, y); } + __global__ void remainderf_kernel_v1(float* x, float y) { auto result = remainderf(x, y); } + __global__ void remainderf_kernel_v2(float x, float* y) { auto result = remainderf(x, y); } + __global__ void remainderf_kernel_v3(Dummy x, float y) { auto result = remainderf(x, y); } + __global__ void remainderf_kernel_v4(float x, Dummy y) { auto result = remainderf(x, y); } +)"}; + +static constexpr auto kRemquo{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void 
remquo_kernel_v1(double* x, double y, int* quo) { auto result = remquo(x, y, quo); } + __global__ void remquo_kernel_v2(Dummy x, double y, int* quo) { auto result = remquo(x, y, quo); } + __global__ void remquo_kernel_v3(double x, double* y, int* quo) { auto result = remquo(x, y, quo); } + __global__ void remquo_kernel_v4(double x, Dummy y, int* quo) { auto result = remquo(x, y, quo); } + __global__ void remquo_kernel_v5(double x, double y, char* quo) { auto result = remquo(x, y, quo); } + __global__ void remquo_kernel_v6(double x, double y, short* quo) { + auto result = remquo(x, y, quo); + } + __global__ void remquo_kernel_v7(double x, double y, long* quo) { auto result = remquo(x, y, quo); } + __global__ void remquo_kernel_v8(double x, double y, long long* quo) { + auto result = remquo(x, y, quo); + } + __global__ void remquo_kernel_v9(double x, double y, float* quo) { + auto result = remquo(x, y, quo); + } + __global__ void remquo_kernel_v10(double x, double y, double* quo) { + auto result = remquo(x, y, quo); + } + __global__ void remquo_kernel_v11(double x, double y, Dummy* quo) { + auto result = remquo(x, y, quo); + } + __global__ void remquo_kernel_v12(double x, double y, const int* quo) { + auto result = remquo(x, y, quo); + } + __global__ void remquof_kernel_v1(float* x, float y, int* quo) { auto result = remquof(x, y, quo); } + __global__ void remquof_kernel_v2(Dummy x, float y, int* quo) { auto result = remquof(x, y, quo); } + __global__ void remquof_kernel_v3(float x, float* y, int* quo) { auto result = remquof(x, y, quo); } + __global__ void remquof_kernel_v4(float x, Dummy y, int* quo) { auto result = remquof(x, y, quo); } + __global__ void remquof_kernel_v5(float x, float y, char* quo) { auto result = remquof(x, y, quo); } + __global__ void remquof_kernel_v6(float x, float y, short* quo) { + auto result = remquof(x, y, quo); + } + __global__ void remquof_kernel_v7(float x, float y, long* quo) { auto result = remquof(x, y, quo); } + __global__ void 
remquof_kernel_v8(float x, float y, long long* quo) { + auto result = remquof(x, y, quo); + } + __global__ void remquof_kernel_v9(float x, float y, float* quo) { + auto result = remquof(x, y, quo); + } + __global__ void remquof_kernel_v10(float x, float y, double* quo) { + auto result = remquof(x, y, quo); + } + __global__ void remquof_kernel_v11(float x, float y, Dummy* quo) { + auto result = remquof(x, y, quo); + } + __global__ void remquof_kernel_v12(float x, float y, const int* quo) { + auto result = remquof(x, y, quo); + } +)"}; + +static constexpr auto kModf{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void modf_kernel_v1(double* x, double* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v2(Dummy x, double* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v3(double x, int* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v4(double x, char* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v5(double x, short* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v6(double x, long* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v7(double x, long long* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v8(double x, float* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v9(double x, Dummy* iptr) { auto result = modf(x, iptr); } + __global__ void modf_kernel_v10(double x, const double* iptr) { auto result = modf(x, iptr); } + __global__ void modff_kernel_v1(float* x, float* iptr) { auto result = modff(x, iptr); } + __global__ void modff_kernel_v2(Dummy x, float* iptr) { auto result = modff(x, iptr); } + __global__ void modff_kernel_v3(float x, int* iptr) { auto result = modff(x, iptr); } + __global__ void modff_kernel_v4(float x, char* iptr) { auto result = modff(x, iptr); } + __global__ void modff_kernel_v5(float x, short* iptr) { auto result = modff(x, 
iptr); } + __global__ void modff_kernel_v6(float x, long* iptr) { auto result = modff(x, iptr); } + __global__ void modff_kernel_v7(float x, long long* iptr) { auto result = modff(x, iptr); } + __global__ void modff_kernel_v8(float x, double* iptr) { auto result = modff(x, iptr); } + __global__ void modff_kernel_v9(float x, Dummy* iptr) { auto result = modff(x, iptr); } + __global__ void modff_kernel_v10(float x, const float* iptr) { auto result = modff(x, iptr); } +)"}; + +static constexpr auto kFdim{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void fdim_kernel_v1(double* x, double y) { auto result = fdim(x, y); } + __global__ void fdim_kernel_v2(double x, double* y) { auto result = fdim(x, y); } + __global__ void fdim_kernel_v3(Dummy x, double y) { auto result = fdim(x, y); } + __global__ void fdim_kernel_v4(double x, Dummy y) { auto result = fdim(x, y); } + __global__ void fdimf_kernel_v1(float* x, float y) { auto result = fdimf(x, y); } + __global__ void fdimf_kernel_v2(float x, float* y) { auto result = fdimf(x, y); } + __global__ void fdimf_kernel_v3(Dummy x, float y) { auto result = fdimf(x, y); } + __global__ void fdimf_kernel_v4(float x, Dummy y) { auto result = fdimf(x, y); } +)"}; diff --git a/catch/unit/math/math_rounding_negative_kernels.cc b/catch/unit/math/math_rounding_negative_kernels.cc new file mode 100644 index 0000000000..857f50d5dd --- /dev/null +++ b/catch/unit/math/math_rounding_negative_kernels.cc @@ -0,0 +1,43 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL(func_name) \ + __global__ void func_name##_kernel_v1(double* x) { auto result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { auto result = func_name(x); } \ + __global__ void func_name##f_kernel_v1(float* x) { auto result = func_name##f(x); } \ + __global__ void func_name##f_kernel_v2(Dummy x) { auto result = func_name##f(x); } + +NEGATIVE_KERNELS_SHELL(trunc) +NEGATIVE_KERNELS_SHELL(round) +NEGATIVE_KERNELS_SHELL(rint) +NEGATIVE_KERNELS_SHELL(nearbyint) +NEGATIVE_KERNELS_SHELL(ceil) +NEGATIVE_KERNELS_SHELL(floor) +NEGATIVE_KERNELS_SHELL(lrint) +NEGATIVE_KERNELS_SHELL(lround) +NEGATIVE_KERNELS_SHELL(llrint) +NEGATIVE_KERNELS_SHELL(llround) diff --git a/catch/unit/math/remainder_and_rounding_funcs.cc b/catch/unit/math/remainder_and_rounding_funcs.cc new file mode 100644 index 0000000000..635a68a459 --- /dev/null +++ b/catch/unit/math/remainder_and_rounding_funcs.cc @@ -0,0 +1,153 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "unary_common.hh" +#include "binary_common.hh" +#include "math_remainder_rounding_negative_kernels_rtc.hh" + +MATH_BINARY_WITHIN_ULP_TEST_DEF(fmod, std::fmod, 0, 0) +TEST_CASE("Unit_Device_fmod_fmodf_Negative_RTC") { NegativeTestRTCWrapper<8>(kFmod); } + +MATH_BINARY_WITHIN_ULP_TEST_DEF(remainder, std::remainder, 0, 0) +TEST_CASE("Unit_Device_remainder_remainder_Negative_RTC") { NegativeTestRTCWrapper<8>(kRemainder); } + +MATH_BINARY_WITHIN_ULP_TEST_DEF(fdim, std::fdim, 0, 0) +TEST_CASE("Unit_Device_fdim_fdimf_Negative_RTC") { NegativeTestRTCWrapper<8>(kFdim); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(trunc, std::trunc, 0, 0) +TEST_CASE("Unit_Device_trunc_truncf_Negative_RTC") { NegativeTestRTCWrapper<4>(kTrunc); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(round, std::round, 0, 0) +TEST_CASE("Unit_Device_round_roundf_Negative_RTC") { NegativeTestRTCWrapper<4>(kRound); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(rint, std::rint, 0, 0) +TEST_CASE("Unit_Device_rint_rintf_Negative_RTC") { NegativeTestRTCWrapper<4>(kRint); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(nearbyint, std::nearbyint, 0, 0) +TEST_CASE("Unit_Device_nearbyint_nearbyintf_Negative_RTC") { + NegativeTestRTCWrapper<4>(kNearbyint); +} + +MATH_UNARY_WITHIN_ULP_TEST_DEF(ceil, std::ceil, 0, 0) +TEST_CASE("Unit_Device_ceil_ceilf_Negative_RTC") { NegativeTestRTCWrapper<4>(kCeil); } + +MATH_UNARY_WITHIN_ULP_TEST_DEF(floor, std::floor, 0, 0) +TEST_CASE("Unit_Device_floor_floorf_Negative_RTC") { NegativeTestRTCWrapper<4>(kFloor); } + + +#define LONG_CONVERSION_FUNCTION_TEST_DEF(kern_name, ref_func, lt) \ + MATH_UNARY_KERNEL_DEF(kern_name) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive - float") { \ + lt (*ref)(double) = ref_func; \ + 
UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, \ + EqValidatorBuilderFactory(), \ + static_cast(std::numeric_limits::lowest()), \ + static_cast(std::numeric_limits::max())); \ + } \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive - double") { \ + lt (*ref)(long double) = ref_func; \ + UnaryDoublePrecisionBruteForceTest(kern_name##_kernel, ref, \ + EqValidatorBuilderFactory(), \ + static_cast(std::numeric_limits::lowest()), \ + static_cast(std::numeric_limits::max())); \ + } + +LONG_CONVERSION_FUNCTION_TEST_DEF(lrint, std::lrint, long) +TEST_CASE("Unit_Device_lrint_lrintf_Negative_RTC") { NegativeTestRTCWrapper<4>(kLrint); } + +LONG_CONVERSION_FUNCTION_TEST_DEF(lround, std::lround, long) +TEST_CASE("Unit_Device_lround_lroundf_Negative_RTC") { NegativeTestRTCWrapper<4>(kLround); } + +LONG_CONVERSION_FUNCTION_TEST_DEF(llrint, std::llrint, long long) +TEST_CASE("Unit_Device_llrint_llrintf_Negative_RTC") { NegativeTestRTCWrapper<4>(kLlrint); } + +LONG_CONVERSION_FUNCTION_TEST_DEF(llround, std::llround, long long) +TEST_CASE("Unit_Device_llround_llroundf_Negative_RTC") { NegativeTestRTCWrapper<4>(kLlround); } + + +template +__global__ void remquo_kernel(std::pair* const ys, const size_t num_xs, T* const x1s, + T* const x2s) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + if constexpr (std::is_same_v) { + ys[i].first = remquof(x1s[i], x2s[i], &ys[i].second); + } else if constexpr (std::is_same_v) { + ys[i].first = remquo(x1s[i], x2s[i], &ys[i].second); + } + } +} + +template std::pair remquo_wrapper(T x1, T x2) { + std::pair ret; + ret.first = std::remquo(x1, x2, &ret.second); + return ret; +} + +TEMPLATE_TEST_CASE("Unit_Device_remquo_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + std::pair (*ref)(RT, RT) = remquo_wrapper; + const auto ulp_builder = ULPValidatorBuilderFactory(0); + const auto eq_builder = 
EqValidatorBuilderFactory(); + + BinaryFloatingPointTest(remquo_kernel, ref, + PairValidatorBuilderFactory(ulp_builder, eq_builder)); +} + +TEST_CASE("Unit_Device_remquo_remquof_Negative_RTC") { NegativeTestRTCWrapper<24>(kRemquo); } + +template +__global__ void modf_kernel(std::pair* const ys, const size_t num_xs, T* const xs) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + if constexpr (std::is_same_v) { + ys[i].first = modff(xs[i], &ys[i].second); + } else if constexpr (std::is_same_v) { + ys[i].first = modf(xs[i], &ys[i].second); + } + } +} + +template std::pair modf_wrapper(T x) { + std::pair ret; + ret.first = std::modf(x, &ret.second); + return ret; +} + +TEST_CASE("Unit_Device_modf_Accuracy_Positive - float") { + UnarySinglePrecisionTest( + modf_kernel, modf_wrapper, + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(0))); +} + +TEST_CASE("Unit_Device_modf_Accuracy_Positive - double") { + UnaryDoublePrecisionTest( + modf_kernel, modf_wrapper, + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(0))); +} + +TEST_CASE("Unit_Device_modf_modff_Negative_RTC") { NegativeTestRTCWrapper<20>(kModf); } diff --git a/catch/unit/memory/CMakeLists.txt b/catch/unit/memory/CMakeLists.txt index fda74f5b2e..d99cf33b32 100644 --- a/catch/unit/memory/CMakeLists.txt +++ b/catch/unit/memory/CMakeLists.txt @@ -83,17 +83,7 @@ if(HIP_PLATFORM MATCHES "amd") hipExtMallocWithFlags.cc hipMallocMngdMultiThread.cc hipArray.cc - hipMemVmm.cc - hipMemCreate.cc - hipMemMap.cc - hipMemGetAllocationGranularity.cc - hipMemSetGetAccess.cc - hipMemRetainAllocationHandle.cc - hipMemUnmap.cc - hipMemAddressFree.cc - hipMemAddressReserve.cc - hipMemRelease.cc - hipMemGetAllocationPropertiesFromHandle.cc) + hipMemVmm.cc) else() set(TEST_SRC ${TEST_SRC} hipGetSymbolSizeAddress.cc) endif() @@ -138,8 +128,10 @@ set(TEST_SRC hipMemsetFunctional.cc hipMalloc.cc hipMallocPitch.cc + 
hipMallocArray.cc hipMalloc3D.cc hipMalloc3DArray.cc + hipArrayCreate.cc hipArray3DCreate.cc hipDrvMemcpy3D.cc hipDrvMemcpy3D_old.cc @@ -165,52 +157,6 @@ set(TEST_SRC hipMallocMipmappedArray.cc hipFreeMipmappedArray.cc) -set(NOT_FOR_MI200_AND_ABOVE_TEST hipMallocArray.cc hipArrayCreate.cc) # tests not for MI200+ -set(MI200_AND_ABOVE_TARGETS gfx90a gfx940 gfx941 gfx942) -function(CheckRejectedArchs OFFLOAD_ARCH_STR_LOCAL) - set(ARCH_CHECK -1 PARENT_SCOPE) - string(REGEX MATCHALL "--offload-arch=gfx[0-9a-z]+" OFFLOAD_ARCH_LIST ${OFFLOAD_ARCH_STR_LOCAL}) - foreach(OFFLOAD_ARCH IN LISTS OFFLOAD_ARCH_LIST) - string(REGEX MATCHALL "--offload-arch=(gfx[0-9a-z]+)" matches ${OFFLOAD_ARCH}) - if (CMAKE_MATCH_COUNT EQUAL 1) - if (CMAKE_MATCH_1 IN_LIST MI200_AND_ABOVE_TARGETS) - set(ARCH_CHECK 1 PARENT_SCOPE) - endif() # CMAKE_MATCH_1 - endif() # CMAKE_MATCH_COUNT - endforeach() # OFFLOAD_ARCH_LIST -endfunction() # CheckAcceptedArchs - -if(HIP_PLATFORM MATCHES "amd") - if (DEFINED OFFLOAD_ARCH_STR) - CheckRejectedArchs(${OFFLOAD_ARCH_STR}) - elseif(DEFINED $ENV{HCC_AMDGPU_TARGET}) - CheckRejectedArchs($ENV{HCC_AMDGPU_TARGET}) - else() - set(ARCH_CHECK -1) - endif() - if(${ARCH_CHECK} EQUAL -1) - message(STATUS "Adding test: ${NOT_FOR_MI200_AND_ABOVE_TEST}") - set(TEST_SRC ${TEST_SRC} ${NOT_FOR_MI200_AND_ABOVE_TEST}) - endif() -else() - set(TEST_SRC ${TEST_SRC} ${NOT_FOR_MI200_AND_ABOVE_TEST}) -endif() - hip_add_exe_to_target(NAME MemoryTest2 TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests COMMON_SHARED_SRC ${COMMON_SHARED_SRC}) - -if(HIP_PLATFORM MATCHES "amd") - set(TEST_SRC - hipSVMTestByteGranularity.cpp - hipSVMTestFineGrainMemoryConsistency.cpp - hipSVMTestFineGrainSyncBuffers.cpp - hipSVMTestSharedAddressSpaceFineGrain.cpp - ) - - hip_add_exe_to_target(NAME SVMAtomicTest - TEST_SRC ${TEST_SRC} - TEST_TARGET_NAME build_tests COMMON_SHARED_SRC ${COMMON_SHARED_SRC}) - - add_dependencies(build_tests hipHostRegisterPerf) -endif() From 
b0622f51e2b457036d0a2db3740ea717d515b467 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Mon, 22 Jan 2024 20:50:14 +0530 Subject: [PATCH 11/71] EXSWHTEC-283 - Introduce base implementations for numerical accuracy tests #260 Change-Id: I9cf71b420b7fc73fcd9df162b963fda73878cacb --- catch/unit/math/binary_common.hh | 2 +- catch/unit/math/quaternary_common.hh | 2 +- catch/unit/math/ternary_common.hh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/catch/unit/math/binary_common.hh b/catch/unit/math/binary_common.hh index 72de23096e..2ddaee474f 100644 --- a/catch/unit/math/binary_common.hh +++ b/catch/unit/math/binary_common.hh @@ -133,4 +133,4 @@ void BinaryFloatingPointTest(kernel_sig kernel, ref_sig, ref, \ ULPValidatorBuilderFactory(ulp)); \ - } \ No newline at end of file + } diff --git a/catch/unit/math/quaternary_common.hh b/catch/unit/math/quaternary_common.hh index b29eb52ef4..a9a8cc6778 100644 --- a/catch/unit/math/quaternary_common.hh +++ b/catch/unit/math/quaternary_common.hh @@ -243,4 +243,4 @@ void QuaternaryFloatingPointTest(kernel_sig kernel, \ QuaternaryFloatingPointTest(kern_name##_kernel, ref, \ ULPValidatorBuilderFactory(ulp)); \ - } \ No newline at end of file + } diff --git a/catch/unit/math/ternary_common.hh b/catch/unit/math/ternary_common.hh index 53b28c6b5a..a335073916 100644 --- a/catch/unit/math/ternary_common.hh +++ b/catch/unit/math/ternary_common.hh @@ -139,4 +139,4 @@ void TernaryFloatingPointTest(kernel_sig kernel, ref_sig, ref, \ ULPValidatorBuilderFactory(ulp)); \ - } \ No newline at end of file + } From 17291543418b487f6f42e6aa7b4a46adfa48f3b3 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Mon, 22 Jan 2024 22:12:44 +0530 Subject: [PATCH 12/71] EXSWHTEC-291 - Implement tests for floating-point and integer math intrinsics #227 Change-Id: I55a3cbf5ce15cd93a280af295b88c28e51246148 --- catch/unit/math/CMakeLists.txt | 18 + .../unit/math/double_precision_intrinsics.cc | 243 ++++++++ 
...e_precision_intrinsics_negative_kernels.cc | 46 ++ catch/unit/math/integer_intrinsics.cc | 320 +++++++++++ .../integer_intrinsics_negative_kernels.cc | 67 +++ .../unit/math/single_precision_intrinsics.cc | 530 ++++++++++++++++++ ...e_precision_intrinsics_negative_kernels.cc | 56 ++ 7 files changed, 1280 insertions(+) create mode 100644 catch/unit/math/double_precision_intrinsics.cc create mode 100644 catch/unit/math/double_precision_intrinsics_negative_kernels.cc create mode 100644 catch/unit/math/integer_intrinsics.cc create mode 100644 catch/unit/math/integer_intrinsics_negative_kernels.cc create mode 100644 catch/unit/math/single_precision_intrinsics.cc create mode 100644 catch/unit/math/single_precision_intrinsics_negative_kernels.cc diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt index b0a2b5d00c..e552b9a8a8 100644 --- a/catch/unit/math/CMakeLists.txt +++ b/catch/unit/math/CMakeLists.txt @@ -22,6 +22,9 @@ set(TEST_SRC trig_funcs.cc misc_funcs.cc remainder_and_rounding_funcs.cc + single_precision_intrinsics.cc + double_precision_intrinsics.cc + integer_intrinsics.cc ) if(HIP_PLATFORM MATCHES "nvidia") @@ -58,3 +61,18 @@ add_test(NAME Unit_Device_rounding_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} math_rounding_negative_kernels.cc 40) + +add_test(NAME Unit_Single_Precision_Intrinsics_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + single_precision_intrinsics_negative_kernels.cc 42) + +add_test(NAME Unit_Double_Precision_Intrinsics_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + double_precision_intrinsics_negative_kernels.cc 18) + +add_test(NAME Unit_Integer_Intrinsics_Negative + COMMAND python3 
${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + integer_intrinsics_negative_kernels.cc 20) diff --git a/catch/unit/math/double_precision_intrinsics.cc b/catch/unit/math/double_precision_intrinsics.cc new file mode 100644 index 0000000000..69e5e2a8d0 --- /dev/null +++ b/catch/unit/math/double_precision_intrinsics.cc @@ -0,0 +1,243 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +#include "unary_common.hh" +#include "binary_common.hh" +#include "ternary_common.hh" + +/********** Unary Functions **********/ + +#define MATH_UNARY_DP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(double* const ys, const size_t num_xs, double* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(xs[i]); \ + } \ + } + +#define MATH_UNARY_DP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + UnaryDoublePrecisionTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_UNARY_DP_TEST_DEF(func_name, ref_func) \ + MATH_UNARY_DP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_UNARY_DP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(double target, double x) + + +static double __drcp_rn_ref(double x) { return 1.0 / x; } + +MATH_UNARY_DP_KERNEL_DEF(__drcp_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__drcp_rn(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are + * IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_DP_TEST_DEF_IMPL(__drcp_rn, __drcp_rn_ref, EqValidatorBuilderFactory()); + + +MATH_UNARY_DP_KERNEL_DEF(__dsqrt_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__dsqrt_rn(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `double std::sqrt(double)`. The error bounds are + * IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_DP_TEST_DEF_IMPL(__dsqrt_rn, static_cast(std::sqrt), + EqValidatorBuilderFactory()); + + +/********** Binary Functions **********/ + +#define MATH_BINARY_DP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(double* const ys, const size_t num_xs, double* const x1s, \ + double* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i]); \ + } \ + } + +#define MATH_BINARY_DP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + BinaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_BINARY_DP_TEST_DEF(func_name, ref_func) \ + MATH_BINARY_DP_TEST_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_BINARY_DP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(double target, \ + double x1, double x2) + + +static double __dadd_rn_ref(double x1, double x2) { return x1 + x2; } + +MATH_BINARY_DP_KERNEL_DEF(__dadd_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__dadd_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_DP_TEST_DEF_IMPL(__dadd_rn, __dadd_rn_ref, EqValidatorBuilderFactory()); + + +static double __dsub_rn_ref(double x1, double x2) { return x1 - x2; } + +MATH_BINARY_DP_KERNEL_DEF(__dsub_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__dsub_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_DP_TEST_DEF_IMPL(__dsub_rn, __dsub_rn_ref, EqValidatorBuilderFactory()); + + +static double __dmul_rn_ref(double x1, double x2) { return x1 * x2; } + +MATH_BINARY_DP_KERNEL_DEF(__dmul_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__dmul_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_DP_TEST_DEF_IMPL(__dmul_rn, __dmul_rn_ref, EqValidatorBuilderFactory()); + + +static double __ddiv_rn_ref(double x1, double x2) { return x1 / x2; } + +MATH_BINARY_DP_KERNEL_DEF(__ddiv_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__ddiv_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_DP_TEST_DEF_IMPL(__ddiv_rn, __ddiv_rn_ref, EqValidatorBuilderFactory()); + + +/********** Ternary Functions **********/ + +#define MATH_TERNARY_DP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(double* const ys, const size_t num_xs, double* const x1s, \ + double* const x2s, double* const x3s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i], x3s[i]); \ + } \ + } + +#define MATH_TERNARY_DP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + TernaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_TERNARY_DP_TEST_DEF(func_name, ref_func, validator_builder) \ + MATH_TERNARY_DP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_TERNARY_DP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder( \ + double target, double x1, double x2, double x3) + + +MATH_TERNARY_DP_KERNEL_DEF(__fma_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fma_rn(x,y,z)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/double_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_TERNARY_DP_TEST_DEF_IMPL(__fma_rn, static_cast(std::fma), + EqValidatorBuilderFactory()); \ No newline at end of file diff --git a/catch/unit/math/double_precision_intrinsics_negative_kernels.cc b/catch/unit/math/double_precision_intrinsics_negative_kernels.cc new file mode 100644 index 0000000000..4ea26ae102 --- /dev/null +++ b/catch/unit/math/double_precision_intrinsics_negative_kernels.cc @@ -0,0 +1,46 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define INTRINSIC_UNARY_DOUBLE_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } + +#define INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(double* x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(double x, double* y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(double x, Dummy y) { double result = func_name(x, y); } + + +INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(__dadd_rn) +INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(__dsub_rn) +INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(__dmul_rn) +INTRINSIC_BINARY_DOUBLE_NEGATIVE_KERNELS(__ddiv_rn) +INTRINSIC_UNARY_DOUBLE_NEGATIVE_KERNELS(__dsqrt_rn) \ No newline at end of file diff --git a/catch/unit/math/integer_intrinsics.cc b/catch/unit/math/integer_intrinsics.cc new file mode 100644 index 0000000000..d851577831 --- /dev/null +++ b/catch/unit/math/integer_intrinsics.cc @@ -0,0 +1,320 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +__global__ void __brev_kernel(unsigned int* y, unsigned int x) { y[0] = __brev(x); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__brev(x)`. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___brev_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + __brev_kernel<<<1, 1>>>(y.ptr(), 0xAAAAAAAA); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 0x55555555); +} + +__global__ void __brevll_kernel(unsigned long long int* y, unsigned long long int x) { + y[0] = __brevll(x); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__brevll(x)`. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___brevll_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, + sizeof(unsigned long long int)); + + __brevll_kernel<<<1, 1>>>(y.ptr(), 0xAAAAAAAAAAAAAAAA); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 0x5555555555555555); +} + +template __global__ void __clz_kernel(T* y, T x) { y[0] = __clz(x); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__clz(x)`. Run for `int` and `unsigned int` overloads. 
+ * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device___clz_Sanity_Positive", "", int, unsigned int) { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(TestType)); + + __clz_kernel<<<1, 1>>>(y.ptr(), static_cast(0)); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 32); + + TestType x = 1; + for (int i = 0; i < 32; ++i) { + __clz_kernel<<<1, 1>>>(y.ptr(), x << i); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 31 - i); + } +} + +template __global__ void __clzll_kernel(T* y, T x) { y[0] = __clzll(x); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__clzll(x)`. Run for `long long int` and `unsigned long long int` + * overloads. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device___clzll_Sanity_Positive", "", long long int, + unsigned long long int) { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(TestType)); + + __clzll_kernel<<<1, 1>>>(y.ptr(), static_cast(0)); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 64); + + TestType x = 1; + for (int i = 0; i < 64; ++i) { + __clzll_kernel<<<1, 1>>>(y.ptr(), x << i); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 63 - i); + } +} + +template __global__ void __ffs_kernel(T* y, T x) { y[0] = __ffs(x); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__ffs(x)`. Run for `int` and `unsigned int` overloads. 
+ * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device___ffs_Sanity_Positive", "", int, unsigned int) { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(TestType)); + + __ffs_kernel<<<1, 1>>>(y.ptr(), static_cast(0)); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 0); + + TestType x = 1; + for (int i = 0; i < 32; ++i) { + __ffs_kernel<<<1, 1>>>(y.ptr(), x << i); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == i + 1); + } +} + +template __global__ void __ffsll_kernel(T* y, T x) { y[0] = __ffsll(x); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__ffsll(x)`. Run for `long long int` and `unsigned long long int` + * overloads. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device___ffsll_Sanity_Positive", "", long long int, + unsigned long long int) { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(TestType)); + + __ffsll_kernel<<<1, 1>>>(y.ptr(), static_cast(0)); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 0); + + TestType x = 1; + for (int i = 0; i < 64; ++i) { + __ffsll_kernel<<<1, 1>>>(y.ptr(), x << i); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == i + 1); + } +} + +__global__ void __popc_kernel(unsigned int* y, unsigned int x) { y[0] = __popc(x); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__popc(x)`. 
+ * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___popc_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + __popc_kernel<<<1, 1>>>(y.ptr(), 0); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 0); + + unsigned int x = 0; + for (int i = 0; i < 32; ++i) { + __popc_kernel<<<1, 1>>>(y.ptr(), x |= (1u << i)); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == i + 1); + } +} + +__global__ void __popcll_kernel(unsigned long long int* y, unsigned long long int x) { + y[0] = __popcll(x); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__popcll(x)`. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___popcll_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, + sizeof(unsigned long long int)); + + __popcll_kernel<<<1, 1>>>(y.ptr(), 0); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == 0); + + unsigned long long int x = 0; + for (int i = 0; i < 64; ++i) { + __popcll_kernel<<<1, 1>>>(y.ptr(), x |= (1ull << i)); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == i + 1); + } +} + +__global__ void __mul24_kernel(int* y, int x1, int x2) { y[0] = __mul24(x1, x2); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__mul24(x,y)`. 
+ * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___mul24_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(int)); + + int x1 = GENERATE(0, -42, 42, 0xFFFFFFFF); + int x2 = GENERATE(0, -42, 42, 0xFFFFFFFF); + + __mul24_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == x1 * x2); +} + +__global__ void __umul24_kernel(unsigned int* y, unsigned int x1, unsigned int x2) { + y[0] = __umul24(x1, x2); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__umul24(x,y)`. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___umul24_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + unsigned int x1 = GENERATE(0, 42, 0xFFFFFF); + unsigned int x2 = GENERATE(0, 42, 0xFFFFFF); + + __umul24_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(y.ptr()[0] == x1 * x2); +} \ No newline at end of file diff --git a/catch/unit/math/integer_intrinsics_negative_kernels.cc b/catch/unit/math/integer_intrinsics_negative_kernels.cc new file mode 100644 index 0000000000..ec5ac98fe3 --- /dev/null +++ b/catch/unit/math/integer_intrinsics_negative_kernels.cc @@ -0,0 +1,67 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(int* x) { int result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { int result = func_name(x); } + +#define INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(long long int* x) { long long int result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { long long int result = func_name(x); } + +#define INTRINSIC_BINARY_INT_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(int* x, int y) { int result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(int x, int* y) { int result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, int y) { int result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(int x, Dummy y) { int result = func_name(x, y); } + +#define INTRINSIC_BINARY_LONGLONG_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(long long int* x, long long int y) { \ + long long int result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v2(long long int x, long long int* y) { \ + long long int result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v3(Dummy x, long long int y) { \ + long long int result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v4(long long int x, Dummy y) { \ + long long int result = func_name(x, y); \ + } + +INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(__brev) +INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(__clz) +INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(__ffs) +INTRINSIC_UNARY_INT_NEGATIVE_KERNELS(__popc) +INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(__brevll) +INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(__clzll) +INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(__ffsll) +INTRINSIC_UNARY_LONGLONG_NEGATIVE_KERNELS(__popcll) 
+INTRINSIC_BINARY_INT_NEGATIVE_KERNELS(__mul24) \ No newline at end of file diff --git a/catch/unit/math/single_precision_intrinsics.cc b/catch/unit/math/single_precision_intrinsics.cc new file mode 100644 index 0000000000..1d9d340c0f --- /dev/null +++ b/catch/unit/math/single_precision_intrinsics.cc @@ -0,0 +1,530 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +#include "unary_common.hh" +#include "binary_common.hh" +#include "ternary_common.hh" + +/********** Unary Functions **********/ + +#define MATH_UNARY_SP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(float* const ys, const size_t num_xs, float* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(xs[i]); \ + } \ + } + +#define MATH_UNARY_SP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + UnarySinglePrecisionTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_UNARY_SP_TEST_DEF(func_name, ref_func) \ + MATH_UNARY_SP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(float target, float x) + + +static float __frcp_rn_ref(float x) { return 1.0f / x; } + +MATH_UNARY_SP_KERNEL_DEF(__frcp_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__frcp_rn(x)` for all possible inputs. The error bounds are + * IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF_IMPL(__frcp_rn, __frcp_rn_ref, EqValidatorBuilderFactory()); + + +MATH_UNARY_SP_KERNEL_DEF(__fsqrt_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fsqrt_rn(x)` for all possible inputs. The results are + * compared against reference function `float std::sqrt(float)`. The error bounds are + * IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF_IMPL(__fsqrt_rn, static_cast(std::sqrt), + EqValidatorBuilderFactory()); + + +static float __frsqrt_rn_ref(float x) { return 1.0f / std::sqrt(x); } + +MATH_UNARY_SP_KERNEL_DEF(__frsqrt_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__frsqrt_rn(x)` for all possible inputs. The results are + * compared against the reference expression `1.0f / std::sqrt(x)`. The error bounds are + * IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF_IMPL(__frsqrt_rn, __frsqrt_rn_ref, EqValidatorBuilderFactory()); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__expf) { + const int64_t ulp_err = 2 + static_cast(std::floor(std::abs(1.16f * x))); + return ULPValidatorBuilderFactory(ulp_err)(target); +} + +MATH_UNARY_SP_KERNEL_DEF(__expf); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__expf(x)` for all possible inputs. The results are + * compared against reference function `double std::exp(double)`. The maximum ulp error is `2 + + * floor(abs(1.16 * x))`. 
+ * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__expf, static_cast(std::exp)); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__exp10f) { + const int64_t ulp_err = 2 + static_cast(std::floor(std::abs(2.95f * x))); + return ULPValidatorBuilderFactory(ulp_err)(target); +} + +MATH_UNARY_SP_KERNEL_DEF(__exp10f); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__exp10f(x)` for all possible inputs. The results are + * compared against reference function `double exp10(double)`. The maximum ulp error is `2 + + * floor(abs(2.95 * x))`. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__exp10f, static_cast(exp10)); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__logf) { + if (0.5f <= x && x <= 2.0f) { + const auto abs_err = std::pow(2.0, -21.41); + return AbsValidatorBuilderFactory(abs_err)(target); + } else { + return ULPValidatorBuilderFactory(3)(target); + } +} + +MATH_UNARY_SP_KERNEL_DEF(__logf); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__logf(x)` for all possible inputs. The results are + * compared against reference function `double std::log(double)`. For `x` in [0.5, 2], the maximum + * absolute error is 2^-21.41, otherwise, the maximum ulp error is 3. 
+ * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__logf, static_cast(std::log)); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__log2f) { + if (0.5f <= x && x <= 2.0f) { + const auto abs_err = std::pow(2.0, -22.0); + return AbsValidatorBuilderFactory(abs_err)(target); + } else { + return ULPValidatorBuilderFactory(2)(target); + } +} + +MATH_UNARY_SP_KERNEL_DEF(__log2f); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__log2f(x)` for all possible inputs. The results are + * compared against reference function `double std::log2(double)`. For `x` in [0.5, 2], the maximum + * absolute error is 2^-22, otherwise, the maximum ulp error is 2. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__log2f, static_cast(std::log2)); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__log10f) { + if (0.5f <= x && x <= 2.0f) { + const auto abs_err = std::pow(2.0, -24.0); + return AbsValidatorBuilderFactory(abs_err)(target); + } else { + return ULPValidatorBuilderFactory(3)(target); + } +} + +MATH_UNARY_SP_KERNEL_DEF(__log10f); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__log10f(x)` for all possible inputs. The results are + * compared against reference function `double std::log10(double)`. For `x` in [0.5, 2], the maximum + * absolute error is 2^-24, otherwise, the maximum ulp error is 3. 
+ * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__log10f, static_cast(std::log10)); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__sinf) { + if (-M_PI <= x && x <= M_PI) { + const auto abs_err = std::pow(2.0, -21.41); + return AbsValidatorBuilderFactory(abs_err)(target); + } else { + return NopValidatorBuilderFactory()(); + } +} + +MATH_UNARY_SP_KERNEL_DEF(__sinf); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__sinf(x)` for all possible inputs. The results are + * compared against reference function `double std::sin(double)`. For `x` in [-PI, PI], the maximum + * absolute error is 2^-21.41, and larger otherwise. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__sinf, static_cast(std::sin)); + + +__device__ float __sincosf_sin(float x) { + float sin, cos; + __sincosf(x, &sin, &cos); + return sin; +} + +MATH_UNARY_SP_KERNEL_DEF(__sincosf_sin); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__sincosf(x, sptr, cptr)` for all possible inputs. The + * results in `sptr` are compared against reference function `double std::sin(double)`. For `x` in + * [-PI, PI], the maximum absolute error is 2^-21.41, and larger otherwise. 
+ * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF_IMPL(__sincosf_sin, static_cast(std::sin), + __sinf_validator_builder); + + +MATH_UNARY_SP_VALIDATOR_BUILDER_DEF(__cosf) { + if (-M_PI <= x && x <= M_PI) { + const auto abs_err = std::pow(2.0, -21.19); + return AbsValidatorBuilderFactory(abs_err)(target); + } else { + return NopValidatorBuilderFactory()(); + } +} + +MATH_UNARY_SP_KERNEL_DEF(__cosf); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__cosf(x)` for all possible inputs. The results are + * compared against reference function `double std::cos(double)`. For `x` in [-PI, PI], the maximum + * absolute error is 2^-21.19, and larger otherwise. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF(__cosf, static_cast(std::cos)); + + +__device__ float __sincosf_cos(float x) { + float sin, cos; + __sincosf(x, &sin, &cos); + return cos; +} + +MATH_UNARY_SP_KERNEL_DEF(__sincosf_cos); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__sincosf(x, sptr, cptr)` for all possible inputs. The + * results in `cptr` are compared against reference function `double std::cos(double)`. For `x` in + * [-PI, PI], the maximum absolute error is 2^-21.19, and larger otherwise. 
+ * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_SP_TEST_DEF_IMPL(__sincosf_cos, static_cast(std::cos), + __cosf_validator_builder); + + +/********** Binary Functions **********/ + +#define MATH_BINARY_SP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(float* const ys, const size_t num_xs, float* const x1s, \ + float* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i]); \ + } \ + } + +#define MATH_BINARY_SP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + BinaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_BINARY_SP_TEST_DEF(func_name, ref_func) \ + MATH_BINARY_SP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_BINARY_SP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(float target, float x1, \ + float x2) + + +static float __fadd_rn_ref(float x1, float x2) { return x1 + x2; } + +MATH_BINARY_SP_KERNEL_DEF(__fadd_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fadd_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_SP_TEST_DEF_IMPL(__fadd_rn, __fadd_rn_ref, EqValidatorBuilderFactory()); + + +static float __fsub_rn_ref(float x1, float x2) { return x1 - x2; } + +MATH_BINARY_SP_KERNEL_DEF(__fsub_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fsub_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_SP_TEST_DEF_IMPL(__fsub_rn, __fsub_rn_ref, EqValidatorBuilderFactory()); + + +static float __fmul_rn_ref(float x1, float x2) { return x1 * x2; } + +MATH_BINARY_SP_KERNEL_DEF(__fmul_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fmul_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_SP_TEST_DEF_IMPL(__fmul_rn, __fmul_rn_ref, EqValidatorBuilderFactory()); + + +static float __fdiv_rn_ref(float x1, float x2) { return x1 / x2; } + +MATH_BINARY_SP_KERNEL_DEF(__fdiv_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fdiv_rn(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_SP_TEST_DEF_IMPL(__fdiv_rn, __fdiv_rn_ref, EqValidatorBuilderFactory()); + + +MATH_BINARY_SP_VALIDATOR_BUILDER_DEF(__fdividef) { + const auto abs_x2 = std::abs(x2); + if (std::pow(2.0f, -126.0f) <= abs_x2 && abs_x2 <= std::pow(2.0f, 126.0f)) { + return ULPValidatorBuilderFactory(2)(target); + } else { + return NopValidatorBuilderFactory()(); + } +} + +MATH_BINARY_SP_KERNEL_DEF(__fdividef); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fdividef(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. For `|y|` in [2^-126, 2^126], the + * maximum ulp error is 2. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_SP_TEST_DEF(__fdividef, __fdiv_rn_ref); + + +/********** Ternary Functions **********/ + +#define MATH_TERNARY_SP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(float* const ys, const size_t num_xs, float* const x1s, \ + float* const x2s, float* const x3s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i], x3s[i]); \ + } \ + } + +#define MATH_TERNARY_SP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + TernaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_TERNARY_SP_TEST_DEF(func_name, ref_func, validator_builder) \ + MATH_TERNARY_SP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_TERNARY_SP_VALIDATOR_BUILDER_DEF(func_name) \ + static 
std::unique_ptr> func_name##_validator_builder(float target, float x1, \ + float x2, float x3) + + +MATH_TERNARY_SP_KERNEL_DEF(__fmaf_rn); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__fmaf_rn(x,y,z)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/single_precision_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_TERNARY_SP_TEST_DEF_IMPL(__fmaf_rn, static_cast(std::fma), + EqValidatorBuilderFactory()); \ No newline at end of file diff --git a/catch/unit/math/single_precision_intrinsics_negative_kernels.cc b/catch/unit/math/single_precision_intrinsics_negative_kernels.cc new file mode 100644 index 0000000000..f293894f83 --- /dev/null +++ b/catch/unit/math/single_precision_intrinsics_negative_kernels.cc @@ -0,0 +1,56 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(float* x) { float result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { float result = func_name(x); } + +#define INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(float* x, float y) { float result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(float x, float* y) { float result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, float y) { float result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(float x, Dummy y) { float result = func_name(x, y); } + +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__fsqrt_rn) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__expf) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__exp10f) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__logf) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__log2f) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__log10f) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__sinf) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__cosf) +INTRINSIC_UNARY_FLOAT_NEGATIVE_KERNELS(__tanf) + +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fadd_rn) +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fsub_rn) +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fmul_rn) +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fdiv_rn) +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__fdividef) +INTRINSIC_BINARY_FLOAT_NEGATIVE_KERNELS(__powf) \ No newline at end of file From 46ada257307db5e604948478b6325092121e29c1 Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Mon, 22 Jan 2024 22:21:39 +0530 Subject: [PATCH 13/71] EXSWHTEC-284 - Implement 
tests for square/cube root device math functions #228 Change-Id: Ic19a440337cf3724f476c464125977b9b30b023e --- catch/unit/math/CMakeLists.txt | 10 + catch/unit/math/math_common.hh | 2 - .../math_root_negative_kernels_1Dand2D.cc | 107 ++++ .../math_root_negative_kernels_3Dand4D.cc | 119 ++++ .../math/math_root_negative_kernels_rtc.hh | 428 +++++++++++++ catch/unit/math/root_funcs.cc | 604 ++++++++++++++++++ 6 files changed, 1268 insertions(+), 2 deletions(-) create mode 100644 catch/unit/math/math_root_negative_kernels_1Dand2D.cc create mode 100644 catch/unit/math/math_root_negative_kernels_3Dand4D.cc create mode 100644 catch/unit/math/math_root_negative_kernels_rtc.hh create mode 100644 catch/unit/math/root_funcs.cc diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt index e552b9a8a8..33c4311038 100644 --- a/catch/unit/math/CMakeLists.txt +++ b/catch/unit/math/CMakeLists.txt @@ -25,6 +25,7 @@ set(TEST_SRC single_precision_intrinsics.cc double_precision_intrinsics.cc integer_intrinsics.cc + root_funcs.cc ) if(HIP_PLATFORM MATCHES "nvidia") @@ -76,3 +77,12 @@ add_test(NAME Unit_Integer_Intrinsics_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} integer_intrinsics_negative_kernels.cc 20) +add_test(NAME Unit_Device_root_1Dand2D_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + math_root_negative_kernels_1Dand2D.cc 68) + +add_test(NAME Unit_Device_root_3Dand4D_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + math_root_negative_kernels_3Dand4D.cc 56) diff --git a/catch/unit/math/math_common.hh b/catch/unit/math/math_common.hh index 7ebc9b8f5d..010780474f 100644 --- a/catch/unit/math/math_common.hh +++ b/catch/unit/math/math_common.hh @@ -7,10 +7,8 @@ in the Software 
without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE diff --git a/catch/unit/math/math_root_negative_kernels_1Dand2D.cc b/catch/unit/math/math_root_negative_kernels_1Dand2D.cc new file mode 100644 index 0000000000..688eaa95be --- /dev/null +++ b/catch/unit/math/math_root_negative_kernels_1Dand2D.cc @@ -0,0 +1,107 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL_ONE_ARG(func_name) \ + __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } \ + __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } \ + __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } + +#define NEGATIVE_KERNELS_SHELL_TWO_ARGS(func_name) \ + __global__ void func_name##_kernel_v1(double* x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(double x, double* y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(double x, Dummy y) { double result = func_name(x, y); } \ + __global__ void func_name##f_kernel_v1(float* x, float y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v2(float x, float* y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v3(Dummy x, float y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v4(float x, Dummy y) { float result = func_name##f(x, y); } + +#define NEGATIVE_KERNELS_SHELL_ARRAY_ARG(func_name) \ + __global__ void func_name##_kernel_v1(int* dim, const double* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v2(Dummy dim, const double* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v3(int dim, const int* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v4(int dim, const char* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v5(int dim, const short* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void 
func_name##_kernel_v6(int dim, const long* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v7(int dim, const long long* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v8(int dim, const float* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##_kernel_v9(int dim, const Dummy* a) { \ + double result = func_name(dim, a); \ + } \ + __global__ void func_name##f_kernel_v1(int* dim, const float* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v2(Dummy dim, const float* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v3(int dim, const int* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v4(int dim, const char* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v5(int dim, const short* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v6(int dim, const long* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v7(int dim, const long long* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v8(int dim, const double* a) { \ + float result = func_name##f(dim, a); \ + } \ + __global__ void func_name##f_kernel_v9(int dim, const Dummy* a) { \ + float result = func_name##f(dim, a); \ + } + +NEGATIVE_KERNELS_SHELL_ONE_ARG(sqrt) +NEGATIVE_KERNELS_SHELL_ONE_ARG(rsqrt) +NEGATIVE_KERNELS_SHELL_ONE_ARG(cbrt) +NEGATIVE_KERNELS_SHELL_ONE_ARG(rcbrt) +NEGATIVE_KERNELS_SHELL_TWO_ARGS(hypot) +NEGATIVE_KERNELS_SHELL_TWO_ARGS(rhypot) +NEGATIVE_KERNELS_SHELL_ARRAY_ARG(norm) +NEGATIVE_KERNELS_SHELL_ARRAY_ARG(rnorm) diff --git a/catch/unit/math/math_root_negative_kernels_3Dand4D.cc b/catch/unit/math/math_root_negative_kernels_3Dand4D.cc new file mode 100644 index 0000000000..be8d206af6 --- 
/dev/null +++ b/catch/unit/math/math_root_negative_kernels_3Dand4D.cc @@ -0,0 +1,119 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL_THREE_ARGS(func_name) \ + __global__ void func_name##_kernel_v1(double* x, double y, double z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v2(double x, double* y, double z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v3(double x, double y, double* z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v4(Dummy x, double y, double z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v5(double x, Dummy y, double z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v6(double x, double y, Dummy z) { \ + double result = func_name(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v1(float* x, float y, float z) { \ + float result = func_name##f(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v2(float x, float* y, float z) { \ + float result = func_name##f(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v3(float x, float y, float* z) { \ + float result = func_name##f(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v4(Dummy x, float y, float z) { \ + float result = func_name##f(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v5(float x, Dummy y, float z) { \ + float result = func_name##f(x, y, z); \ + } \ + __global__ void func_name##f_kernel_v6(float x, float y, Dummy z) { \ + float result = func_name##f(x, y, z); \ + } + +#define NEGATIVE_KERNELS_SHELL_FOUR_ARGS(func_name) \ + __global__ void func_name##_kernel_v1(double* x, double y, double z, double w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v2(double x, double* y, double z, double w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v3(double x, double y, double* z, double 
w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v4(double x, double y, double z, double* w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v5(Dummy x, double y, double z, double w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v6(double x, Dummy y, double z, double w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v7(double x, double y, Dummy z, double w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##_kernel_v8(double x, double y, double z, Dummy w) { \ + double result = func_name(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v1(float* x, float y, float z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v2(float x, float* y, float z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v3(float x, float y, float* z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v4(float x, float y, float z, float* w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v5(Dummy x, float y, float z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v6(float x, Dummy y, float z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v7(float x, float y, Dummy z, float w) { \ + float result = func_name##f(x, y, z, w); \ + } \ + __global__ void func_name##f_kernel_v8(float x, float y, float z, Dummy w) { \ + float result = func_name##f(x, y, z, w); \ + } + +NEGATIVE_KERNELS_SHELL_THREE_ARGS(norm3d) +NEGATIVE_KERNELS_SHELL_THREE_ARGS(rnorm3d) +NEGATIVE_KERNELS_SHELL_FOUR_ARGS(norm4d) +NEGATIVE_KERNELS_SHELL_FOUR_ARGS(rnorm4d) diff --git 
a/catch/unit/math/math_root_negative_kernels_rtc.hh b/catch/unit/math/math_root_negative_kernels_rtc.hh new file mode 100644 index 0000000000..53507ee23c --- /dev/null +++ b/catch/unit/math/math_root_negative_kernels_rtc.hh @@ -0,0 +1,428 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the math root negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kSqrt{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void sqrt_kernel_v1(double* x) { double result = sqrt(x); } + __global__ void sqrt_kernel_v2(Dummy x) { double result = sqrt(x); } + __global__ void sqrtf_kernel_v1(float* x) { float result = sqrtf(x); } + __global__ void sqrtf_kernel_v2(Dummy x) { float result = sqrtf(x); } +)"}; + +static constexpr auto kRsqrt{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rsqrt_kernel_v1(double* x) { double result = rsqrt(x); } + __global__ void rsqrt_kernel_v2(Dummy x) { double result = rsqrt(x); } + __global__ void rsqrtf_kernel_v1(float* x) { float result = rsqrtf(x); } + __global__ void rsqrtf_kernel_v2(Dummy x) { float result = rsqrtf(x); } +)"}; + +static constexpr auto kCbrt{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void cbrt_kernel_v1(double* x) { double result = cbrt(x); } + __global__ void cbrt_kernel_v2(Dummy x) { double result = cbrt(x); } + __global__ void cbrtf_kernel_v1(float* x) { float result = cbrtf(x); } + __global__ void cbrtf_kernel_v2(Dummy x) { float result = cbrtf(x); } +)"}; + +static constexpr auto kRcbrt{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rcbrt_kernel_v1(double* x) { double result = rcbrt(x); } + __global__ void rcbrt_kernel_v2(Dummy x) { double result = rcbrt(x); } + __global__ void rcbrtf_kernel_v1(float* x) { float result = rcbrtf(x); } + __global__ void rcbrtf_kernel_v2(Dummy x) { float result = rcbrtf(x); } +)"}; + +static constexpr auto kHypot{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void hypot_kernel_v1(double* x, double y) { double result = hypot(x, y); } + __global__ void hypot_kernel_v2(double x, double* y) { double result = hypot(x, y); } + __global__ void hypot_kernel_v3(Dummy x, 
double y) { double result = hypot(x, y); } + __global__ void hypot_kernel_v4(double x, Dummy y) { double result = hypot(x, y); } + __global__ void hypotf_kernel_v1(float* x, float y) { float result = hypotf(x, y); } + __global__ void hypotf_kernel_v2(float x, float* y) { float result = hypotf(x, y); } + __global__ void hypotf_kernel_v3(Dummy x, float y) { float result = hypotf(x, y); } + __global__ void hypotf_kernel_v4(float x, Dummy y) { float result = hypotf(x, y); } +)"}; + +static constexpr auto kRhypot{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rhypot_kernel_v1(double* x, double y) { double result = rhypot(x, y); } + __global__ void rhypot_kernel_v2(double x, double* y) { double result = rhypot(x, y); } + __global__ void rhypot_kernel_v3(Dummy x, double y) { double result = rhypot(x, y); } + __global__ void rhypot_kernel_v4(double x, Dummy y) { double result = rhypot(x, y); } + __global__ void rhypotf_kernel_v1(float* x, float y) { float result = rhypotf(x, y); } + __global__ void rhypotf_kernel_v2(float x, float* y) { float result = rhypotf(x, y); } + __global__ void rhypotf_kernel_v3(Dummy x, float y) { float result = rhypotf(x, y); } + __global__ void rhypotf_kernel_v4(float x, Dummy y) { float result = rhypotf(x, y); } +)"}; + +static constexpr auto kNorm3D{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void norm3d_kernel_v1(double* x, double y, double z) { + double result = norm3d(x, y, z); + } + __global__ void norm3d_kernel_v2(double x, double* y, double z) { + double result = norm3d(x, y, z); + } + __global__ void norm3d_kernel_v3(double x, double y, double* z) { + double result = norm3d(x, y, z); + } + __global__ void norm3d_kernel_v4(Dummy x, double y, double z) { + double result = norm3d(x, y, z); + } + __global__ void norm3d_kernel_v5(double x, Dummy y, double z) { + double result = norm3d(x, y, z); + } + __global__ void 
norm3d_kernel_v6(double x, double y, Dummy z) { + double result = norm3d(x, y, z); + } + __global__ void norm3df_kernel_v1(float* x, float y, float z) { + float result = norm3df(x, y, z); + } + __global__ void norm3df_kernel_v2(float x, float* y, float z) { + float result = norm3df(x, y, z); + } + __global__ void norm3df_kernel_v3(float x, float y, float* z) { + float result = norm3df(x, y, z); + } + __global__ void norm3df_kernel_v4(Dummy x, float y, float z) { + float result = norm3df(x, y, z); + } + __global__ void norm3df_kernel_v5(float x, Dummy y, float z) { + float result = norm3df(x, y, z); + } + __global__ void norm3df_kernel_v6(float x, float y, Dummy z) { + float result = norm3df(x, y, z); + } +)"}; + +static constexpr auto kRnorm3D{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rnorm3d_kernel_v1(double* x, double y, double z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3d_kernel_v2(double x, double* y, double z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3d_kernel_v3(double x, double y, double* z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3d_kernel_v4(Dummy x, double y, double z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3d_kernel_v5(double x, Dummy y, double z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3d_kernel_v6(double x, double y, Dummy z) { + double result = rnorm3d(x, y, z); + } + __global__ void rnorm3df_kernel_v1(float* x, float y, float z) { + float result = rnorm3df(x, y, z); + } + __global__ void rnorm3df_kernel_v2(float x, float* y, float z) { + float result = rnorm3df(x, y, z); + } + __global__ void rnorm3df_kernel_v3(float x, float y, float* z) { + float result = rnorm3df(x, y, z); + } + __global__ void rnorm3df_kernel_v4(Dummy x, float y, float z) { + float result = rnorm3df(x, y, z); + } + __global__ void rnorm3df_kernel_v5(float x, Dummy y, float z) { + float result = 
rnorm3df(x, y, z); + } + __global__ void rnorm3df_kernel_v6(float x, float y, Dummy z) { + float result = rnorm3df(x, y, z); + } +)"}; + +static constexpr auto kNorm4D{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void norm4d_kernel_v1(double* x, double y, double z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v2(double x, double* y, double z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v3(double x, double y, double* z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v4(double x, double y, double z, double* w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v5(Dummy x, double y, double z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v6(double x, Dummy y, double z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v7(double x, double y, Dummy z, double w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4d_kernel_v8(double x, double y, double z, Dummy w) { + double result = norm4d(x, y, z, w); + } + __global__ void norm4df_kernel_v1(float* x, float y, float z, float w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v2(float x, float* y, float z, float w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v3(float x, float y, float* z, float w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v4(float x, float y, float z, float* w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v5(Dummy x, float y, float z, float w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v6(float x, Dummy y, float z, float w) { + float result = norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v7(float x, float y, Dummy z, float w) { + float result = 
norm4df(x, y, z, w); + } + __global__ void norm4df_kernel_v8(float x, float y, float z, Dummy w) { + float result = norm4df(x, y, z, w); + } +)"}; + +static constexpr auto kRnorm4D{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rnorm4d_kernel_v1(double* x, double y, double z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v2(double x, double* y, double z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v3(double x, double y, double* z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v4(double x, double y, double z, double* w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v5(Dummy x, double y, double z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v6(double x, Dummy y, double z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v7(double x, double y, Dummy z, double w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4d_kernel_v8(double x, double y, double z, Dummy w) { + double result = rnorm4d(x, y, z, w); + } + __global__ void rnorm4df_kernel_v1(float* x, float y, float z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v2(float x, float* y, float z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v3(float x, float y, float* z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v4(float x, float y, float z, float* w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v5(Dummy x, float y, float z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v6(float x, Dummy y, float z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v7(float x, float y, 
Dummy z, float w) { + float result = rnorm4df(x, y, z, w); + } + __global__ void rnorm4df_kernel_v8(float x, float y, float z, Dummy w) { + float result = rnorm4df(x, y, z, w); + } +)"}; + +static constexpr auto kNorm{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void norm_kernel_v1(int* dim, const double* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v2(Dummy dim, const double* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v3(int dim, const int* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v4(int dim, const char* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v5(int dim, const short* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v6(int dim, const long* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v7(int dim, const long long* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v8(int dim, const float* a) { + double result = norm(dim, a); + } + __global__ void norm_kernel_v9(int dim, const Dummy* a) { + double result = norm(dim, a); + } + __global__ void normf_kernel_v1(int* dim, const float* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v2(Dummy dim, const float* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v3(int dim, const int* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v4(int dim, const char* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v5(int dim, const short* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v6(int dim, const long* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v7(int dim, const long long* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v8(int dim, const double* a) { + float result = normf(dim, a); + } + __global__ void normf_kernel_v9(int dim, const 
Dummy* a) { + double result = normf(dim, a); + } +)"}; + +static constexpr auto kRnorm{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void rnorm_kernel_v1(int* dim, const double* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v2(Dummy dim, const double* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v3(int dim, const int* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v4(int dim, const char* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v5(int dim, const short* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v6(int dim, const long* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v7(int dim, const long long* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v8(int dim, const float* a) { + double result = rnorm(dim, a); + } + __global__ void rnorm_kernel_v9(int dim, const Dummy* a) { + double result = rnorm(dim, a); + } + __global__ void rnormf_kernel_v1(int* dim, const float* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v2(Dummy dim, const float* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v3(int dim, const int* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v4(int dim, const char* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v5(int dim, const short* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v6(int dim, const long* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v7(int dim, const long long* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v8(int dim, const double* a) { + float result = rnormf(dim, a); + } + __global__ void rnormf_kernel_v9(int dim, const Dummy* a) { + double result = rnormf(dim, a); + } +)"}; diff --git 
a/catch/unit/math/root_funcs.cc b/catch/unit/math/root_funcs.cc new file mode 100644 index 0000000000..1638ca8b04 --- /dev/null +++ b/catch/unit/math/root_funcs.cc @@ -0,0 +1,604 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "unary_common.hh" +#include "binary_common.hh" +#include "ternary_common.hh" +#include "quaternary_common.hh" +#include "math_root_negative_kernels_rtc.hh" + +/** + * @addtogroup RootMathFuncs RootMathFuncs + * @{ + * @ingroup MathTest + */ + +/********** Unary Functions **********/ + +MATH_UNARY_KERNEL_DEF(sqrt) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `sqrtf(x)` for all possible inputs. The results are + * compared against reference function `float std::sqrt(float)`. The maximum ulp error is 1.
+ * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_sqrtf_Accuracy_Positive") { + float (*ref)(float) = std::sqrt; + UnarySinglePrecisionTest(sqrt_kernel, ref, ULPValidatorBuilderFactory(1)); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `sqrt(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `double std::sqrt(double)`. The error bounds are + * IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_sqrt_Accuracy_Positive") { + double (*ref)(double) = std::sqrt; + UnaryDoublePrecisionTest(sqrt_kernel, ref, ULPValidatorBuilderFactory(0)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for sqrtf and sqrt. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_sqrt_sqrtf_Negative_RTC") { NegativeTestRTCWrapper<4>(kSqrt); } + +MATH_UNARY_KERNEL_DEF(rsqrt) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `rsqrtf(x)` for all possible inputs. The maximum ulp error + * is 2. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rsqrtf_Accuracy_Positive") { + auto rsqrt_ref = [](double arg) -> double { return 1. 
/ std::sqrt(arg); }; + double (*ref)(double) = rsqrt_ref; + UnarySinglePrecisionTest(rsqrt_kernel, ref, ULPValidatorBuilderFactory(2)); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `rsqrt(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rsqrt_Accuracy_Positive") { + auto rsqrt_ref = [](long double arg) -> long double { return 1.L / std::sqrt(arg); }; + long double (*ref)(long double) = rsqrt_ref; + UnaryDoublePrecisionTest(rsqrt_kernel, ref, ULPValidatorBuilderFactory(1)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for rsqrtf and rsqrt. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rsqrt_rsqrtf_Negative_RTC") { NegativeTestRTCWrapper<4>(kRsqrt); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `cbrtf(x)` for all possible inputs and `cbrt(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::cbrt(T)`. The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_TEST_DEF(cbrt, std::cbrt, 1, 1) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for cbrtf and cbrt. 
+ * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_cbrt_cbrtf_Negative_RTC") { NegativeTestRTCWrapper<4>(kCbrt); } + +MATH_UNARY_KERNEL_DEF(rcbrt) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `rcbrtf(x)` for all possible inputs. The maximum ulp error + * is 1. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rcbrtf_Accuracy_Positive") { + auto rcbrt_ref = [](double arg) -> double { return 1. / std::cbrt(arg); }; + double (*ref)(double) = rcbrt_ref; + UnarySinglePrecisionTest(rcbrt_kernel, ref, ULPValidatorBuilderFactory(1)); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `rcbrt(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rcbrt_Accuracy_Positive") { + auto rcbrt_ref = [](long double arg) -> long double { return 1. / std::cbrt(arg); }; + long double (*ref)(long double) = rcbrt_ref; + UnaryDoublePrecisionTest(rcbrt_kernel, ref, ULPValidatorBuilderFactory(1)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for rcbrtf and rcbrt. 
+ * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rcbrt_rcbrtf_Negative_RTC") { NegativeTestRTCWrapper<4>(kRcbrt); } + +/********** Binary Functions **********/ + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `hypotf(x, y)` and `hypot(x, y)` against a table of + * difficult values, followed by a large number of randomly generated values. The results are + * compared against reference function `T std::hypot(T, T)`. The maximum ulp error for single + * precision is 3 and for double precision is 2. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_WITHIN_ULP_TEST_DEF(hypot, std::hypot, 3, 2) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for hypotf and hypot. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_hypot_hypotf_Negative_RTC") { NegativeTestRTCWrapper<8>(kHypot); } + +MATH_BINARY_KERNEL_DEF(rhypot) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `rhypotf(x, y)` and `rhypot(x, y)`against a table of + * difficult values, followed by a large number of randomly generated values. The maximum ulp error + * for single precision is 2 and for double precision is 1. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_rhypot_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + auto rhypot_ref = [](RT arg1, RT arg2) -> RT { return 1. 
/ std::hypot(arg1, arg2); }; + RT (*ref)(RT, RT) = rhypot_ref; + const auto ulp = std::is_same_v ? 2 : 1; + BinaryFloatingPointTest(rhypot_kernel, ref, ULPValidatorBuilderFactory(ulp)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for rhypotf and rhypot. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rhypot_rhypotf_Negative_RTC") { NegativeTestRTCWrapper<8>(kRhypot); } + +/********** Ternary Functions **********/ + +MATH_TERNARY_KERNEL_DEF(norm3d) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `norm3df(x, y, z)` and `norm3d(x, y, z)` against a table of + * difficult values, followed by a large number of randomly generated values. The maximum ulp error + * for single precision is 3 and for double precision is 2. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_norm3d_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + auto norm3d_ref = [](RT arg1, RT arg2, RT arg3) -> RT { + if (std::isinf(arg1) || std::isinf(arg2) || std::isinf(arg3)) { + return std::numeric_limits::infinity(); + } + return std::sqrt(arg1 * arg1 + arg2 * arg2 + arg3 * arg3); + }; + RT (*ref)(RT, RT, RT) = norm3d_ref; + const auto ulp = std::is_same_v ? 3 : 2; + TernaryFloatingPointTest(norm3d_kernel, ref, ULPValidatorBuilderFactory(ulp)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for norm3df and norm3d. 
+ * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_norm3d_norm3df_Negative_RTC") { NegativeTestRTCWrapper<12>(kNorm3D); } + +MATH_TERNARY_KERNEL_DEF(rnorm3d) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `rnorm3df(x, y, z)` and `rnorm3d(x, y, z)`against a table of + * difficult values, followed by a large number of randomly generated values. The maximum ulp error + * for single precision is 2 and for double precision is 1. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_rnorm3d_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + auto rnorm3d_ref = [](RT arg1, RT arg2, RT arg3) -> RT { + if (std::isinf(arg1) || std::isinf(arg2) || std::isinf(arg3)) { + return 0; + } + return 1. / std::sqrt(arg1 * arg1 + arg2 * arg2 + arg3 * arg3); + }; + RT (*ref)(RT, RT, RT) = rnorm3d_ref; + const auto ulp = std::is_same_v ? 2 : 1; + TernaryFloatingPointTest(rnorm3d_kernel, ref, + ULPValidatorBuilderFactory(ulp)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for rnorm3df and rnorm3d. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_rnorm3d_rnorm3df_Negative_RTC") { NegativeTestRTCWrapper<12>(kRnorm3D); } + +/********** Quaternary Functions **********/ + +MATH_QUATERNARY_KERNEL_DEF(norm4d) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `norm4df(x, y, z, t)` and `norm4d(x, y, z, t)` against a + * table of difficult values, followed by a large number of randomly generated values. 
The maximum + * ulp error for single precision is 3 and for double precision is 2. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_norm4d_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + auto norm4d_ref = [](RT arg1, RT arg2, RT arg3, RT arg4) -> RT { + if (std::isinf(arg1) || std::isinf(arg2) || std::isinf(arg3) || std::isinf(arg4)) { + return std::numeric_limits::infinity(); + } + return std::sqrt(arg1 * arg1 + arg2 * arg2 + arg3 * arg3 + arg4 * arg4); + }; + RT (*ref)(RT, RT, RT, RT) = norm4d_ref; + const auto ulp = std::is_same_v ? 3 : 2; + QuaternaryFloatingPointTest(norm4d_kernel, ref, + ULPValidatorBuilderFactory(ulp)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for norm4df and norm4d. + * + * Test source + * ------------------------ + * - unit/math/root_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_norm4d_norm4df_Negative_RTC") { NegativeTestRTCWrapper<16>(kNorm4D); } + +MATH_QUATERNARY_KERNEL_DEF(rnorm4d) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `rnorm4df(x, y, z, t)` and `rnorm4d(x, y, z, t)`against a + * table of difficult values, followed by a large number of randomly generated values. The maximum + * ulp error for single precision is 2 and for double precision is 1. 
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_rnorm4d_Accuracy_Positive", "", float, double) {
+  using RT = RefType_t;
+  auto rnorm4d_ref = [](RT arg1, RT arg2, RT arg3, RT arg4) -> RT {
+    if (std::isinf(arg1) || std::isinf(arg2) || std::isinf(arg3) || std::isinf(arg4)) {
+      return 0;
+    }
+    return 1. / std::sqrt(arg1 * arg1 + arg2 * arg2 + arg3 * arg3 + arg4 * arg4);
+  };
+  RT (*ref)(RT, RT, RT, RT) = rnorm4d_ref;
+  const auto ulp = std::is_same_v ? 2 : 1;
+  QuaternaryFloatingPointTest(rnorm4d_kernel, ref,
+                              ULPValidatorBuilderFactory(ulp));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - RTCs kernels that pass combinations of arguments of invalid types for rnorm4df and rnorm4d.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rnorm4d_rnorm4df_Negative_RTC") { NegativeTestRTCWrapper<16>(kRnorm4D); }
+
+/********** norm Function **********/
+
+#define MATH_NORM_KERNEL_DEF(func_name) \
+  template __global__ void func_name##_kernel(T* const ys, int dim, T* const x1s) { \
+    if constexpr (std::is_same_v) { \
+      *ys = func_name##f(dim, x1s); \
+    } else if constexpr (std::is_same_v) { \
+      *ys = func_name(dim, x1s); \
+    } \
+  }
+
+template
+void NormSimpleTest(F kernel, RF ref_func, const ValidatorBuilder& validator_builder) {
+  const auto max_dim = 10000;  // exclusive upper bound on the tested dimension
+
+  LinearAllocGuard x{LinearAllocs::hipHostMalloc, max_dim * sizeof(T)};
+  LinearAllocGuard x_dev{LinearAllocs::hipMalloc, max_dim * sizeof(T)};
+  LinearAllocGuard y{LinearAllocs::hipHostMalloc, sizeof(T)};
+  LinearAllocGuard y_dev{LinearAllocs::hipMalloc, sizeof(T)};
+
+  std::fill_n(x.ptr(), max_dim, 1);  // every input element is 1
+  HIP_CHECK(hipMemcpy(x_dev.ptr(), x.ptr(), max_dim * sizeof(T), hipMemcpyHostToDevice));
+
+  for (int i = 1; i < max_dim; i++) {  // int, like max_dim and the kernels' `int dim`
+    kernel<<<1, 1>>>(y_dev.ptr(), i, x_dev.ptr());
+    HIP_CHECK(hipGetLastError());
+
+    HIP_CHECK(hipMemcpy(y.ptr(), y_dev.ptr(), sizeof(T), hipMemcpyDeviceToHost));
+    const auto actual_val = *y.ptr();
+    const auto ref_val = static_cast(ref_func(i, x.ptr()));  // host reference, same dim/input
+    const auto validator = validator_builder(ref_val);
+
+    if (!validator->match(actual_val)) {
+      std::stringstream ss;
+      ss << std::scientific << std::setprecision(std::numeric_limits::max_digits10 - 1);
+      ss << "Validation fails for dim: " << i << " " << actual_val << " " << ref_val;
+      INFO(ss.str());
+      REQUIRE(false);  // fail on the first mismatching dimension
+    }
+  }
+}
+
+MATH_NORM_KERNEL_DEF(norm)
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `normf(dim, arr)` and `norm(dim, arr)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_norm_Sanity_Positive", "", float, double) {
+  using RT = RefType_t;
+  auto norm_ref = [](int dim, TestType* args) -> RT {
+    RT sum = 0;
+    for (int i = 0; i < dim; i++) {
+      if (std::isinf(args[i])) return std::numeric_limits::infinity();
+      sum += static_cast(args[i]) * static_cast(args[i]);
+    }
+    return std::sqrt(sum);
+  };
+  RT (*ref)(int, TestType*) = norm_ref;
+
+  NormSimpleTest(norm_kernel, ref, ULPValidatorBuilderFactory(10));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - RTCs kernels that pass combinations of arguments of invalid types for normf and norm.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_norm_normf_Negative_RTC") { NegativeTestRTCWrapper<18>(kNorm); }
+
+MATH_NORM_KERNEL_DEF(rnorm)
+
+/**
+ * Test Description
+ * ------------------------
+ * - Sanity test for `rnormf(dim, arr)` and `rnorm(dim, arr)`.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_Device_rnorm_Sanity_Positive", "", float, double) {
+  using RT = RefType_t;
+  auto rnorm_ref = [](int dim, TestType* args) -> RT {  // host reference for rnorm
+    RT sum = 0;
+    for (int i = 0; i < dim; i++) {
+      if (std::isinf(args[i])) return 0;  // 1 / inf == 0, as in rnorm3d_ref and rnorm4d_ref
+      sum += static_cast(args[i]) * static_cast(args[i]);
+    }
+    return 1. / std::sqrt(sum);  // reciprocal of the Euclidean norm
+  };
+  RT (*ref)(int, TestType*) = rnorm_ref;
+
+  NormSimpleTest(rnorm_kernel, ref, ULPValidatorBuilderFactory(10));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - RTCs kernels that pass combinations of arguments of invalid types for rnormf and rnorm.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/root_funcs.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_rnorm_rnormf_Negative_RTC") { NegativeTestRTCWrapper<18>(kRnorm); }
From 87d601411b1364fc625c59dd4c053f05726c5313 Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Mon, 22 Jan 2024 23:41:01 +0530 Subject: [PATCH 14/71] EXSWHTEC-285 - Implement tests for exponential and power device math functions #229 Change-Id: I34ad7ee92960500bcd14dfd7d230ca8f8f77c172 --- catch/unit/math/CMakeLists.txt | 5 + catch/unit/math/math_pow_negative_kernels.cc | 92 ++++ .../math/math_pow_negative_kernels_rtc.hh | 150 ++++++ catch/unit/math/math_special_values.hh | 9 +- catch/unit/math/pow_common.hh | 134 ++++++ catch/unit/math/pow_funcs.cc | 455 ++++++++++++++++++ 6 files changed, 844 insertions(+), 1 deletion(-) create mode 100644 catch/unit/math/math_pow_negative_kernels.cc create mode 100644 catch/unit/math/math_pow_negative_kernels_rtc.hh create mode 100644 catch/unit/math/pow_common.hh create mode 100644 catch/unit/math/pow_funcs.cc diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt index
33c4311038..dba9476a0e 100644 --- a/catch/unit/math/CMakeLists.txt +++ b/catch/unit/math/CMakeLists.txt @@ -26,6 +26,7 @@ set(TEST_SRC double_precision_intrinsics.cc integer_intrinsics.cc root_funcs.cc + pow_funcs.cc ) if(HIP_PLATFORM MATCHES "nvidia") @@ -86,3 +87,7 @@ add_test(NAME Unit_Device_root_3Dand4D_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} math_root_negative_kernels_3Dand4D.cc 56) +add_test(NAME Unit_Device_pow_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + math_pow_negative_kernels.cc 76) diff --git a/catch/unit/math/math_pow_negative_kernels.cc b/catch/unit/math/math_pow_negative_kernels.cc new file mode 100644 index 0000000000..c338e744a9 --- /dev/null +++ b/catch/unit/math/math_pow_negative_kernels.cc @@ -0,0 +1,92 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL_EXP(func_name) \ + __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } \ + __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } \ + __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } + +#define NEGATIVE_KERNELS_SHELL_INT_2ND(func_name) \ + __global__ void func_name##_kernel_v1(double* x, int e) { double result = func_name(x, e); } \ + __global__ void func_name##_kernel_v2(Dummy x, int e) { double result = func_name(x, e); } \ + __global__ void func_name##_kernel_v3(double x, int* e) { double result = func_name(x, e); } \ + __global__ void func_name##_kernel_v4(double x, Dummy e) { double result = func_name(x, e); } \ + __global__ void func_name##f_kernel_v1(float* x, int e) { float result = func_name##f(x, e); } \ + __global__ void func_name##f_kernel_v2(Dummy x, int e) { float result = func_name##f(x, e); } \ + __global__ void func_name##f_kernel_v3(float x, int* e) { float result = func_name##f(x, e); } \ + __global__ void func_name##f_kernel_v4(float x, Dummy e) { float result = func_name##f(x, e); } + + +NEGATIVE_KERNELS_SHELL_EXP(exp) +NEGATIVE_KERNELS_SHELL_EXP(exp2) +NEGATIVE_KERNELS_SHELL_EXP(exp10) +NEGATIVE_KERNELS_SHELL_EXP(expm1) + +__global__ void frexp_kernel_v1(double* x, int* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v2(Dummy x, int* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v3(double x, char* nptr) { double result = frexp(x, 
nptr); } +__global__ void frexp_kernel_v4(double x, short* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v5(double x, long* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v6(double x, long long* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v7(double x, float* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v8(double x, double* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v9(double x, Dummy* nptr) { double result = frexp(x, nptr); } +__global__ void frexp_kernel_v10(double x, const int* nptr) { double result = frexp(x, nptr); } +__global__ void frexpf_kernel_v1(float* x, int* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v2(Dummy x, int* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v3(float x, char* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v4(float x, short* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v5(float x, long* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v6(float x, long long* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v7(float x, float* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v8(float x, double* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v9(float x, Dummy* nptr) { float result = frexpf(x, nptr); } +__global__ void frexpf_kernel_v10(float x, const int* nptr) { float result = frexpf(x, nptr); } + +NEGATIVE_KERNELS_SHELL_INT_2ND(ldexp) + +__global__ void pow_kernel_v1(double* x, double e) { double result = pow(x, e); } +__global__ void pow_kernel_v2(Dummy x, double e) { double result = pow(x, e); } +__global__ void pow_kernel_v3(double x, double* e) { double result = pow(x, e); } +__global__ void pow_kernel_v4(double x, Dummy e) { double result = pow(x, e); } +__global__ void powf_kernel_v1(float* x, 
float e) { float result = powf(x, e); } +__global__ void powf_kernel_v2(Dummy x, float e) { float result = powf(x, e); } +__global__ void powf_kernel_v3(float x, float* e) { float result = powf(x, e); } +__global__ void powf_kernel_v4(float x, Dummy e) { float result = powf(x, e); } + +NEGATIVE_KERNELS_SHELL_INT_2ND(powi) +NEGATIVE_KERNELS_SHELL_INT_2ND(scalbn) + +__global__ void scalbln_kernel_v1(double* x, long int n) { double result = scalbln(x, n); } +__global__ void scalbln_kernel_v2(Dummy x, long int n) { double result = scalbln(x, n); } +__global__ void scalbln_kernel_v3(double x, long int* n) { double result = scalbln(x, n); } +__global__ void scalbln_kernel_v4(double x, Dummy n) { double result = scalbln(x, n); } +__global__ void scalblnf_kernel_v1(float* x, long int n) { float result = scalblnf(x, n); } +__global__ void scalblnf_kernel_v2(Dummy x, long int n) { float result = scalblnf(x, n); } +__global__ void scalblnf_kernel_v3(float x, long int* n) { float result = scalblnf(x, n); } +__global__ void scalblnf_kernel_v4(float x, Dummy n) { float result = scalblnf(x, n); } diff --git a/catch/unit/math/math_pow_negative_kernels_rtc.hh b/catch/unit/math/math_pow_negative_kernels_rtc.hh new file mode 100644 index 0000000000..7c48640bec --- /dev/null +++ b/catch/unit/math/math_pow_negative_kernels_rtc.hh @@ -0,0 +1,150 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the math pow negative Test Cases that are using RTC. +*/ + +static constexpr auto kExp{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void exp_kernel_v1(double* x) { double result = exp(x); } + __global__ void exp_kernel_v2(Dummy x) { double result = exp(x); } + __global__ void expf_kernel_v1(float* x) { float result = expf(x); } + __global__ void expf_kernel_v2(Dummy x) { float result = expf(x); } +)"}; + +static constexpr auto kExp2{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void exp2_kernel_v1(double* x) { double result = exp2(x); } + __global__ void exp2_kernel_v2(Dummy x) { double result = exp2(x); } + __global__ void exp2f_kernel_v1(float* x) { float result = exp2f(x); } + __global__ void exp2f_kernel_v2(Dummy x) { float result = exp2f(x); } +)"}; + +static constexpr auto kExp10{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void exp10_kernel_v1(double* x) { double result = exp10(x); } + __global__ void exp10_kernel_v2(Dummy x) { double result = exp10(x); } + __global__ void exp10f_kernel_v1(float* x) { float result = exp10f(x); } + __global__ void exp10f_kernel_v2(Dummy x) { float result = exp10f(x); } +)"}; + +static constexpr auto kExpm1{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void expm1_kernel_v1(double* x) { double result = expm1(x); } 
+ __global__ void expm1_kernel_v2(Dummy x) { double result = expm1(x); } + __global__ void expm1f_kernel_v1(float* x) { float result = expm1f(x); } + __global__ void expm1f_kernel_v2(Dummy x) { float result = expm1f(x); } +)"}; + +static constexpr auto kFrexp{R"( + __global__ void frexp_kernel_v1(double* x, int* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v2(Dummy x, int* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v3(double x, char* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v4(double x, short* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v5(double x, long* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v6(double x, long long* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v7(double x, float* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v8(double x, double* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v9(double x, Dummy* nptr) { double result = frexp(x, nptr); } + __global__ void frexp_kernel_v10(double x, const int* nptr) { double result = frexp(x, nptr); } + __global__ void frexpf_kernel_v1(float* x, int* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v2(Dummy x, int* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v3(float x, char* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v4(float x, short* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v5(float x, long* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v6(float x, long long* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v7(float x, float* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v8(float x, double* nptr) { float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v9(float x, Dummy* nptr) 
{ float result = frexpf(x, nptr); } + __global__ void frexpf_kernel_v10(float x, const int* nptr) { float result = frexpf(x, nptr); } +)"}; + +static constexpr auto kLdexp{R"( + __global__ void ldexp_kernel_v1(double* x, int e) { double result = ldexp(x, e); } + __global__ void ldexp_kernel_v2(Dummy x, int e) { double result = ldexp(x, e); } + __global__ void ldexp_kernel_v3(double x, int* e) { double result = ldexp(x, e); } + __global__ void ldexp_kernel_v4(double x, Dummy e) { double result = ldexp(x, e); } + __global__ void ldexpf_kernel_v1(float* x, int e) { float result = ldexpf(x, e); } + __global__ void ldexpf_kernel_v2(Dummy x, int e) { float result = ldexpf(x, e); } + __global__ void ldexpf_kernel_v3(float x, int* e) { float result = ldexpf(x, e); } + __global__ void ldexpf_kernel_v4(float x, Dummy e) { float result = ldexpf(x, e); } +)"}; + +static constexpr auto kPow{R"( + __global__ void pow_kernel_v1(double* x, double e) { double result = pow(x, e); } + __global__ void pow_kernel_v2(Dummy x, double e) { double result = pow(x, e); } + __global__ void pow_kernel_v3(double x, double* e) { double result = pow(x, e); } + __global__ void pow_kernel_v4(double x, Dummy e) { double result = pow(x, e); } + __global__ void powf_kernel_v1(float* x, float e) { float result = powf(x, e); } + __global__ void powf_kernel_v2(Dummy x, float e) { float result = powf(x, e); } + __global__ void powf_kernel_v3(float x, float* e) { float result = powf(x, e); } + __global__ void powf_kernel_v4(float x, Dummy e) { float result = powf(x, e); } +)"}; + +static constexpr auto kPowi{R"( + __global__ void powi_kernel_v1(double* x, int e) { double result = powi(x, e); } + __global__ void powi_kernel_v2(Dummy x, int e) { double result = powi(x, e); } + __global__ void powi_kernel_v3(double x, int* e) { double result = powi(x, e); } + __global__ void powi_kernel_v4(double x, Dummy e) { double result = powi(x, e); } + __global__ void powif_kernel_v1(float* x, int e) { float result = 
powif(x, e); } + __global__ void powif_kernel_v2(Dummy x, int e) { float result = powif(x, e); } + __global__ void powif_kernel_v3(float x, int* e) { float result = powif(x, e); } + __global__ void powif_kernel_v4(float x, Dummy e) { float result = powif(x, e); } +)"}; + +static constexpr auto kScalbn{R"( + __global__ void scalbn_kernel_v1(double* x, int e) { double result = scalbn(x, e); } + __global__ void scalbn_kernel_v2(Dummy x, int e) { double result = scalbn(x, e); } + __global__ void scalbn_kernel_v3(double x, int* e) { double result = scalbn(x, e); } + __global__ void scalbn_kernel_v4(double x, Dummy e) { double result = scalbn(x, e); } + __global__ void scalbnf_kernel_v1(float* x, int e) { float result = scalbnf(x, e); } + __global__ void scalbnf_kernel_v2(Dummy x, int e) { float result = scalbnf(x, e); } + __global__ void scalbnf_kernel_v3(float x, int* e) { float result = scalbnf(x, e); } + __global__ void scalbnf_kernel_v4(float x, Dummy e) { float result = scalbnf(x, e); } +)"}; + +static constexpr auto kScalbln{R"( + __global__ void scalbln_kernel_v1(double* x, long int n) { double result = scalbln(x, n); } + __global__ void scalbln_kernel_v2(Dummy x, long int n) { double result = scalbln(x, n); } + __global__ void scalbln_kernel_v3(double x, long int* n) { double result = scalbln(x, n); } + __global__ void scalbln_kernel_v4(double x, Dummy n) { double result = scalbln(x, n); } + __global__ void scalblnf_kernel_v1(float* x, long int n) { float result = scalblnf(x, n); } + __global__ void scalblnf_kernel_v2(Dummy x, long int n) { float result = scalblnf(x, n); } + __global__ void scalblnf_kernel_v3(float x, long int* n) { float result = scalblnf(x, n); } + __global__ void scalblnf_kernel_v4(float x, Dummy n) { float result = scalblnf(x, n); } +)"}; diff --git a/catch/unit/math/math_special_values.hh b/catch/unit/math/math_special_values.hh index bc5488fc31..d68a246aca 100644 --- a/catch/unit/math/math_special_values.hh +++ 
b/catch/unit/math/math_special_values.hh @@ -277,6 +277,12 @@ inline constexpr std::array kSpecialValuesFloat{ +0.0f, }; +inline constexpr std::array kSpecialValuesInt{ + 0, 1, 2, 3, 126, 127, 128, 1022, 1023, 1024, 0x02000001, 0x04000001, 1465264071, 1488522147, + std::numeric_limits::max(), -1, -2, -3, -126, -127, -128, -1022, -1023, -11024, -0x02000001, + -0x04000001, -1465264071, -1488522147, std::numeric_limits::min(), -std::numeric_limits::max() +}; + template struct SpecialVals { const T* const data; const size_t size; @@ -284,4 +290,5 @@ template struct SpecialVals { inline constexpr auto kSpecialValRegistry = std::make_tuple(SpecialVals{kSpecialValuesFloat.data(), kSpecialValuesFloat.size()}, - SpecialVals{kSpecialValuesDouble.data(), kSpecialValuesDouble.size()}); + SpecialVals{kSpecialValuesDouble.data(), kSpecialValuesDouble.size()}, + SpecialVals{kSpecialValuesInt.data(), kSpecialValuesInt.size()}); diff --git a/catch/unit/math/pow_common.hh b/catch/unit/math/pow_common.hh new file mode 100644 index 0000000000..95402c72d1 --- /dev/null +++ b/catch/unit/math/pow_common.hh @@ -0,0 +1,134 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "math_common.hh" +#include "math_special_values.hh" + +#include + +namespace cg = cooperative_groups; + +#define MATH_POW_INT_KERNEL_DEF(func_name) \ + template \ + __global__ void func_name##_kernel(T1* const ys, const size_t num_xs, T1* const x1s, \ + T2* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + if constexpr (std::is_same_v) { \ + ys[i] = func_name##f(x1s[i], x2s[i]); \ + } else if constexpr (std::is_same_v) { \ + ys[i] = func_name(x1s[i], x2s[i]); \ + } \ + } \ + } + +template +using kernel_pow_int_sig = void (*)(T1*, const size_t, T1*, T2*); + +template using ref_pow_int_sig = T1 (*)(T1, T2); + +template +void PowIntFloatingPointBruteForceTest(kernel_pow_int_sig kernel, + ref_pow_int_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const uint64_t num_iterations = GetTestIterationCount(); + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(T1) * 2 + sizeof(T2)), num_iterations); + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T1)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T2)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + for (uint64_t i = 0ul; i < num_iterations; i += batch_size) { + batch_size = std::min(max_batch_size, num_iterations - i); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto 
base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + thread_pool.Post([=, &x1s, &x2s] { + const auto generator1 = [=] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_real_distribution> unif_dist(std::numeric_limits::lowest(), + std::numeric_limits::max()); + return static_cast(unif_dist(rng)); + }; + const auto generator2 = [] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_int_distribution unif_dist(std::numeric_limits::lowest(), + std::numeric_limits::max()); + return unif_dist(rng); + }; + std::generate(x1s.ptr() + base_idx, x1s.ptr() + base_idx + sub_batch_size, generator1); + std::generate(x2s.ptr() + base_idx, x2s.ptr() + base_idx + sub_batch_size, generator2); + }); + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, x1s.ptr(), + x2s.ptr()); + } +} + +template +void PowIntFloatingPointSpecialValuesTest(kernel_pow_int_sig kernel, + ref_pow_int_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto values1 = std::get>(kSpecialValRegistry); + const auto values2 = std::get>(kSpecialValRegistry); + + const auto size = values1.size * values2.size; + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, size * sizeof(T1)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, size * sizeof(T2)}; + + for (auto i = 0u; i < values1.size; ++i) { + for (auto j = 0u; j < values2.size; ++j) { + x1s.ptr()[i * values2.size + j] = values1.data[i]; + x2s.ptr()[i * values2.size + j] = static_cast(values2.data[j]); + } + } + + MathTest math_test(kernel, size); + math_test.template Run(validator_builder, grid_size, block_size, ref_func, size, x1s.ptr(), + x2s.ptr()); +} + +template +void PowIntFloatingPointTest(kernel_pow_int_sig kernel, ref_pow_int_sig ref_func, + 
const ValidatorBuilder& validator_builder) { + SECTION("Special values") { + PowIntFloatingPointSpecialValuesTest(kernel, ref_func, validator_builder); + } + + SECTION("Brute force") { PowIntFloatingPointBruteForceTest(kernel, ref_func, validator_builder); } +} \ No newline at end of file diff --git a/catch/unit/math/pow_funcs.cc b/catch/unit/math/pow_funcs.cc new file mode 100644 index 0000000000..1722a26db5 --- /dev/null +++ b/catch/unit/math/pow_funcs.cc @@ -0,0 +1,455 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "unary_common.hh" +#include "binary_common.hh" +#include "pow_common.hh" +#include "math_pow_negative_kernels_rtc.hh" + +/** + * @addtogroup PowMathFuncs PowMathFuncs + * @{ + * @ingroup MathTest + */ + +/********** Unary Functions **********/ + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `expf(x)` for all possible inputs and `exp(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::exp(T)`. The maximum ulp error for single + * precision is 2 and for double precision is 1. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(exp, 2, 1) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for expf and exp. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_exp_expf_Negative_RTC") { NegativeTestRTCWrapper<4>(kExp); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `exp2f(x)` for all possible inputs and `exp2(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::exp2(T)`. The maximum ulp error for single + * precision is 2 and for double precision is 1. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(exp2, 2, 1) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for exp2f and exp2. 
+ * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_exp2_exp2f_Negative_RTC") { NegativeTestRTCWrapper<4>(kExp2); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `expm1f(x)` for all possible inputs and `expm1(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::expm1(T)`. The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(expm1, 1, 1) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for expm1f and expm1. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_expm1_expm1f_Negative_RTC") { NegativeTestRTCWrapper<4>(kExpm1); } + +MATH_UNARY_KERNEL_DEF(exp10) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `exp10f(x)` for all possible inputs. The maximum ulp error + * is 2. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_exp10f_Accuracy_Positive") { + auto exp10_ref = [](double arg) -> double { return std::pow(10, arg); }; + double (*ref)(double) = exp10_ref; + UnarySinglePrecisionTest(exp10_kernel, ref, ULPValidatorBuilderFactory(2)); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `exp10(x)` against a table of difficult values, + * followed by a large number of randomly generated values.
The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_exp10_Accuracy_Positive") { + auto exp10_ref = [](long double arg) -> long double { return std::pow(10, arg); }; + long double (*ref)(long double) = exp10_ref; + UnaryDoublePrecisionTest(exp10_kernel, ref, ULPValidatorBuilderFactory(1)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for exp10f and exp10. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_exp10_exp10f_Negative_RTC") { NegativeTestRTCWrapper<4>(kExp10); } + +template +__global__ void frexp_kernel(std::pair* const ys, const size_t num_xs, T* const xs) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + if constexpr (std::is_same_v) { + ys[i].first = frexpf(xs[i], &ys[i].second); + } else if constexpr (std::is_same_v) { + ys[i].first = frexp(xs[i], &ys[i].second); + } + } +} + +template std::pair frexp_ref(T arg) { + int exp_v; + T res = std::frexp(arg, &exp_v); + return {res, exp_v}; +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `frexpf(x, exp)` for all possible inputs. The results are + * compared against reference function `double std::frexp(double, int*)`. The maximum ulp error is + * 0. 
+ * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_frexpf_Accuracy_Positive") { + UnarySinglePrecisionTest( + frexp_kernel, frexp_ref, + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(0), + EqValidatorBuilderFactory())); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `frexp(x, exp)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `long double std::frexp(long double, int*)`. The maximum ulp + * error is 0. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_frexp_Accuracy_Positive") { + UnaryDoublePrecisionTest( + frexp_kernel, frexp_ref, + PairValidatorBuilderFactory(ULPValidatorBuilderFactory(0), + EqValidatorBuilderFactory())); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for frexpf and frexp. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_frexp_frexpf_Negative_RTC") { NegativeTestRTCWrapper<20>(kFrexp); } + + +/********** Binary Functions **********/ + +MATH_BINARY_KERNEL_DEF(pow) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `powf(x, y)` and `pow(x, y)`against a table of + * difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::pow(T, T)`. The maximum ulp error + * for single precision is 4 and for double precision is 2. 
+ * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_pow_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + auto pow_ref = [](RT arg1, RT arg2) -> RT { + if (std::isinf(arg1) && arg2 < 0) return 0; + return std::pow(arg1, arg2); + }; + RT (*ref)(RT, RT) = pow_ref; + const auto ulp = std::is_same_v ? 4 : 2; + BinaryFloatingPointTest(pow_kernel, ref, ULPValidatorBuilderFactory(ulp)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for powf and pow. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_pow_powf_Negative_RTC") { NegativeTestRTCWrapper<8>(kPow); } + +MATH_POW_INT_KERNEL_DEF(ldexp) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `ldexpf(x, exp)` and `ldexp(x, exp)`against a table of + * difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::ldexp(T, int)`. The maximum ulp error is 0. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_ldexp_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + RT (*ref)(RT, int) = std::ldexp; + PowIntFloatingPointTest(ldexp_kernel, ref, + ULPValidatorBuilderFactory(0)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for ldexpf and ldexp. 
+ * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_ldexp_ldexpf_Negative_RTC") { NegativeTestRTCWrapper<8>(kLdexp); } + +MATH_POW_INT_KERNEL_DEF(powi) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `powif(x, exp)` and `powi(x, exp)` against a table of + * difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::pow(T, T)`. The maximum ulp error + * for single precision is 4 and for double precision is 2. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_powi_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + auto pow_ref = [](RT arg1, int arg2) -> RT { + if (std::isinf(arg1) && arg2 < 0) return 0; + return std::pow(arg1, static_cast(arg2)); + }; + RT (*ref)(RT, int) = pow_ref; + const auto ulp = std::is_same_v ? 4 : 2; + PowIntFloatingPointTest(powi_kernel, ref, + ULPValidatorBuilderFactory(ulp)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for powif and powi. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_powi_powif_Negative_RTC") { NegativeTestRTCWrapper<8>(kPowi); } + +MATH_POW_INT_KERNEL_DEF(scalbn) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `scalbnf(x, n)` and `scalbn(x, n)` against a table of + * difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::scalbn(T, int)`. The maximum ulp error is 0.
+ * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_scalbn_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + RT (*ref)(RT, int) = std::scalbn; + PowIntFloatingPointTest(scalbn_kernel, ref, + ULPValidatorBuilderFactory(0)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for scalbnf and scalbn. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_scalbn_scalbnf_Negative_RTC") { NegativeTestRTCWrapper<8>(kScalbn); } + +MATH_POW_INT_KERNEL_DEF(scalbln) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `scalblnf(x, l)` and `scalbln(x, l)` against a table of + * difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::scalbln(T, long int)`. The maximum ulp error is 0. + * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_Device_scalbln_Accuracy_Positive", "", float, double) { + using RT = RefType_t; + RT (*ref)(RT, long int) = std::scalbln; + PowIntFloatingPointTest(scalbln_kernel, ref, + ULPValidatorBuilderFactory(0)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for scalblnf and scalbln.
+ * + * Test source + * ------------------------ + * - unit/math/pow_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_scalbln_scalblnf_Negative_RTC") { NegativeTestRTCWrapper<8>(kScalbln); } From 95a75cf00f37c20cfaac96a4eb3729b0731934da Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Mon, 22 Jan 2024 23:46:23 +0530 Subject: [PATCH 15/71] EXSWHTEC-286 - Implement tests for log device math functions #230 Change-Id: I642a9d865fcc30d7b303b0d4dd05fcd723a59015 --- catch/CMakeLists.txt | 2 +- catch/hipTestMain/config/config_amd_linux | 3 + catch/hipTestMain/config/config_amd_windows | 3 + catch/unit/math/CMakeLists.txt | 5 + catch/unit/math/log_funcs.cc | 260 ++++++++++++++++++ catch/unit/math/math_log_negative_kernels.cc | 39 +++ .../math/math_log_negative_kernels_rtc.hh | 96 +++++++ catch/unit/math/ternary_common.hh | 2 + 8 files changed, 409 insertions(+), 1 deletion(-) create mode 100644 catch/unit/math/log_funcs.cc create mode 100644 catch/unit/math/math_log_negative_kernels.cc create mode 100644 catch/unit/math/math_log_negative_kernels_rtc.hh diff --git a/catch/CMakeLists.txt b/catch/CMakeLists.txt index f31db5f10f..69d9d9aa15 100644 --- a/catch/CMakeLists.txt +++ b/catch/CMakeLists.txt @@ -173,7 +173,7 @@ if (WIN32) endif() if(HIP_PLATFORM STREQUAL "amd") - add_compile_options(-Wall -Wextra -Wvla -Werror -Wno-deprecated -Wno-option-ignored) + add_compile_options(-Wall -Wextra -Wvla -Werror -Wno-deprecated -Wno-option-ignored -Wno-unused-parameter -Wunused-variable) endif() cmake_policy(PUSH) diff --git a/catch/hipTestMain/config/config_amd_linux b/catch/hipTestMain/config/config_amd_linux index 1f38291d20..cf72d409d4 100644 --- a/catch/hipTestMain/config/config_amd_linux +++ b/catch/hipTestMain/config/config_amd_linux @@ -131,6 +131,9 @@ "=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===", "Unit_hiprtc_stdheaders", 
"Unit_hipGraphAddMemcpyNode_Negative_Parameters", + "=== Below 2 tests are disable due to defect EXSWHTEC-369 ===", + "Unit_Device_ilogbf_Accuracy_Positive", + "Unit_Device_ilogb_Accuracy_Positive", "Unit_hipMemAddressFree_negative", "Unit_hipMemAddressReserve_AlignmentTest", "Unit_hipMemAddressReserve_Negative", diff --git a/catch/hipTestMain/config/config_amd_windows b/catch/hipTestMain/config/config_amd_windows index 74c5bca32a..b3b396f6ba 100644 --- a/catch/hipTestMain/config/config_amd_windows +++ b/catch/hipTestMain/config/config_amd_windows @@ -222,6 +222,9 @@ "NOTE: The following test is disabled due to defect - EXSWHTEC-244", "Unit_hipExtLaunchMultiKernelMultiDevice_Negative_Parameters", "Unit_hipMemAddressFree_negative", + "=== Below 2 tests are disable due to defect EXSWHTEC-369 ===", + "Unit_Device_ilogbf_Accuracy_Positive", + "Unit_Device_ilogb_Accuracy_Positive", "Unit_hipMemAddressReserve_AlignmentTest", "Unit_hipGraphAddMemcpyNode_Negative_Parameters", "Unit_hipMemCreate_ChkWithKerLaunch", diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt index dba9476a0e..a194675b4b 100644 --- a/catch/unit/math/CMakeLists.txt +++ b/catch/unit/math/CMakeLists.txt @@ -27,6 +27,7 @@ set(TEST_SRC integer_intrinsics.cc root_funcs.cc pow_funcs.cc + log_funcs.cc ) if(HIP_PLATFORM MATCHES "nvidia") @@ -91,3 +92,7 @@ add_test(NAME Unit_Device_pow_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} math_pow_negative_kernels.cc 76) +add_test(NAME Unit_Device_log_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + math_log_negative_kernels.cc 24) diff --git a/catch/unit/math/log_funcs.cc b/catch/unit/math/log_funcs.cc new file mode 100644 index 0000000000..83ec1806f3 --- /dev/null +++ b/catch/unit/math/log_funcs.cc @@ -0,0 +1,260 @@ +/* +Copyright (c) 2023 Advanced Micro 
Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "unary_common.hh" +#include "math_log_negative_kernels_rtc.hh" + +/** + * @addtogroup LogMathFuncs LogMathFuncs + * @{ + * @ingroup MathTest + */ + +/********** Unary Functions **********/ + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `logf(x)` for all possible inputs and `log(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::log(T)`. The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(log, 1, 1) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for logf and log. 
+ * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_log_logf_Negative_RTC") { NegativeTestRTCWrapper<4>(kLog); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `log2f(x)` for all possible inputs and `log2(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::log2(T)`. The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(log2, 1, 1) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for log2f and log2. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_log2_log2f_Negative_RTC") { NegativeTestRTCWrapper<4>(kLog2); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `log10f(x)` for all possible inputs and `log10(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::log10(T)`. The maximum ulp error for single + * precision is 2 and for double precision is 1. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(log10, 2, 1) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for log10f and log10. 
+ * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_log10_log10f_Negative_RTC") { NegativeTestRTCWrapper<4>(kLog10); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `log1pf(x)` for all possible inputs and `log1p(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::log1p(T)`. The maximum ulp error is 1. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(log1p, 1, 1) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for log1pf and log1p. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_log1p_log1pf_Negative_RTC") { NegativeTestRTCWrapper<4>(kLog1p); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `logbf(x)` for all possible inputs and `logb(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::logb(T)`. The maximum ulp error is 0. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(logb, 0, 0) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for logbf and logb. 
+ * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_logb_logbf_Negative_RTC") { NegativeTestRTCWrapper<4>(kLogb); } + + +template /* Kernel: stores ilogbf(xs[i]) or ilogb(xs[i]) in ys[i] for i below num_xs; the branch is chosen at compile time from T. */ +__global__ void ilogb_kernel(int* const ys, const size_t num_xs, T* const xs) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { /* each thread starts at its rank within the grid and advances by the grid size */ + if constexpr (std::is_same_v) { /* NOTE(review): the compared types are not visible in this text; presumably float here and double below - confirm */ + ys[i] = ilogbf(xs[i]); + } else if constexpr (std::is_same_v) { + ys[i] = ilogb(xs[i]); + } + } +} + +template int ilogb_ref(T arg) { /* Reference passed to the accuracy tests below: zero and NaN yield numeric_limits min(), infinity yields max(), any other value defers to std::ilogb. */ + if (arg == 0) { /* NOTE(review): presumably the special cases are hard-coded so the expected values do not depend on the host's FP_ILOGB0 / FP_ILOGBNAN - confirm against the device implementation */ + return std::numeric_limits::min(); + } else if (std::isnan(arg)) { + return std::numeric_limits::min(); + } else if (std::isinf(arg)) { + return std::numeric_limits::max(); + } else { + return std::ilogb(arg); + } +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `ilogbf(x)` for all possible inputs. The results are + * compared against reference function `int std::ilogb(double)`. The maximum ulp error is 0. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_ilogbf_Accuracy_Positive") { + UnarySinglePrecisionTest(ilogb_kernel, ilogb_ref, + EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `ilogb(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `int std::ilogb(long double)`. The maximum ulp error is 0. 
+ * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_ilogb_Accuracy_Positive") { + UnaryDoublePrecisionTest(ilogb_kernel, ilogb_ref, + EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for ilogbf and ilogb. + * + * Test source + * ------------------------ + * - unit/math/log_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_ilogb_ilogbf_Negative_RTC") { NegativeTestRTCWrapper<4>(kIlogb); } diff --git a/catch/unit/math/math_log_negative_kernels.cc b/catch/unit/math/math_log_negative_kernels.cc new file mode 100644 index 0000000000..732fc62a08 --- /dev/null +++ b/catch/unit/math/math_log_negative_kernels.cc @@ -0,0 +1,39 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL(func_name) \ + __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } \ + __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } \ + __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } + +NEGATIVE_KERNELS_SHELL(log) +NEGATIVE_KERNELS_SHELL(log2) +NEGATIVE_KERNELS_SHELL(log10) +NEGATIVE_KERNELS_SHELL(log1p) +NEGATIVE_KERNELS_SHELL(logb) +NEGATIVE_KERNELS_SHELL(ilogb) diff --git a/catch/unit/math/math_log_negative_kernels_rtc.hh b/catch/unit/math/math_log_negative_kernels_rtc.hh new file mode 100644 index 0000000000..fd1cbdfcaf --- /dev/null +++ b/catch/unit/math/math_log_negative_kernels_rtc.hh @@ -0,0 +1,96 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the math log negative Test Cases that are using RTC. +*/ + +static constexpr auto kLog{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void log_kernel_v1(double* x) { double result = log(x); } + __global__ void log_kernel_v2(Dummy x) { double result = log(x); } + __global__ void logf_kernel_v1(float* x) { float result = logf(x); } + __global__ void logf_kernel_v2(Dummy x) { float result = logf(x); } +)"}; + +static constexpr auto kLog2{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void log2_kernel_v1(double* x) { double result = log2(x); } + __global__ void log2_kernel_v2(Dummy x) { double result = log2(x); } + __global__ void log2f_kernel_v1(float* x) { float result = log2f(x); } + __global__ void log2f_kernel_v2(Dummy x) { float result = log2f(x); } +)"}; + +static constexpr auto kLog10{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void log10_kernel_v1(double* x) { double result = log10(x); } + __global__ void log10_kernel_v2(Dummy x) { double result = log10(x); } + __global__ void log10f_kernel_v1(float* x) { float result = log10f(x); } + __global__ void log10f_kernel_v2(Dummy x) { float result = log10f(x); } +)"}; + +static constexpr auto kLog1p{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void log1p_kernel_v1(double* x) { double result = log1p(x); } 
+ __global__ void log1p_kernel_v2(Dummy x) { double result = log1p(x); } + __global__ void log1pf_kernel_v1(float* x) { float result = log1pf(x); } + __global__ void log1pf_kernel_v2(Dummy x) { float result = log1pf(x); } +)"}; + +static constexpr auto kLogb{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void logb_kernel_v1(double* x) { double result = logb(x); } + __global__ void logb_kernel_v2(Dummy x) { double result = logb(x); } + __global__ void logbf_kernel_v1(float* x) { float result = logbf(x); } + __global__ void logbf_kernel_v2(Dummy x) { float result = logbf(x); } +)"}; + +static constexpr auto kIlogb{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void ilogb_kernel_v1(double* x) { double result = ilogb(x); } + __global__ void ilogb_kernel_v2(Dummy x) { double result = ilogb(x); } + __global__ void ilogbf_kernel_v1(float* x) { float result = ilogbf(x); } + __global__ void ilogbf_kernel_v2(Dummy x) { float result = ilogbf(x); } +)"}; diff --git a/catch/unit/math/ternary_common.hh b/catch/unit/math/ternary_common.hh index a335073916..4bc7fe26cc 100644 --- a/catch/unit/math/ternary_common.hh +++ b/catch/unit/math/ternary_common.hh @@ -139,4 +139,6 @@ void TernaryFloatingPointTest(kernel_sig kernel, ref_sig, ref, \ ULPValidatorBuilderFactory(ulp)); \ + \ } + From ce167afa87300d94ce7d67807cee3d3793081365 Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Wed, 24 Jan 2024 00:30:42 +0530 Subject: [PATCH 16/71] EXSWHTEC-288 - Implement tests for special-purpose device math function #235 Change-Id: Ifa601f462a5291ef6454240191d10e245eef8d7c --- catch/unit/math/CMakeLists.txt | 5 + catch/unit/math/math_special_func_kernels.cc | 60 + .../math/math_special_func_kernels_rtc.hh | 236 ++++ catch/unit/math/special_common.hh | 145 +++ catch/unit/math/special_funcs.cc | 1117 +++++++++++++++++ 5 files changed, 1563 insertions(+) create mode 100644 
catch/unit/math/math_special_func_kernels.cc create mode 100644 catch/unit/math/math_special_func_kernels_rtc.hh create mode 100644 catch/unit/math/special_common.hh create mode 100644 catch/unit/math/special_funcs.cc diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt index a194675b4b..1a9cd98f89 100644 --- a/catch/unit/math/CMakeLists.txt +++ b/catch/unit/math/CMakeLists.txt @@ -28,6 +28,7 @@ set(TEST_SRC root_funcs.cc pow_funcs.cc log_funcs.cc + special_funcs.cc ) if(HIP_PLATFORM MATCHES "nvidia") @@ -96,3 +97,7 @@ add_test(NAME Unit_Device_log_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} math_log_negative_kernels.cc 24) +add_test(NAME Unit_Device_special_funcs_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + math_special_func_kernels.cc 76) diff --git a/catch/unit/math/math_special_func_kernels.cc b/catch/unit/math/math_special_func_kernels.cc new file mode 100644 index 0000000000..7dec752ddc --- /dev/null +++ b/catch/unit/math/math_special_func_kernels.cc @@ -0,0 +1,60 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL_ONE_ARG(func_name) \ + __global__ void func_name##_kernel_v1(double* x) { double result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { double result = func_name(x); } \ + __global__ void func_name##f_kernel_v1(float* x) { float result = func_name##f(x); } \ + __global__ void func_name##f_kernel_v2(Dummy x) { float result = func_name##f(x); } + +#define NEGATIVE_KERNELS_SHELL_TWO_ARGS(func_name) \ + __global__ void func_name##_kernel_v1(int* x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(int x, double* y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, double y) { double result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(int x, Dummy y) { double result = func_name(x, y); } \ + __global__ void func_name##f_kernel_v1(int* x, float y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v2(int x, float* y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v3(Dummy x, float y) { float result = func_name##f(x, y); } \ + __global__ void func_name##f_kernel_v4(int x, Dummy y) { float result = func_name##f(x, y); } + +NEGATIVE_KERNELS_SHELL_ONE_ARG(erf) +NEGATIVE_KERNELS_SHELL_ONE_ARG(erfc) +NEGATIVE_KERNELS_SHELL_ONE_ARG(erfinv) +NEGATIVE_KERNELS_SHELL_ONE_ARG(erfcinv) +NEGATIVE_KERNELS_SHELL_ONE_ARG(erfcx) 
+NEGATIVE_KERNELS_SHELL_ONE_ARG(normcdf) +NEGATIVE_KERNELS_SHELL_ONE_ARG(normcdfinv) +NEGATIVE_KERNELS_SHELL_ONE_ARG(lgamma) +NEGATIVE_KERNELS_SHELL_ONE_ARG(tgamma) +NEGATIVE_KERNELS_SHELL_ONE_ARG(j0) +NEGATIVE_KERNELS_SHELL_ONE_ARG(j1) +NEGATIVE_KERNELS_SHELL_TWO_ARGS(jn) +NEGATIVE_KERNELS_SHELL_ONE_ARG(y0) +NEGATIVE_KERNELS_SHELL_ONE_ARG(y1) +NEGATIVE_KERNELS_SHELL_TWO_ARGS(yn) +NEGATIVE_KERNELS_SHELL_ONE_ARG(cyl_bessel_i0) +NEGATIVE_KERNELS_SHELL_ONE_ARG(cyl_bessel_i1) diff --git a/catch/unit/math/math_special_func_kernels_rtc.hh b/catch/unit/math/math_special_func_kernels_rtc.hh new file mode 100644 index 0000000000..e829db2f3a --- /dev/null +++ b/catch/unit/math/math_special_func_kernels_rtc.hh @@ -0,0 +1,236 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the math special function negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kErf{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void erf_kernel_v1(double* x) { double result = erf(x); } + __global__ void erf_kernel_v2(Dummy x) { double result = erf(x); } + __global__ void erff_kernel_v1(float* x) { float result = erff(x); } + __global__ void erff_kernel_v2(Dummy x) { float result = erff(x); } +)"}; + +static constexpr auto kErfc{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void erfc_kernel_v1(double* x) { double result = erfc(x); } + __global__ void erfc_kernel_v2(Dummy x) { double result = erfc(x); } + __global__ void erfcf_kernel_v1(float* x) { float result = erfcf(x); } + __global__ void erfcf_kernel_v2(Dummy x) { float result = erfcf(x); } +)"}; + +static constexpr auto kErfinv{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void erfinv_kernel_v1(double* x) { double result = erfinv(x); } + __global__ void erfinv_kernel_v2(Dummy x) { double result = erfinv(x); } + __global__ void erfinvf_kernel_v1(float* x) { float result = erfinvf(x); } + __global__ void erfinvf_kernel_v2(Dummy x) { float result = erfinvf(x); } +)"}; + +static constexpr auto kErfcinv{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void erfcinv_kernel_v1(double* x) { double result = erfcinv(x); } + __global__ void erfcinv_kernel_v2(Dummy x) { double result = erfcinv(x); } + __global__ void erfcinvf_kernel_v1(float* x) { float result = erfcinvf(x); } + __global__ void erfcinvf_kernel_v2(Dummy x) { float result = erfcinvf(x); } +)"}; + +static constexpr auto kErfcx{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void erfcx_kernel_v1(double* x) { double result = erfcx(x); } + __global__ void erfcx_kernel_v2(Dummy x) { double result = erfcx(x); } + __global__ void erfcxf_kernel_v1(float* x) { float 
result = erfcxf(x); } + __global__ void erfcxf_kernel_v2(Dummy x) { float result = erfcxf(x); } +)"}; + +static constexpr auto kNormcdf{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void normcdf_kernel_v1(double* x) { double result = normcdf(x); } + __global__ void normcdf_kernel_v2(Dummy x) { double result = normcdf(x); } + __global__ void normcdff_kernel_v1(float* x) { float result = normcdff(x); } + __global__ void normcdff_kernel_v2(Dummy x) { float result = normcdff(x); } +)"}; + +static constexpr auto kNormcdfinv{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void normcdfinv_kernel_v1(double* x) { double result = normcdfinv(x); } + __global__ void normcdfinv_kernel_v2(Dummy x) { double result = normcdfinv(x); } + __global__ void normcdfinvf_kernel_v1(float* x) { float result = normcdfinvf(x); } + __global__ void normcdfinvf_kernel_v2(Dummy x) { float result = normcdfinvf(x); } +)"}; + +static constexpr auto kLgamma{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void lgamma_kernel_v1(double* x) { double result = lgamma(x); } + __global__ void lgamma_kernel_v2(Dummy x) { double result = lgamma(x); } + __global__ void lgammaf_kernel_v1(float* x) { float result = lgammaf(x); } + __global__ void lgammaf_kernel_v2(Dummy x) { float result = lgammaf(x); } +)"}; + +static constexpr auto kTgamma{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void tgamma_kernel_v1(double* x) { double result = tgamma(x); } + __global__ void tgamma_kernel_v2(Dummy x) { double result = tgamma(x); } + __global__ void tgammaf_kernel_v1(float* x) { float result = tgammaf(x); } + __global__ void tgammaf_kernel_v2(Dummy x) { float result = tgammaf(x); } +)"}; + +static constexpr auto kJ0{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void 
j0_kernel_v1(double* x) { double result = j0(x); } + __global__ void j0_kernel_v2(Dummy x) { double result = j0(x); } + __global__ void j0f_kernel_v1(float* x) { float result = j0f(x); } + __global__ void j0f_kernel_v2(Dummy x) { float result = j0f(x); } +)"}; + +static constexpr auto kJ1{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void j1_kernel_v1(double* x) { double result = j1(x); } + __global__ void j1_kernel_v2(Dummy x) { double result = j1(x); } + __global__ void j1f_kernel_v1(float* x) { float result = j1f(x); } + __global__ void j1f_kernel_v2(Dummy x) { float result = j1f(x); } +)"}; + +static constexpr auto kJn{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void jn_kernel_v1(int* x, double y) { double result = jn(x, y); } + __global__ void jn_kernel_v2(int x, double* y) { double result = jn(x, y); } + __global__ void jn_kernel_v3(Dummy x, double y) { double result = jn(x, y); } + __global__ void jn_kernel_v4(int x, Dummy y) { double result = jn(x, y); } + __global__ void jnf_kernel_v1(int* x, float y) { float result = jnf(x, y); } + __global__ void jnf_kernel_v2(int x, float* y) { float result = jnf(x, y); } + __global__ void jnf_kernel_v3(Dummy x, float y) { float result = jnf(x, y); } + __global__ void jnf_kernel_v4(int x, Dummy y) { float result = jnf(x, y); } +)"}; + +static constexpr auto kY0{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void y0_kernel_v1(double* x) { double result = y0(x); } + __global__ void y0_kernel_v2(Dummy x) { double result = y0(x); } + __global__ void y0f_kernel_v1(float* x) { float result = y0f(x); } + __global__ void y0f_kernel_v2(Dummy x) { float result = y0f(x); } +)"}; + +static constexpr auto kY1{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void y1_kernel_v1(double* x) { double result = y1(x); } + __global__ void 
y1_kernel_v2(Dummy x) { double result = y1(x); } + __global__ void y1f_kernel_v1(float* x) { float result = y1f(x); } + __global__ void y1f_kernel_v2(Dummy x) { float result = y1f(x); } +)"}; + +static constexpr auto kYn{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void yn_kernel_v1(int* x, double y) { double result = yn(x, y); } + __global__ void yn_kernel_v2(int x, double* y) { double result = yn(x, y); } + __global__ void yn_kernel_v3(Dummy x, double y) { double result = yn(x, y); } + __global__ void yn_kernel_v4(int x, Dummy y) { double result = yn(x, y); } + __global__ void ynf_kernel_v1(int* x, float y) { float result = ynf(x, y); } + __global__ void ynf_kernel_v2(int x, float* y) { float result = ynf(x, y); } + __global__ void ynf_kernel_v3(Dummy x, float y) { float result = ynf(x, y); } + __global__ void ynf_kernel_v4(int x, Dummy y) { float result = ynf(x, y); } +)"}; + +static constexpr auto kCylBesselI0{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void cyl_bessel_i0_kernel_v1(double* x) { double result = cyl_bessel_i0(x); } + __global__ void cyl_bessel_i0_kernel_v2(Dummy x) { double result = cyl_bessel_i0(x); } + __global__ void cyl_bessel_i0f_kernel_v1(float* x) { float result = cyl_bessel_i0f(x); } + __global__ void cyl_bessel_i0f_kernel_v2(Dummy x) { float result = cyl_bessel_i0f(x); } +)"}; + +static constexpr auto kCylBesselI1{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void cyl_bessel_i1_kernel_v1(double* x) { double result = cyl_bessel_i1(x); } + __global__ void cyl_bessel_i1_kernel_v2(Dummy x) { double result = cyl_bessel_i1(x); } + __global__ void cyl_bessel_i1f_kernel_v1(float* x) { float result = cyl_bessel_i1f(x); } + __global__ void cyl_bessel_i1f_kernel_v2(Dummy x) { float result = cyl_bessel_i1f(x); } +)"}; diff --git a/catch/unit/math/special_common.hh 
b/catch/unit/math/special_common.hh new file mode 100644 index 0000000000..4b55a88fee --- /dev/null +++ b/catch/unit/math/special_common.hh @@ -0,0 +1,174 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
+*/ + +#pragma once + +#include "math_common.hh" +#include "math_special_values.hh" + +#include + +namespace cg = cooperative_groups; + +/** + * Defines the kernel template `func_name##_kernel`. Each thread starts at its rank in the grid + * and advances by the grid size, writing the result computed from `n[i]` and `xs[i]` to `ys[i]`. + * A compile-time type check selects between `func_name##f` and `func_name`. + */ +#define MATH_BESSEL_N_KERNEL_DEF(func_name) \ + template \ + __global__ void func_name##_kernel(T* const ys, const size_t num_xs, int* n, T* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + if constexpr (std::is_same_v) { \ + ys[i] = func_name##f(n[i], xs[i]); \ + } else if constexpr (std::is_same_v) { \ + ys[i] = func_name(n[i], xs[i]); \ + } \ + } \ + } + +template using kernel_bessel_n_sig = void (*)(T*, const size_t, int*, T*); + +template using ref_bessel_n_sig = T (*)(int, T); + +/** + * Runs `math_test` for `kernel` and `ref_func` on GetTestIterationCount() randomly generated + * inputs. The integer argument of every element is `n_input`; the floating-point argument is + * drawn from a uniform distribution constructed with `a` and `b`. Inputs are generated batch by + * batch on `thread_pool`, and each batch is passed to `math_test.Run` with `validator_builder`. + */ +template +void BesselDoublePrecisionBruteForceTest(kernel_bessel_n_sig kernel, + ref_bessel_n_sig ref_func, + const ValidatorBuilder& validator_builder, int n_input = 0, + const double a = std::numeric_limits::lowest(), + const double b = std::numeric_limits::max()) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const uint64_t num_iterations = GetTestIterationCount(); + const auto max_batch_size = std::min( + GetMaxAllowedDeviceMemoryUsage() / (sizeof(double) * 2 + sizeof(int)), num_iterations); + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(int)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(double)}; + + MathTest math_test(kernel, max_batch_size); + std::fill_n(x1s.ptr(), max_batch_size, n_input); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + for (uint64_t i = 0ul; i < num_iterations; i += batch_size) { + batch_size = std::min(max_batch_size, num_iterations - i); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size
+ (i < tail); + thread_pool.Post([=, &x2s] { + const auto generator = [=] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_real_distribution> unif_dist(a, b); + return static_cast(unif_dist(rng)); + }; + std::generate(x2s.ptr() + base_idx, x2s.ptr() + base_idx + sub_batch_size, generator); + }); + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, x1s.ptr(), + x2s.ptr()); + } +} + +/** + * Runs `math_test` for `kernel` and `ref_func` on every float from `a` towards `b`, stepping with + * std::nextafter and stopping before `b` itself. The integer argument of every element is + * `n_input`. Values are passed to `math_test.Run` in batches of at most `max_batch_size` + * elements, including the final, partially filled batch. + */ +template +void BesselSinglePrecisionRangeTest(kernel_bessel_n_sig kernel, + ref_bessel_n_sig ref_func, + const ValidatorBuilder& validator_builder, int n_input, + const float a, const float b) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(float) * 2 + sizeof(int)); + LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(int)}; + LinearAllocGuard x2s{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(float)}; + + MathTest math_test(kernel, max_batch_size); + std::fill_n(x1s.ptr(), max_batch_size, n_input); + + size_t inserted = 0u; + for (float v = a; v != b; v = std::nextafter(v, b)) { + x2s.ptr()[inserted++] = v; + if (inserted < max_batch_size) continue; + + math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, x1s.ptr(), + x2s.ptr()); + inserted = 0u; + } + + // Submit the final, partially filled batch; the loop above only runs full batches. + if (inserted > 0u) { + math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, x1s.ptr(), + x2s.ptr()); + } +} + +/** + * Launches `kernel` with one block of `num_args` threads on the inputs `x`, copies the results + * back to the host and checks each against the matching entry of `ref` using the validator built + * by `validator_builder`. On a mismatch the input, actual and reference values are reported via + * INFO and the test fails. + */ +template +void SpecialSimpleTest(F kernel, const ValidatorBuilder& validator_builder, const T* x, + const T* ref, size_t num_args) { + LinearAllocGuard x_dev{LinearAllocs::hipMalloc, num_args * sizeof(T)}; + LinearAllocGuard y{LinearAllocs::hipHostMalloc, num_args * sizeof(T)}; + LinearAllocGuard y_dev{LinearAllocs::hipMalloc, num_args * sizeof(T)}; + + HIP_CHECK(hipMemcpy(x_dev.ptr(), x, num_args * sizeof(T), hipMemcpyHostToDevice)); + + kernel<<<1, num_args>>>(y_dev.ptr(), num_args, x_dev.ptr()); + HIP_CHECK(hipGetLastError()); + +
HIP_CHECK(hipMemcpy(y.ptr(), y_dev.ptr(), num_args * sizeof(T), hipMemcpyDeviceToHost)); + + for (auto i = 0u; i < num_args; ++i) { + const auto actual_val = y.ptr()[i]; + const auto ref_val = ref[i]; + const auto validator = validator_builder(ref_val); + + if (!validator->match(actual_val)) { + std::stringstream ss; + ss << "Input value(s): " << std::scientific + << std::setprecision(std::numeric_limits::max_digits10 - 1); + ss << x[i] << " " << actual_val << " " << ref_val << "\n"; + INFO(ss.str()); + REQUIRE(false); + } + } +} diff --git a/catch/unit/math/special_funcs.cc b/catch/unit/math/special_funcs.cc new file mode 100644 index 0000000000..5461afadd1 --- /dev/null +++ b/catch/unit/math/special_funcs.cc @@ -0,0 +1,1117 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
+*/ + +#include "unary_common.hh" +#include "binary_common.hh" +#include "special_common.hh" +#include "math_special_func_kernels_rtc.hh" + +#include + + +/** + * @addtogroup SpecialMathFuncs SpecialMathFuncs + * @{ + * @ingroup MathTest + */ + +/********** Unary Functions **********/ + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `erff(x)` for all possible inputs and `erf(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::erf(T)`. The maximum ulp error is 2. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(erf, 2, 2) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for erff and erf. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erf_erff_Negative_RTC") { NegativeTestRTCWrapper<4>(kErf); } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `erfcf(x)` for all possible inputs and `erfc(x)` against a + * table of difficult values, followed by a large number of randomly generated values. The results + * are compared against reference function `T std::erfc(T)`. The maximum ulp error for single + * precision is 4 and for double precision is 5. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_WITHIN_ULP_STL_REF_TEST_DEF(erfc, 4, 5) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for erfcf and erfc. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfc_erfcf_Negative_RTC") { NegativeTestRTCWrapper<4>(kErfc); } + +MATH_UNARY_KERNEL_DEF(erfinv) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `erfinvf(x)` for all possible inputs. The results are + * compared against reference function `double boost::math::erf_inv(double)`. The maximum ulp error + * is 2. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfinvf_Accuracy_Positive") { + auto erfinv_ref = [](double arg) -> double { + if (arg == 0) return 0; + if (arg == 1) + return std::numeric_limits::infinity(); + else if (arg == -1) + return -std::numeric_limits::infinity(); + else if (arg < -1 || arg > 1) + return std::numeric_limits::quiet_NaN(); + return boost::math::erf_inv(arg); + }; + double (*ref)(double) = erfinv_ref; + UnarySinglePrecisionTest(erfinv_kernel, ref, ULPValidatorBuilderFactory(2)); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `erfinv(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `long double boost::math::erf_inv(long double)`. The maximum + * ulp error is 5. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfinv_Accuracy_Positive") { + auto erfinv_ref = [](long double arg) -> long double { + if (arg == 0) return 0; + if (arg == 1) + return std::numeric_limits::infinity(); + else if (arg == -1) + return -std::numeric_limits::infinity(); + else if (arg < -1 || arg > 1) + return std::numeric_limits::quiet_NaN(); + return boost::math::erf_inv(arg); + }; + long double (*ref)(long double) = erfinv_ref; + UnaryDoublePrecisionTest(erfinv_kernel, ref, ULPValidatorBuilderFactory(5)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for erfinvf and erfinv. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfinv_erfinvf_Negative_RTC") { NegativeTestRTCWrapper<4>(kErfinv); } + +MATH_UNARY_KERNEL_DEF(erfcinv) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `erfcinvf(x)` for all possible inputs. The results are + * compared against reference function `double boost::math::erfc_inv(double)`. The maximum ulp error + * is 4. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfcinvf_Accuracy_Positive") { + auto erfcinv_ref = [](double arg) -> double { + if (arg == 0) + return std::numeric_limits::infinity(); + else if (arg == 2) + return -std::numeric_limits::infinity(); + else if (arg < 0 || arg > 2) + return std::numeric_limits::quiet_NaN(); + return boost::math::erfc_inv(arg); + }; + double (*ref)(double) = erfcinv_ref; + UnarySinglePrecisionTest(erfcinv_kernel, ref, ULPValidatorBuilderFactory(4)); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `erfcinv(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `long double boost::math::erfc_inv(long double)`. The maximum + * ulp error is 6. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfcinv_Accuracy_Positive") { + auto erfcinv_ref = [](long double arg) -> long double { + if (arg == 0) + return std::numeric_limits::infinity(); + else if (arg == 2) + return -std::numeric_limits::infinity(); + else if (arg < 0 || arg > 2) + return std::numeric_limits::quiet_NaN(); + return boost::math::erfc_inv(arg); + }; + long double (*ref)(long double) = erfcinv_ref; + UnaryDoublePrecisionTest(erfcinv_kernel, ref, ULPValidatorBuilderFactory(6)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for erfcinvf and erfcinv. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfcinv_erfcinvf_Negative_RTC") { NegativeTestRTCWrapper<4>(kErfcinv); } + +MATH_UNARY_KERNEL_DEF(erfcx) + +/** + * Test Description + * ------------------------ + * - Sanity test for `erfcxf(x)`. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfcxf_Sanity_Positive") { + constexpr std::array input{-std::numeric_limits::infinity(), + -1000.f, + -100.f, + -5.f, + -0.5f, + 0., + 0.75f, + 15.f, + 200.f, + 500.f, + std::numeric_limits::infinity()}; + constexpr std::array reference{std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + 1.44009806e11f, + 1.95236027f, + 1.0f, + 5.06937683e-1f, + 3.75296101e-2f, + 2.82091252e-3f, + 1.12837693e-3f, + 0.f}; + SpecialSimpleTest(erfcx_kernel, ULPValidatorBuilderFactory(4), input.data(), + reference.data(), input.size()); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `erfcx(x)`. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfcx_Sanity_Positive") { + constexpr std::array input{ + -std::numeric_limits::infinity(), -1000., -100., -5., -0.5, 0., 0.75, 15., 200., 500., + std::numeric_limits::infinity()}; + constexpr std::array reference{std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + 1.4400979867466104e11, + 1.9523604891825568, + 1.0, + 5.0693765029314475e-1, + 3.7529606388505762e-2, + 2.8209126572120466e-3, + 1.1283769103507188e-3, + 0.}; + SpecialSimpleTest(erfcx_kernel, ULPValidatorBuilderFactory(4), + input.data(), reference.data(), input.size()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for erfcxf and erfcx. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_erfcx_erfcxf_Negative_RTC") { NegativeTestRTCWrapper<4>(kErfcx); } + +MATH_UNARY_KERNEL_DEF(normcdf) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `normcdff(x)` for all possible inputs. The maximum ulp error + * is 5. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_normcdff_Accuracy_Positive") { + auto normcdf_ref = [](double arg) -> double { return std::erfc(-arg / std::sqrt(2)) / 2; }; + double (*ref)(double) = normcdf_ref; + UnarySinglePrecisionTest(normcdf_kernel, ref, ULPValidatorBuilderFactory(5)); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `normcdf(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The maximum ulp error is 5. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_normcdf_Accuracy_Positive") { + auto normcdf_ref = [](long double arg) -> long double { + return std::erfc(-arg / std::sqrt(2.L)) / 2; + }; + long double (*ref)(long double) = normcdf_ref; + UnaryDoublePrecisionTest(normcdf_kernel, ref, ULPValidatorBuilderFactory(5)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for normcdff and normcdf. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_normcdf_normcdff_Negative_RTC") { NegativeTestRTCWrapper<4>(kNormcdf); } + +MATH_UNARY_KERNEL_DEF(normcdfinv) + +/** + * Test Description + * ------------------------ + * - Sanity test for `normcdfinvf(x)`. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_normcdfinvf_Sanity_Positive") { + constexpr std::array input{0.f, 0.1f, 0.25f, 0.4f, 0.5f, 0.6f, 0.75f, 0.9f, 1.f}; + constexpr std::array reference{-std::numeric_limits::infinity(), + -1.28155160f, + -0.674489737f, + -0.253347069f, + 0, + 0.253347158f, + 0.674489737f, + 1.28155148f, + std::numeric_limits::infinity()}; + SpecialSimpleTest(normcdfinv_kernel, ULPValidatorBuilderFactory(5), + input.data(), reference.data(), input.size()); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `normcdfinv(x)`. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_normcdfinv_Sanity_Positive") { + constexpr std::array input{0., 0.1, 0.25, 0.4, 0.5, 0.6, 0.75, 0.9, 1.}; + constexpr std::array reference{-std::numeric_limits::infinity(), + -1.2815515655446004, + -0.67448975019608159, + -0.25334710313579972, + 0, + 0.25334710313579972, + 0.67448975019608159, + 1.2815515655446006, + std::numeric_limits::infinity()}; + SpecialSimpleTest(normcdfinv_kernel, ULPValidatorBuilderFactory(5), + input.data(), reference.data(), input.size()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for normcdfinvf and normcdfinv. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_normcdfinv_normcdfinvf_Negative_RTC") { + NegativeTestRTCWrapper<4>(kNormcdfinv); +} + +MATH_UNARY_KERNEL_DEF(tgamma) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `tgammaf(x)` for all possible inputs below 171.7 and that + * are not very small negative numbers, as they lead to overflow for IEEE compatible double. The + * results are compared against reference function `double std::tgamma(double)`. The maximum ulp + * error is 5. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_tgammaf_Accuracy_Limited_Positive") { + double (*ref)(double) = std::tgamma; + UnarySinglePrecisionRangeTest(tgamma_kernel, ref, ULPValidatorBuilderFactory(5), + std::numeric_limits::lowest(), -0.001f); + UnarySinglePrecisionRangeTest(tgamma_kernel, ref, ULPValidatorBuilderFactory(5), 0, + 171.7); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `tgamma(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `long double std::tgamma(long double)`. The maximum ulp error + * is 10. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_tgamma_Accuracy_Limited_Positive") { + long double (*ref)(long double) = std::tgamma; + UnaryDoublePrecisionTest(tgamma_kernel, ref, ULPValidatorBuilderFactory(10)); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for tgammaf and tgamma. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_tgamma_tgammaf_Negative_RTC") { NegativeTestRTCWrapper<4>(kTgamma); } + +MATH_UNARY_KERNEL_DEF(lgamma) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `lgammaf(x)` for inputs outside the interval -11.0001 … + * -2.2636; inputs inside it are not tested. The results are compared against reference function + * `double std::lgamma(double)`. The maximum ulp error is 6. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_lgammaf_Accuracy_Limited_Positive") { + double (*ref)(double) = std::lgamma; + UnarySinglePrecisionRangeTest(lgamma_kernel, ref, ULPValidatorBuilderFactory(6), + std::numeric_limits::lowest(), -11.0001f); + UnarySinglePrecisionRangeTest(lgamma_kernel, ref, ULPValidatorBuilderFactory(6), + -2.2636f, std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `lgamma(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against + * reference function `long double std::lgamma(long double)`. For `x` outside interval -11.0001 … + * -2.2637, the maximum ulp error is 4, and larger otherwise.
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_lgamma_Accuracy_Limited_Positive") { + long double (*ref)(long double) = std::lgamma; + UnaryDoublePrecisionBruteForceTest(lgamma_kernel, ref, + ULPValidatorBuilderFactory(4), + std::numeric_limits::lowest(), -11.0001); + UnaryDoublePrecisionBruteForceTest(lgamma_kernel, ref, + ULPValidatorBuilderFactory(4), -2.2636, + std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for lgammaf and lgamma. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_lgamma_lgammaf_Negative_RTC") { NegativeTestRTCWrapper<4>(kLgamma); } + +MATH_UNARY_KERNEL_DEF(cyl_bessel_i0) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `cyl_bessel_i0f(x)` for all possible inputs in range [0, + * 10000). The results are compared against reference function `double std::cyl_bessel_i(0, + * double)`. The maximum ulp error is 6. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_cyl_bessel_i0f_Accuracy_Limited_Positive") { + auto cyl_bessel_i0_ref = [](double arg) -> double { return std::cyl_bessel_i(0, arg); }; + double (*ref)(double) = cyl_bessel_i0_ref; + UnarySinglePrecisionRangeTest(cyl_bessel_i0_kernel, ref, + ULPValidatorBuilderFactory(6), 0, 10000); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `cyl_bessel_i0(x)` against a table of difficult values, + * followed by a large number of randomly generated values from range [0, 10000). 
The results are + * compared against reference function `long double std::cyl_bessel_i(0, long double)`. The maximum + * ulp error is 6. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_cyl_bessel_i0_Accuracy_Limited_Positive") { + auto cyl_bessel_i0_ref = [](long double arg) -> long double { return std::cyl_bessel_i(0, arg); }; + long double (*ref)(long double) = cyl_bessel_i0_ref; + UnaryDoublePrecisionBruteForceTest(cyl_bessel_i0_kernel, ref, + ULPValidatorBuilderFactory(6), 0, 10000); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for cyl_bessel_i0f and cyl_bessel_i0. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_cyl_bessel_i0_cyl_bessel_i0f_Negative_RTC") { + NegativeTestRTCWrapper<4>(kCylBesselI0); +} + +MATH_UNARY_KERNEL_DEF(cyl_bessel_i1) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `cyl_bessel_i1f(x)` for all possible inputs in range [0, + * 10000). The results are compared against reference function `double std::cyl_bessel_i(1, + * double)`. The maximum ulp error is 6. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_cyl_bessel_i1f_Accuracy_Limited_Positive") { + auto cyl_bessel_i1_ref = [](double arg) -> double { return std::cyl_bessel_i(1, arg); }; + double (*ref)(double) = cyl_bessel_i1_ref; + UnarySinglePrecisionRangeTest(cyl_bessel_i1_kernel, ref, + ULPValidatorBuilderFactory(6), 0, 10000); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `cyl_bessel_i1(x)` against a table of difficult values, + * followed by a large number of randomly generated values from range [0, 10000). The results are + * compared against reference function `long double std::cyl_bessel_i(1, long double)`. The maximum + * ulp error is 6. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_cyl_bessel_i1_Accuracy_Limited_Positive") { + auto cyl_bessel_i1_ref = [](long double arg) -> long double { return std::cyl_bessel_i(1, arg); }; + long double (*ref)(long double) = cyl_bessel_i1_ref; + UnaryDoublePrecisionBruteForceTest(cyl_bessel_i1_kernel, ref, + ULPValidatorBuilderFactory(6), 0, 10000); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for cyl_bessel_i1f and cyl_bessel_i1. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_cyl_bessel_i1_cyl_bessel_i1f_Negative_RTC") { + NegativeTestRTCWrapper<4>(kCylBesselI1); +} + +/********** Bessel Functions **********/ + +MATH_UNARY_KERNEL_DEF(y0) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `y0f(x)` for all possible inputs. 
The results are + * compared against reference function `double y0(double)`. For `x` outside [-8, 8], the maximum + * absolute error is 2.2x10^-6, otherwise, the maximum ulp error is 9. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_y0f_Accuracy_Limited_Positive") { +#ifdef __unix__ + double (*ref)(double) = y0; +#elif _WIN64 + double (*ref)(double) = _y0; +#endif + UnarySinglePrecisionRangeTest(y0_kernel, ref, ULPValidatorBuilderFactory(9), -8.f, + 8.f); + UnarySinglePrecisionRangeTest(y0_kernel, ref, AbsValidatorBuilderFactory(0.0000022), + 8.f, std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `y0(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `long double y0l(long double)`. The maximum absolute error is + * 5x10^-12. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_y0_Accuracy_Limited_Positive") { +#ifdef __unix__ + long double (*ref)(long double) = y0l; +#elif _WIN64 + long double (*ref)(long double) = _y0l; +#endif + UnaryDoublePrecisionBruteForceTest(y0_kernel, ref, + AbsValidatorBuilderFactory(5.e-12), -8., + std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for y0f and y0. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_y0_y0f_Negative_RTC") { NegativeTestRTCWrapper<4>(kY0); } + +MATH_UNARY_KERNEL_DEF(y1) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `y1f(x)` for all possible inputs. The results are + * compared against reference function `double y1(double)`. For `x` outside [-8, 8], the maximum + * absolute error is 2.2x10^-6, otherwise, the maximum ulp error is 9. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_y1f_Accuracy_Limited_Positive") { +#ifdef __unix__ + double (*ref)(double) = y1; +#elif _WIN64 + double (*ref)(double) = _y1; +#endif + UnarySinglePrecisionRangeTest(y1_kernel, ref, ULPValidatorBuilderFactory(9), -8.f, + 8.f); + UnarySinglePrecisionRangeTest(y1_kernel, ref, AbsValidatorBuilderFactory(0.0000022), + 8.f, std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `y1(x)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are + * compared against reference function `long double y1l(long double)`. The maximum absolute error is + * 5x10^-12. 
+ * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_y1_Accuracy_Limited_Positive") { +#ifdef __unix__ + long double (*ref)(long double) = y1l; +#elif _WIN64 + long double (*ref)(long double) = _y1l; +#endif + UnaryDoublePrecisionBruteForceTest(y1_kernel, ref, + AbsValidatorBuilderFactory(5.e-12), -8., + std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for y1f and y1. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_y1_y1f_Negative_RTC") { NegativeTestRTCWrapper<4>(kY1); } + +MATH_BESSEL_N_KERNEL_DEF(yn) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `ynf(n, x)` for n equal to 5, 25 or 120 and `x` ranging over + * the floats from n upwards. The results are compared against reference function + * `double yn(int, double)`. The maximum absolute error is 2.2x10^-6. + * + * Test source + * ------------------------ + * - unit/math/special_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_ynf_Accuracy_Limited_Positive") { +#ifdef __unix__ + double (*ref)(int, double) = yn; +#elif _WIN64 + double (*ref)(int, double) = _yn; +#endif + int n = GENERATE(5, 25, 120); + BesselSinglePrecisionRangeTest(yn_kernel, ref, AbsValidatorBuilderFactory(0.0000022), n, n, + std::numeric_limits::max()); +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `yn(n, x)` for a large number of randomly generated values + * of `x` and n equal to 5, 25, or 120.
+ * The results are compared against reference function `long double ynl(int, long double)`. For `x`
+ * larger than 1.5n, the maximum absolute error is 5x10^-12.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/special_funcs.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_yn_Accuracy_Limited_Positive") {
+#ifdef __unix__
+  long double (*ref)(int, long double) = ynl;
+#elif _WIN64
+  long double (*ref)(int, long double) = _ynl;
+#endif
+  int n = GENERATE(5, 25, 120);
+  BesselDoublePrecisionBruteForceTest(yn_kernel<double>, ref,
+                                      AbsValidatorBuilderFactory<double>(5.e-12), n, 1.5 * n,
+                                      std::numeric_limits<double>::max());
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - RTCs kernels that pass argument of invalid type for ynf and yn.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/special_funcs.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_yn_ynf_Negative_RTC") { NegativeTestRTCWrapper<8>(kYn); }
+
+MATH_UNARY_KERNEL_DEF(j0)
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `j0f(x)` for all possible inputs. The results are
+ * compared against reference function `double j0(double)`. For `x` outside [-8, 8], the maximum
+ * absolute error is 2.2x10^-6, otherwise, the maximum ulp error is 9.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/special_funcs.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_j0f_Accuracy_Limited_Positive") {
+#ifdef __unix__
+  double (*ref)(double) = j0;
+#elif _WIN64
+  double (*ref)(double) = _j0;
+#endif
+  UnarySinglePrecisionRangeTest(j0_kernel<float>, ref, AbsValidatorBuilderFactory<float>(0.0000022),
+                                std::numeric_limits<float>::lowest(), -8.f);
+  UnarySinglePrecisionRangeTest(j0_kernel<float>, ref, ULPValidatorBuilderFactory<float>(9), -8.f,
+                                8.f);
+  UnarySinglePrecisionRangeTest(j0_kernel<float>, ref, AbsValidatorBuilderFactory<float>(0.0000022),
+                                8.f, std::numeric_limits<float>::max());
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `j0(x)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The results are
+ * compared against reference function `long double j0l(long double)`. The maximum absolute error is
+ * 5x10^-12.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/special_funcs.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_j0_Accuracy_Limited_Positive") {
+#ifdef __unix__
+  long double (*ref)(long double) = j0l;
+#elif _WIN64
+  long double (*ref)(long double) = _j0l;
+#endif
+  UnaryDoublePrecisionBruteForceTest(
+      j0_kernel<double>, ref, AbsValidatorBuilderFactory<double>(5.e-12),
+      std::numeric_limits<double>::lowest(), std::numeric_limits<double>::max());
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - RTCs kernels that pass argument of invalid type for j0f and j0.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/special_funcs.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_j0_j0f_Negative_RTC") { NegativeTestRTCWrapper<4>(kJ0); }
+
+MATH_UNARY_KERNEL_DEF(j1)
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `j1f(x)` for all possible inputs. The results are
+ * compared against reference function `double j1(double)`. For `x` outside [-8, 8], the maximum
+ * absolute error is 2.2x10^-6, otherwise, the maximum ulp error is 9.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/special_funcs.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_j1f_Accuracy_Limited_Positive") {
+#ifdef __unix__
+  double (*ref)(double) = j1;
+#elif _WIN64
+  double (*ref)(double) = _j1;
+#endif
+  UnarySinglePrecisionRangeTest(j1_kernel<float>, ref, AbsValidatorBuilderFactory<float>(0.0000022),
+                                std::numeric_limits<float>::lowest(), -8.f);
+  UnarySinglePrecisionRangeTest(j1_kernel<float>, ref, ULPValidatorBuilderFactory<float>(9), -8.f,
+                                8.f);
+  UnarySinglePrecisionRangeTest(j1_kernel<float>, ref, AbsValidatorBuilderFactory<float>(0.0000022),
+                                8.f, std::numeric_limits<float>::max());
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `j1(x)` against a table of difficult values,
+ * followed by a large number of randomly generated values. The results are
+ * compared against reference function `long double j1l(long double)`. The maximum absolute error is
+ * 5x10^-12.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/special_funcs.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_j1_Accuracy_Limited_Positive") {
+#ifdef __unix__
+  long double (*ref)(long double) = j1l;
+#elif _WIN64
+  long double (*ref)(long double) = _j1l;
+#endif
+  UnaryDoublePrecisionBruteForceTest(
+      j1_kernel<double>, ref, AbsValidatorBuilderFactory<double>(5.e-12),
+      std::numeric_limits<double>::lowest(), std::numeric_limits<double>::max());
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - RTCs kernels that pass argument of invalid type for j1f and j1.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/special_funcs.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_j1_j1f_Negative_RTC") { NegativeTestRTCWrapper<4>(kJ1); }
+
+MATH_BESSEL_N_KERNEL_DEF(jn)
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `jnf(n, x)` for `x` in [n, FLT_MAX] and n equal to 5, 25 or
+ * 120. The results are compared against reference function `double jn(int, double)`. The maximum
+ * absolute error is 2.2x10^-6.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/special_funcs.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_jnf_Accuracy_Limited_Positive") {
+#ifdef __unix__
+  double (*ref)(int, double) = jn;
+#elif _WIN64
+  double (*ref)(int, double) = _jn;
+#endif
+  int n = GENERATE(5, 25, 120);
+  BesselSinglePrecisionRangeTest(jn_kernel<float>, ref, AbsValidatorBuilderFactory<float>(0.0000022), n, n,
+                                 std::numeric_limits<float>::max());
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - Tests the numerical accuracy of `jn(x)` against a table of difficult values,
+ * followed by a large number of randomly generated values, for n equal to 5, 25, or 120.
+ * The results are compared against reference function `long double jnl(int, long double)`. The
+ * maximum absolute error is 5x10^-12.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/special_funcs.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_jn_Accuracy_Limited_Positive") {
+#ifdef __unix__
+  long double (*ref)(int, long double) = jnl;
+#elif _WIN64
+  long double (*ref)(int, long double) = _jnl;
+#endif
+  int n = GENERATE(5, 25, 120);
+  BesselDoublePrecisionBruteForceTest(
+      jn_kernel<double>, ref, AbsValidatorBuilderFactory<double>(5.e-12), n,
+      std::numeric_limits<double>::lowest(), std::numeric_limits<double>::max());
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - RTCs kernels that pass argument of invalid type for jnf and jn.
+ *
+ * Test source
+ * ------------------------
+ * - unit/math/special_funcs.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.2
+ */
+TEST_CASE("Unit_Device_jn_jnf_Negative_RTC") { NegativeTestRTCWrapper<8>(kJn); }
From 0e54517f8490816cdad1da112a41d166341088b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com>
Date: Thu, 28 Dec 2023 18:20:43 +0100
Subject: [PATCH 17/71] EXSWHTEC-303 - Implement tests for synchronization device functions #275

Change-Id: Iad0e303b21443615cda6fa68a9e3ef61a452a45c
---
 catch/include/hip_test_defgroups.hh | 7 +
 catch/unit/CMakeLists.txt | 1 +
 catch/unit/syncthreads/CMakeLists.txt | 45 ++++
 catch/unit/syncthreads/__syncthreads.cc | 59 +++++
 catch/unit/syncthreads/__syncthreads_and.cc | 241 ++++++++++++++++++
 .../__syncthreads_and_negative_kernels.cc | 32 +++
 .../__syncthreads_and_negative_kernels_rtc.hh | 39 +++
 catch/unit/syncthreads/__syncthreads_count.cc | 241 ++++++++++++++++++
 .../__syncthreads_count_negative_kernels.cc | 36 +++
 ..._syncthreads_count_negative_kernels_rtc.hh | 39 +++
 catch/unit/syncthreads/__syncthreads_or.cc | 241
++++++++++++++++++ .../__syncthreads_or_negative_kernels.cc | 32 +++ .../__syncthreads_or_negative_kernels_rtc.hh | 39 +++ catch/unit/syncthreads/syncthreads_common.hh | 79 ++++++ 14 files changed, 1131 insertions(+) create mode 100644 catch/unit/syncthreads/CMakeLists.txt create mode 100644 catch/unit/syncthreads/__syncthreads.cc create mode 100644 catch/unit/syncthreads/__syncthreads_and.cc create mode 100644 catch/unit/syncthreads/__syncthreads_and_negative_kernels.cc create mode 100644 catch/unit/syncthreads/__syncthreads_and_negative_kernels_rtc.hh create mode 100644 catch/unit/syncthreads/__syncthreads_count.cc create mode 100644 catch/unit/syncthreads/__syncthreads_count_negative_kernels.cc create mode 100644 catch/unit/syncthreads/__syncthreads_count_negative_kernels_rtc.hh create mode 100644 catch/unit/syncthreads/__syncthreads_or.cc create mode 100644 catch/unit/syncthreads/__syncthreads_or_negative_kernels.cc create mode 100644 catch/unit/syncthreads/__syncthreads_or_negative_kernels_rtc.hh create mode 100644 catch/unit/syncthreads/syncthreads_common.hh diff --git a/catch/include/hip_test_defgroups.hh b/catch/include/hip_test_defgroups.hh index 0a6d4dde90..7da19422c2 100644 --- a/catch/include/hip_test_defgroups.hh +++ b/catch/include/hip_test_defgroups.hh @@ -109,6 +109,13 @@ THE SOFTWARE. * @} */ +/** + * @defgroup SyncthreadsTest Synchronization Functions + * @{ + * This section describes tests for Synchronization Functions. 
+ * @} + */ + /** * @defgroup MemoryTest memory Management APIs * @{ diff --git a/catch/unit/CMakeLists.txt b/catch/unit/CMakeLists.txt index f1c8faa011..3074b8b0b5 100644 --- a/catch/unit/CMakeLists.txt +++ b/catch/unit/CMakeLists.txt @@ -49,6 +49,7 @@ add_subdirectory(atomics) add_subdirectory(complex) add_subdirectory(p2p) add_subdirectory(gcc) +add_subdirectory(syncthreads) if(HIP_PLATFORM STREQUAL "amd") add_subdirectory(callback) diff --git a/catch/unit/syncthreads/CMakeLists.txt b/catch/unit/syncthreads/CMakeLists.txt new file mode 100644 index 0000000000..0dc5faf821 --- /dev/null +++ b/catch/unit/syncthreads/CMakeLists.txt @@ -0,0 +1,45 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +set(TEST_SRC + __syncthreads.cc + __syncthreads_count.cc + __syncthreads_and.cc + __syncthreads_or.cc +) + +hip_add_exe_to_target(NAME SyncthreadsTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests) + +add_test(NAME Unit___syncthreads_count_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + __syncthreads_count_negative_kernels.cc 2) + +add_test(NAME Unit___syncthreads_and_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + __syncthreads_and_negative_kernels.cc 2) + +add_test(NAME Unit___syncthreads_or_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + __syncthreads_or_negative_kernels.cc 2) \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads.cc b/catch/unit/syncthreads/__syncthreads.cc new file mode 100644 index 0000000000..2f63ae39ae --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads.cc @@ -0,0 +1,59 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +#include "syncthreads_common.hh" + +/** + * @addtogroup __syncthreads __syncthreads + * @{ + * @ingroup SyncthreadsTest + */ + +/** + * Test Description + * ------------------------ + * - Basic synchronization test for `__syncthreads`. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_Positive_Basic") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, sizeof(int) * kGridSize); + + HipTest::launchKernel(SyncthreadsKernel, kGridSize, kBlockSize, + sizeof(int) * kBlockSize, nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == kBlockSize * (kBlockSize + 1) / 2); + } +} \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads_and.cc b/catch/unit/syncthreads/__syncthreads_and.cc new file mode 100644 index 0000000000..91dc17fbb2 --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_and.cc @@ -0,0 +1,241 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +#include "__syncthreads_and_negative_kernels_rtc.hh" +#include "syncthreads_common.hh" + +/** + * @addtogroup __syncthreads_and __syncthreads_and + * @{ + * @ingroup SyncthreadsTest + */ + +/** + * Test Description + * ------------------------ + * - Basic synchronization test for `__syncthreads_and`. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_and_Positive_Basic") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, sizeof(int) * kGridSize); + + HipTest::launchKernel(SyncthreadsKernel, kGridSize, kBlockSize, + sizeof(int) * kBlockSize, nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == kBlockSize * (kBlockSize + 1) / 2); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_and` with 0 as the predicate for all threads. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_and_Positive_Predicate_Zero") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsZeroKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 0); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_and` with 1 as the predicate for all threads. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_and_Positive_Predicate_One") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsOneKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 1); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_and` with 0 as the predicate for even threads, and 1 as the predicate + * for odd threads. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_and_Positive_Predicate_OddEven") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsOddEvenKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 0); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_and` with a negative predicate. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_and_Positive_Predicate_Negative") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsNegativeKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 1); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_and` with the thread ID as the predicate. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_and_Positive_Predicate_Id") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsIdKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 0); + } +} + +/** + * Test Description + * ------------------------ + * - Runtime-compiles (hipRTC) kernels that pass invalid arguments to `__syncthreads_and`.
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_and_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + HIPRTC_CHECK(hiprtcCreateProgram(&program, kSyncthreadsAndSource, "__syncthreads_and_negative.cc", + 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{2}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads_and_negative_kernels.cc b/catch/unit/syncthreads/__syncthreads_and_negative_kernels.cc new file mode 100644 index 0000000000..5d889a99d2 --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_and_negative_kernels.cc @@ -0,0 +1,32 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +struct Dummy { + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +__global__ void __syncthreads_and_v1(int* predicate) { int result = __syncthreads_and(predicate); } + +__global__ void __syncthreads_and_v2(Dummy predicate) { int result = __syncthreads_and(predicate); } \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads_and_negative_kernels_rtc.hh b/catch/unit/syncthreads/__syncthreads_and_negative_kernels_rtc.hh new file mode 100644 index 0000000000..fc7be27871 --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_and_negative_kernels_rtc.hh @@ -0,0 +1,39 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +static constexpr auto kSyncthreadsAndSource{ + R"( + struct Dummy { + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void __syncthreads_and_v1(int* predicate) { + int result = __syncthreads_and(predicate); + } + + __global__ void __syncthreads_and_v2(Dummy predicate) { + int result = __syncthreads_and(predicate); + } + )"}; \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads_count.cc b/catch/unit/syncthreads/__syncthreads_count.cc new file mode 100644 index 0000000000..dd084f436e --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_count.cc @@ -0,0 +1,241 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +#include "__syncthreads_count_negative_kernels_rtc.hh" +#include "syncthreads_common.hh" + +/** + * @addtogroup __syncthreads_count __syncthreads_count + * @{ + * @ingroup SyncthreadsTest + */ + +/** + * Test Description + * ------------------------ + * - Basic synchronization test for `__syncthreads_count`. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_count.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_count_Positive_Basic") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, sizeof(int) * kGridSize); + + HipTest::launchKernel(SyncthreadsKernel, kGridSize, kBlockSize, + sizeof(int) * kBlockSize, nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == kBlockSize * (kBlockSize + 1) / 2); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_count` with 0 as the predicate for all threads. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_count.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_count_Positive_Predicate_Zero") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsZeroKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 0); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_count` with 1 as the predicate for all threads. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_count.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_count_Positive_Predicate_One") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsOneKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == kBlockSize); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_count` with 0 as the predicate for even threads, and 1 as the predicate + * for odd threads. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_count.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_count_Positive_Predicate_OddEven") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsOddEvenKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == kBlockSize / 2); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_count` with a negative predicate. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_count.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_count_Positive_Predicate_Negative") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsNegativeKernel, kGridSize, kBlockSize, + 0, nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == kBlockSize); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_count` with the thread ID as the predicate. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_count.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_count_Positive_Predicate_Id") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsIdKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == kBlockSize - 1); + } +} + +/** + * Test Description + * ------------------------ + * - Runtime-compiles (hipRTC) kernels that pass invalid arguments to `__syncthreads_count`.
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_count.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_count_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + HIPRTC_CHECK(hiprtcCreateProgram(&program, kSyncthreadsCountSource, + "__syncthreads_count_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{2}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads_count_negative_kernels.cc b/catch/unit/syncthreads/__syncthreads_count_negative_kernels.cc new file mode 100644 index 0000000000..83d8cf08ab --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_count_negative_kernels.cc @@ -0,0 +1,36 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +struct Dummy { + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +__global__ void __syncthreads_count_v1(int* predicate) { + int result = __syncthreads_count(predicate); +} + +__global__ void __syncthreads_count_v2(Dummy predicate) { + int result = __syncthreads_count(predicate); +} \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads_count_negative_kernels_rtc.hh b/catch/unit/syncthreads/__syncthreads_count_negative_kernels_rtc.hh new file mode 100644 index 0000000000..9f40e51175 --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_count_negative_kernels_rtc.hh @@ -0,0 +1,39 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +static constexpr auto kSyncthreadsCountSource{ + R"( + struct Dummy { + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void __syncthreads_count_v1(int* predicate) { + int result = __syncthreads_count(predicate); + } + + __global__ void __syncthreads_count_v2(Dummy predicate) { + int result = __syncthreads_count(predicate); + } + )"}; \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads_or.cc b/catch/unit/syncthreads/__syncthreads_or.cc new file mode 100644 index 0000000000..d392c50eab --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_or.cc @@ -0,0 +1,241 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +#include "__syncthreads_or_negative_kernels_rtc.hh" +#include "syncthreads_common.hh" + +/** + * @addtogroup __syncthreads_or __syncthreads_or + * @{ + * @ingroup SyncthreadsTest + */ + +/** + * Test Description + * ------------------------ + * - Basic synchronization test for `__syncthreads_or`. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_or_Positive_Basic") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, sizeof(int) * kGridSize); + + HipTest::launchKernel(SyncthreadsKernel, kGridSize, kBlockSize, + sizeof(int) * kBlockSize, nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == kBlockSize * (kBlockSize + 1) / 2); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_or` with 0 as the predicate for all threads. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_or_Positive_Predicate_Zero") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsZeroKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 0); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_or` with 1 as the predicate for all threads. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_or_Positive_Predicate_One") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsOneKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 1); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_or` with 0 as the predicate for even threads, and 1 as the predicate for + * odd threads. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_or_Positive_Predicate_OddEven") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsOddEvenKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 1); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_or` with a negative predicate. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_or_Positive_Predicate_Negative") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsNegativeKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 1); + } +} + +/** + * Test Description + * ------------------------ + * - Test `__syncthreads_or` with the thread ID as the predicate. + * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_or_Positive_Predicate_Id") { + const auto kGridSize = 2; + const auto kBlockSize = GENERATE(13, 32, 64, 513); + + LinearAllocGuard out_alloc(LinearAllocs::hipMallocManaged, + sizeof(int) * kGridSize * kBlockSize); + + HipTest::launchKernel(SyncthreadsIdKernel, kGridSize, kBlockSize, 0, + nullptr, out_alloc.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < kGridSize * kBlockSize; ++i) { + REQUIRE(out_alloc.host_ptr()[i] == 1); + } +} + +/** + * Test Description + * ------------------------ + * - Runtime-compiles (hiprtc) kernels that pass invalid arguments to `__syncthreads_or`. 
+ * + * Test source + * ------------------------ + * - unit/syncthreads/__syncthreads_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___syncthreads_or_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + HIPRTC_CHECK(hiprtcCreateProgram(&program, kSyncthreadsOrSource, "__syncthreads_or_negative.cc", + 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{2}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads_or_negative_kernels.cc b/catch/unit/syncthreads/__syncthreads_or_negative_kernels.cc new file mode 100644 index 0000000000..b6f46a811c --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_or_negative_kernels.cc @@ -0,0 +1,32 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +struct Dummy { + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +__global__ void __syncthreads_or_v1(int* predicate) { int result = __syncthreads_or(predicate); } + +__global__ void __syncthreads_or_v2(Dummy predicate) { int result = __syncthreads_or(predicate); } \ No newline at end of file diff --git a/catch/unit/syncthreads/__syncthreads_or_negative_kernels_rtc.hh b/catch/unit/syncthreads/__syncthreads_or_negative_kernels_rtc.hh new file mode 100644 index 0000000000..dd7e1f93b0 --- /dev/null +++ b/catch/unit/syncthreads/__syncthreads_or_negative_kernels_rtc.hh @@ -0,0 +1,39 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +static constexpr auto kSyncthreadsOrSource{ + R"( + struct Dummy { + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void __syncthreads_or_v1(int* predicate) { + int result = __syncthreads_or(predicate); + } + + __global__ void __syncthreads_or_v2(Dummy predicate) { + int result = __syncthreads_or(predicate); + } + )"}; \ No newline at end of file diff --git a/catch/unit/syncthreads/syncthreads_common.hh b/catch/unit/syncthreads/syncthreads_common.hh new file mode 100644 index 0000000000..c6f9dec8d4 --- /dev/null +++ b/catch/unit/syncthreads/syncthreads_common.hh @@ -0,0 +1,79 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +enum class SyncthreadsKind { kDefault, kCount, kAnd, kOr }; + +template __device__ int Syncthreads(int predicate) { + if constexpr (kind == SyncthreadsKind::kDefault) { + __syncthreads(); + return 0; + } else if constexpr (kind == SyncthreadsKind::kCount) { + return __syncthreads_count(predicate); + } else if constexpr (kind == SyncthreadsKind::kAnd) { + return __syncthreads_and(predicate); + } else if constexpr (kind == SyncthreadsKind::kOr) { + return __syncthreads_or(predicate); + } +} + +template __global__ void SyncthreadsKernel(int* out) { + extern __shared__ int shared_mem[]; + + shared_mem[threadIdx.x] = threadIdx.x + 1; + + Syncthreads(0); + + if (threadIdx.x == 0) { + int sum = 0; + for (int i = 0; i < blockDim.x; ++i) { + sum += shared_mem[i]; + } + out[blockIdx.x] = sum; + } +} + +template __global__ void SyncthreadsZeroKernel(int* out) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + out[tid] = Syncthreads(0); +} + +template __global__ void SyncthreadsOneKernel(int* out) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + out[tid] = Syncthreads(1); +} + +template __global__ void SyncthreadsOddEvenKernel(int* out) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + out[tid] = Syncthreads(threadIdx.x % 2); +} + +template __global__ void SyncthreadsNegativeKernel(int* out) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + out[tid] = Syncthreads(-1); +} + +template __global__ void SyncthreadsIdKernel(int* out) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + out[tid] = Syncthreads(threadIdx.x); +} \ No newline at end of file From 8ef0d724f548fb6c25dfe35a8c9e31f00ef19d57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 18:19:15 +0100 Subject: [PATCH 18/71] EXSWHTEC-302 - Implement tests for memory fence device functions #276 Change-Id: I327527288f90011f09262708dd6372a7c6fc4708 --- 
catch/include/hip_test_defgroups.hh | 7 + catch/unit/CMakeLists.txt | 1 + catch/unit/threadfence/CMakeLists.txt | 29 +++ catch/unit/threadfence/__threadfence.cc | 201 ++++++++++++++++++ catch/unit/threadfence/__threadfence_block.cc | 201 ++++++++++++++++++ .../unit/threadfence/__threadfence_system.cc | 126 +++++++++++ catch/unit/threadfence/threadfence_common.hh | 108 ++++++++++ 7 files changed, 673 insertions(+) create mode 100644 catch/unit/threadfence/CMakeLists.txt create mode 100644 catch/unit/threadfence/__threadfence.cc create mode 100644 catch/unit/threadfence/__threadfence_block.cc create mode 100644 catch/unit/threadfence/__threadfence_system.cc create mode 100644 catch/unit/threadfence/threadfence_common.hh diff --git a/catch/include/hip_test_defgroups.hh b/catch/include/hip_test_defgroups.hh index 7da19422c2..3fb6c774ef 100644 --- a/catch/include/hip_test_defgroups.hh +++ b/catch/include/hip_test_defgroups.hh @@ -116,6 +116,13 @@ THE SOFTWARE. * @} */ +/** + * @defgroup ThreadfenceTest Memory Fence Functions + * @{ + * This section describes tests for Memory Fence Functions. + * @} + */ + /** * @defgroup MemoryTest memory Management APIs * @{ diff --git a/catch/unit/CMakeLists.txt b/catch/unit/CMakeLists.txt index 3074b8b0b5..37f8b73cc5 100644 --- a/catch/unit/CMakeLists.txt +++ b/catch/unit/CMakeLists.txt @@ -50,6 +50,7 @@ add_subdirectory(complex) add_subdirectory(p2p) add_subdirectory(gcc) add_subdirectory(syncthreads) +add_subdirectory(threadfence) if(HIP_PLATFORM STREQUAL "amd") add_subdirectory(callback) diff --git a/catch/unit/threadfence/CMakeLists.txt b/catch/unit/threadfence/CMakeLists.txt new file mode 100644 index 0000000000..51f61f4e2d --- /dev/null +++ b/catch/unit/threadfence/CMakeLists.txt @@ -0,0 +1,29 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +set(TEST_SRC + __threadfence_block.cc + __threadfence.cc + __threadfence_system.cc +) + +hip_add_exe_to_target(NAME ThreadfenceTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests) \ No newline at end of file diff --git a/catch/unit/threadfence/__threadfence.cc b/catch/unit/threadfence/__threadfence.cc new file mode 100644 index 0000000000..781da0bee3 --- /dev/null +++ b/catch/unit/threadfence/__threadfence.cc @@ -0,0 +1,201 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +#include "threadfence_common.hh" + +/** + * @addtogroup __threadfence __threadfence + * @{ + * @ingroup ThreadfenceTest + */ + +/** + * Test Description + * ------------------------ + * - Basic test for a device-wide memory fence on shared memory. 
+ * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_Positive_Basic_Shared") { + LinearAllocGuard in_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + LinearAllocGuard out_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + HIP_CHECK(hipMemsetD32(&(in_dev.ptr()[0]), kInitVal1, 1)); + HIP_CHECK(hipMemsetD32(&(in_dev.ptr()[1]), kInitVal2, 1)); + + HipTest::launchKernel(ThreadfenceTestKernel, 1, 2, + 4 * sizeof(int), nullptr, out_dev.ptr(), in_dev.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + HIP_CHECK(hipMemcpy(out_host.host_ptr(), out_dev.ptr(), 2 * sizeof(int), hipMemcpyDefault)); + + REQUIRE(!(out_host.ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a device-wide memory fence on global memory. 
+ * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_Positive_Basic_Global") { + LinearAllocGuard in_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + LinearAllocGuard out_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + HIP_CHECK(hipMemsetD32(&(in_dev.ptr()[0]), kInitVal1, 1)); + HIP_CHECK(hipMemsetD32(&(in_dev.ptr()[1]), kInitVal2, 1)); + + HipTest::launchKernel(ThreadfenceTestKernel, 2, 1, 0, nullptr, + out_dev.ptr(), in_dev.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + HIP_CHECK(hipMemcpy(out_host.host_ptr(), out_dev.ptr(), 2 * sizeof(int), hipMemcpyDefault)); + + REQUIRE(!(out_host.ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a device-wide memory fence on page-locked host memory. + * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_Positive_Basic_Pinned") { + LinearAllocGuard in_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + in_host.host_ptr()[0] = kInitVal1; + in_host.host_ptr()[1] = kInitVal2; + + HipTest::launchKernel(ThreadfenceTestKernel, 2, 1, 0, nullptr, + out_host.host_ptr(), in_host.host_ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(!(out_host.host_ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a device-wide memory fence on managed memory. 
+ * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_Positive_Basic_Managed") { + LinearAllocGuard in_host(LinearAllocs::hipMallocManaged, 2 * sizeof(int)); + LinearAllocGuard out_host(LinearAllocs::hipMallocManaged, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + in_host.host_ptr()[0] = kInitVal1; + in_host.host_ptr()[1] = kInitVal2; + + HipTest::launchKernel(ThreadfenceTestKernel, 2, 1, 0, nullptr, + out_host.ptr(), in_host.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(!(out_host.host_ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a device-wide memory fence on global peer device memory. + * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_Positive_Basic_Peer") { + const auto device_count = HipTest::getDeviceCount(); + if (device_count < 2) { + HipTest::HIP_SKIP_TEST("At least 2 devices are required"); + return; + } + + int can_access_peer; + HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, 0, 1)); + REQUIRE(can_access_peer); + + HIP_CHECK(hipSetDevice(0)); + + LinearAllocGuard in_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + LinearAllocGuard out_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + HIP_CHECK(hipMemsetD32(&(in_dev.ptr()[0]), kInitVal1, 1)); + HIP_CHECK(hipMemsetD32(&(in_dev.ptr()[1]), kInitVal2, 1)); + + HIP_CHECK(hipSetDevice(1)); + + HipTest::launchKernel(ThreadfenceTestKernel, 2, 1, 0, nullptr, + out_dev.ptr(), in_dev.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + HIP_CHECK(hipSetDevice(0)); 
+ + HIP_CHECK(hipMemcpy(out_host.host_ptr(), out_dev.ptr(), 2 * sizeof(int), hipMemcpyDefault)); + + REQUIRE(!(out_host.ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); + } +} \ No newline at end of file diff --git a/catch/unit/threadfence/__threadfence_block.cc b/catch/unit/threadfence/__threadfence_block.cc new file mode 100644 index 0000000000..43079ea636 --- /dev/null +++ b/catch/unit/threadfence/__threadfence_block.cc @@ -0,0 +1,201 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +#include "threadfence_common.hh" + +/** + * @addtogroup __threadfence_block __threadfence_block + * @{ + * @ingroup ThreadfenceTest + */ + +/** + * Test Description + * ------------------------ + * - Basic test for a block-wide memory fence on shared memory. 
+ * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence_block.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_block_Positive_Basic_Shared") { + LinearAllocGuard in_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + LinearAllocGuard out_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + HIP_CHECK(hipMemsetD32(&(in_dev.ptr()[0]), kInitVal1, 1)); + HIP_CHECK(hipMemsetD32(&(in_dev.ptr()[1]), kInitVal2, 1)); + + HipTest::launchKernel(ThreadfenceTestKernel, 1, 2, + 4 * sizeof(int), nullptr, out_dev.ptr(), in_dev.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + HIP_CHECK(hipMemcpy(out_host.host_ptr(), out_dev.ptr(), 2 * sizeof(int), hipMemcpyDefault)); + + REQUIRE(!(out_host.ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a block-wide memory fence on global memory. 
+ * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence_block.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_block_Positive_Basic_Global") { + LinearAllocGuard in_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + LinearAllocGuard out_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + HIP_CHECK(hipMemsetD32(&(in_dev.ptr()[0]), kInitVal1, 1)); + HIP_CHECK(hipMemsetD32(&(in_dev.ptr()[1]), kInitVal2, 1)); + + HipTest::launchKernel(ThreadfenceTestKernel, 2, 1, 0, nullptr, + out_dev.ptr(), in_dev.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + HIP_CHECK(hipMemcpy(out_host.host_ptr(), out_dev.ptr(), 2 * sizeof(int), hipMemcpyDefault)); + + REQUIRE(!(out_host.ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a block-wide memory fence on page-locked host memory. + * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence_block.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_block_Positive_Basic_Pinned") { + LinearAllocGuard in_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + in_host.host_ptr()[0] = kInitVal1; + in_host.host_ptr()[1] = kInitVal2; + + HipTest::launchKernel(ThreadfenceTestKernel, 2, 1, 0, nullptr, + out_host.host_ptr(), in_host.host_ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(!(out_host.host_ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a block-wide memory fence on managed memory. 
+ * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence_block.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_block_Positive_Basic_Managed") { + LinearAllocGuard in_host(LinearAllocs::hipMallocManaged, 2 * sizeof(int)); + LinearAllocGuard out_host(LinearAllocs::hipMallocManaged, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + in_host.host_ptr()[0] = kInitVal1; + in_host.host_ptr()[1] = kInitVal2; + + HipTest::launchKernel(ThreadfenceTestKernel, 2, 1, 0, nullptr, + out_host.ptr(), in_host.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(!(out_host.host_ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a block-wide memory fence on global peer device memory. + * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence_block.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_block_Positive_Basic_Peer") { + const auto device_count = HipTest::getDeviceCount(); + if (device_count < 2) { + HipTest::HIP_SKIP_TEST("At least 2 devices are required"); + return; + } + + int can_access_peer; + HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, 0, 1)); + REQUIRE(can_access_peer); + + HIP_CHECK(hipSetDevice(0)); + + LinearAllocGuard in_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + LinearAllocGuard out_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + HIP_CHECK(hipMemsetD32(&(in_dev.ptr()[0]), kInitVal1, 1)); + HIP_CHECK(hipMemsetD32(&(in_dev.ptr()[1]), kInitVal2, 1)); + + HIP_CHECK(hipSetDevice(1)); + + HipTest::launchKernel(ThreadfenceTestKernel, 2, 1, 0, nullptr, + out_dev.ptr(), in_dev.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + 
HIP_CHECK(hipSetDevice(0)); + + HIP_CHECK(hipMemcpy(out_host.host_ptr(), out_dev.ptr(), 2 * sizeof(int), hipMemcpyDefault)); + + REQUIRE(!(out_host.ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); + } +} \ No newline at end of file diff --git a/catch/unit/threadfence/__threadfence_system.cc b/catch/unit/threadfence/__threadfence_system.cc new file mode 100644 index 0000000000..078cf21511 --- /dev/null +++ b/catch/unit/threadfence/__threadfence_system.cc @@ -0,0 +1,126 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include + +#include "threadfence_common.hh" + +/** + * @addtogroup __threadfence_system __threadfence_system + * @{ + * @ingroup ThreadfenceTest + */ + +__global__ void WriteKernel(int* in) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + + if (tid == 0) { + Write(in); + } +} + +__global__ void ReadKernel(int* out, int* in) { + int tid = blockIdx.x * blockDim.x + threadIdx.x; + + if (tid == 0) { + Read(out, in); + } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a system-wide memory fence on global peer device memory. + * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_system_Positive_Basic_Peer") { + const auto device_count = HipTest::getDeviceCount(); + if (device_count < 2) { + HipTest::HIP_SKIP_TEST("At least 2 devices are required"); + return; + } + + int can_access_peer; + HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, 0, 1)); + REQUIRE(can_access_peer); + + HIP_CHECK(hipSetDevice(0)); + + LinearAllocGuard in_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + LinearAllocGuard out_dev(LinearAllocs::hipMalloc, 2 * sizeof(int)); + + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + HIP_CHECK(hipMemsetD32(&(in_dev.ptr()[0]), kInitVal1, 1)); + HIP_CHECK(hipMemsetD32(&(in_dev.ptr()[1]), kInitVal2, 1)); + + HipTest::launchKernel(WriteKernel, 1, 1, 0, nullptr, in_dev.ptr()); + + HIP_CHECK(hipSetDevice(1)); + HipTest::launchKernel(ReadKernel, 1, 1, 0, nullptr, out_dev.ptr(), in_dev.ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + HIP_CHECK(hipSetDevice(0)); + HIP_CHECK(hipDeviceSynchronize()); + + HIP_CHECK(hipMemcpy(out_host.host_ptr(), out_dev.ptr(), 2 * sizeof(int), hipMemcpyDefault)); + + REQUIRE(!(out_host.ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); 
+ } +} + +/** + * Test Description + * ------------------------ + * - Basic test for a system-wide memory fence on page-locked host memory. + * + * Test source + * ------------------------ + * - unit/threadfence/__threadfence_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit___threadfence_system_Positive_Basic_Host") { + LinearAllocGuard in_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + LinearAllocGuard out_host(LinearAllocs::hipHostMalloc, 2 * sizeof(int)); + + for (int i = 0; i < cmd_options.iterations; ++i) { + in_host.host_ptr()[0] = kInitVal1; + in_host.host_ptr()[1] = kInitVal2; + + HipTest::launchKernel(WriteKernel, 1, 1, 0, nullptr, in_host.host_ptr()); + Read(out_host.host_ptr(), in_host.host_ptr()); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(!(out_host.host_ptr()[0] == kInitVal1 && out_host.ptr()[1] == kSetVal2)); + } +} \ No newline at end of file diff --git a/catch/unit/threadfence/threadfence_common.hh b/catch/unit/threadfence/threadfence_common.hh new file mode 100644 index 0000000000..dc8dca776d --- /dev/null +++ b/catch/unit/threadfence/threadfence_common.hh @@ -0,0 +1,108 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +enum class ThreadfenceScope { kBlock, kDevice, kSystem }; + +template __device__ void Threadfence() { + if constexpr (scope == ThreadfenceScope::kBlock) { + __threadfence_block(); + } else if constexpr (scope == ThreadfenceScope::kDevice) { + __threadfence(); + } else if constexpr (scope == ThreadfenceScope::kSystem) { + __threadfence_system(); + } +} + +static constexpr int kInitVal1 = 1, kInitVal2 = 2; +static constexpr int kSetVal1 = 10, kSetVal2 = 20; + +template __host__ __device__ void Write(volatile int* in) { + in[0] = kSetVal1; +#ifdef __HIP_DEVICE_COMPILE__ + Threadfence(); +#else + std::atomic_thread_fence(std::memory_order_seq_cst); +#endif + in[1] = kSetVal2; +} + +template +__host__ __device__ void Read(volatile int* out, volatile int* in) { + out[1] = in[1]; +#ifdef __HIP_DEVICE_COMPILE__ + Threadfence(); +#else + std::atomic_thread_fence(std::memory_order_seq_cst); +#endif + out[0] = in[0]; +} + +template +__device__ void ThreadfenceTest(int* out, int* in) { + if constexpr (scope == ThreadfenceScope::kBlock || use_shared_mem) { + if (threadIdx.x == 0 && blockIdx.x == 0) { + Write(in); + } else if (threadIdx.x == 1 && blockIdx.x == 0) { + Read(out, in); + } + } else if constexpr (scope == ThreadfenceScope::kDevice) { + if (threadIdx.x == 0 && blockIdx.x == 0) { + Write(in); + } else if (threadIdx.x == 0 && blockIdx.x == 1) { + Read(out, in); + } + } +} + +template +__global__ void ThreadfenceTestKernel(int* out, int* in) { + extern __shared__ int shared_mem[]; + + 
int tid = blockIdx.x * blockDim.x + threadIdx.x; + + int *out_mem = out, *in_mem = in; + + if constexpr (use_shared_mem) { + if (tid == 0) { + in_mem = &shared_mem[0]; + out_mem = &shared_mem[2]; + + in_mem[0] = in[0]; + in_mem[1] = in[1]; + } + + __syncthreads(); + } + + ThreadfenceTest(out_mem, in_mem); + + if constexpr (use_shared_mem) { + __syncthreads(); + + if (tid == 0) { + out[0] = out_mem[0]; + out[1] = out_mem[1]; + } + } +} \ No newline at end of file From 59d6807cdba0779d3f561a44ae038feaf5ab7724 Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Wed, 24 Jan 2024 00:57:39 +0530 Subject: [PATCH 19/71] EXSWHTEC-305 - Implement tests for double type casting intrinsics #283 Change-Id: Iacf67376949eed4a84f7e9e95bb51fd31b5ec6a4 --- catch/unit/math/CMakeLists.txt | 5 + catch/unit/math/casting_common.hh | 195 ++++++ catch/unit/math/casting_double_funcs.cc | 597 ++++++++++++++++++ .../math/casting_double_negative_kernels.cc | 55 ++ .../casting_double_negative_kernels_rtc.hh | 157 +++++ 5 files changed, 1009 insertions(+) create mode 100644 catch/unit/math/casting_common.hh create mode 100644 catch/unit/math/casting_double_funcs.cc create mode 100644 catch/unit/math/casting_double_negative_kernels.cc create mode 100644 catch/unit/math/casting_double_negative_kernels_rtc.hh diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt index 1a9cd98f89..347d4e10ef 100644 --- a/catch/unit/math/CMakeLists.txt +++ b/catch/unit/math/CMakeLists.txt @@ -29,6 +29,7 @@ set(TEST_SRC pow_funcs.cc log_funcs.cc special_funcs.cc + casting_double_funcs.cc ) if(HIP_PLATFORM MATCHES "nvidia") @@ -101,3 +102,7 @@ add_test(NAME Unit_Device_special_funcs_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} math_special_func_kernels.cc 76) +add_test(NAME Unit_Device_casting_double_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + 
${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + casting_double_negative_kernels.cc 69) diff --git a/catch/unit/math/casting_common.hh b/catch/unit/math/casting_common.hh new file mode 100644 index 0000000000..6077360ffa --- /dev/null +++ b/catch/unit/math/casting_common.hh @@ -0,0 +1,195 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include "unary_common.hh" +#include + +namespace cg = cooperative_groups; + +#define CAST_KERNEL_DEF(func_name, T1, T2) \ + __global__ void func_name##_kernel(T1* const ys, const size_t num_xs, T2* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(xs[i]); \ + } \ + } + +#define CAST_F2I_REF_DEF(func_name, T1, T2, ref_func) \ + T1 func_name##_ref(T2 arg) { \ + if (arg >= static_cast(std::numeric_limits::max())) \ + return std::numeric_limits::max(); \ + else if (arg <= static_cast(std::numeric_limits::min())) \ + return std::numeric_limits::min(); \ + T2 result = ref_func(arg); \ + return result; \ + } + +#define CAST_F2I_RZ_REF_DEF(func_name, T1, T2) \ + T1 func_name##_ref(T2 arg) { \ + if (arg >= static_cast(std::numeric_limits::max())) \ + return std::numeric_limits::max(); \ + else if (arg <= static_cast(std::numeric_limits::min())) \ + return std::numeric_limits::min(); \ + T1 result = static_cast(arg); \ + return result; \ + } + +#define CAST_RND_REF_DEF(func_name, T1, T2, round_dir) \ + T1 func_name##_ref(T2 arg) { \ + int curr_direction = fegetround(); \ + fesetround(round_dir); \ + T1 result = static_cast(arg); \ + fesetround(curr_direction); \ + return result; \ + } + +#define CAST_REF_DEF(func_name, T1, T2) \ + T1 func_name##_ref(T2 arg) { \ + T1 result = static_cast(arg); \ + return result; \ + } + + +template T1 type2_as_type1_ref(T2 arg) { + T1 tmp; + memcpy(&tmp, &arg, sizeof(tmp)); + return tmp; +} + +template +void CastDoublePrecisionSpecialValuesTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto values = std::get>(kSpecialValRegistry); + std::vector spec_values; + + if (!std::is_same_v && !std::is_same_v && !std::is_same_v) { + for (int i = 0; i < 
values.size; i++) { + if (!std::isnan(values.data[i]) && !std::isinf(values.data[i])) { + spec_values.push_back(values.data[i]); + } + } + } + + MathTest math_test(kernel, spec_values.size()); + math_test.template Run(validator_builder, grid_size, block_size, ref_func, + spec_values.size(), spec_values.data()); +} + +template +void CastDoublePrecisionTest(kernel_sig kernel, ref_sig ref, + const ValidatorBuilder& validator_builder) { + SECTION("Special values") { + CastDoublePrecisionSpecialValuesTest(kernel, ref, validator_builder); + } + + SECTION("Brute force") { UnaryDoublePrecisionBruteForceTest(kernel, ref, validator_builder); } +} + +template +void CastIntRangeTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder, + const TArg a = std::numeric_limits::lowest(), + const TArg b = std::numeric_limits::max()) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(T) + sizeof(TArg)); + LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + + MathTest math_test(kernel, max_batch_size); + + size_t inserted = 0u; + for (TArg v = a; v <= b; v++) { + values.ptr()[inserted++] = v; + if (inserted < max_batch_size) continue; + + math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, values.ptr()); + inserted = 0u; + } +} + +template +void CastIntBruteForceTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder, + const TArg a = std::numeric_limits::lowest(), + const TArg b = std::numeric_limits::max()) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const uint64_t num_iterations = GetTestIterationCount(); + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(T) + sizeof(TArg)), num_iterations); + LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(TArg)}; + + MathTest 
math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + for (uint64_t i = 0ul; i < num_iterations; i += batch_size) { + batch_size = std::min(max_batch_size, num_iterations - i); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + thread_pool.Post([=, &values] { + const auto generator = [=] { + static thread_local std::mt19937 rng(std::random_device{}()); + std::uniform_int_distribution unif_dist(a, b); + return static_cast(unif_dist(rng)); + }; + std::generate(values.ptr() + base_idx, values.ptr() + base_idx + sub_batch_size, generator); + }); + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, values.ptr()); + } +} + +template +void CastBinaryIntRangeTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder, + const T2 a = std::numeric_limits::lowest(), + const T2 b = std::numeric_limits::max()) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + const auto max_batch_size = GetMaxAllowedDeviceMemoryUsage() / (sizeof(T1) + 2 * sizeof(T2)); + LinearAllocGuard values1{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T2)}; + LinearAllocGuard values2{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(T2)}; + + MathTest math_test(kernel, max_batch_size); + + size_t inserted = 0u; + for (T2 v = a; v <= b; v++) { + values1.ptr()[inserted] = v; + values2.ptr()[inserted++] = b - v; + if (inserted < max_batch_size) continue; + + math_test.Run(validator_builder, grid_size, block_size, ref_func, inserted, values1.ptr(), + values2.ptr()); + inserted = 0u; + } +} diff --git a/catch/unit/math/casting_double_funcs.cc b/catch/unit/math/casting_double_funcs.cc new file 
mode 100644 index 0000000000..fcdbe441ef --- /dev/null +++ b/catch/unit/math/casting_double_funcs.cc @@ -0,0 +1,597 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "casting_common.hh" +#include "casting_double_negative_kernels_rtc.hh" + +/** + * @addtogroup CastingDoubleType CastingDoubleType + * @{ + * @ingroup MathTest + */ + +#define CAST_DOUBLE2INT_TEST_DEF(kern_name, T, ref_func) \ + CAST_KERNEL_DEF(kern_name, T, double) \ + CAST_F2I_REF_DEF(kern_name, T, double, ref_func) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T (*ref)(double) = kern_name##_ref; \ + CastDoublePrecisionTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +#define CAST_DOUBLE2INT_RZ_TEST_DEF(kern_name, T) \ + CAST_KERNEL_DEF(kern_name, T, double) \ + CAST_F2I_RZ_REF_DEF(kern_name, T, double) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T (*ref)(double) = kern_name##_ref; \ + CastDoublePrecisionTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2int_rd` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function + * `std::floor`. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2INT_TEST_DEF(__double2int_rd, int, std::floor) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2int_rn` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function + * `std::rint`. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2INT_TEST_DEF(__double2int_rn, int, std::rint) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2int_ru` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function + * `std::ceil`. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2INT_TEST_DEF(__double2int_ru, int, std::ceil) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2int_rz` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function which + * performs cast to int. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2INT_RZ_TEST_DEF(__double2int_rz, int) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __double2int_[rd,rn,ru,rz]. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double2int_Negative_RTC") { NegativeTestRTCWrapper<12>(kDouble2Int); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2uint_rd` against a table of difficult values, followed by a + * large number of randomly generated values. The results are compared against reference function + * `std::floor`. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2INT_TEST_DEF(__double2uint_rd, unsigned int, std::floor) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2uint_rn` against a table of difficult values, followed by a + * large number of randomly generated values. The results are compared against reference function + * `std::rint`. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2INT_TEST_DEF(__double2uint_rn, unsigned int, std::rint) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2uint_ru` against a table of difficult values, followed by a + * large number of randomly generated values. The results are compared against reference function + * `std::ceil`. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2INT_TEST_DEF(__double2uint_ru, unsigned int, std::ceil) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2uint_rz` against a table of difficult values, followed by a + * large number of randomly generated values. The results are compared against reference function + * which performs cast to unsigned int. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2INT_RZ_TEST_DEF(__double2uint_rz, unsigned int) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __double2uint_[rd,rn,ru,rz]. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double2uint_Negative_RTC") { NegativeTestRTCWrapper<12>(kDouble2Uint); } + +#define CAST_DOUBLE2LL_TEST_DEF(kern_name, T, ref_func) \ + CAST_KERNEL_DEF(kern_name, T, double) \ + CAST_F2I_REF_DEF(kern_name, T, double, ref_func) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T (*ref)(double) = kern_name##_ref; \ + UnaryDoublePrecisionBruteForceTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ + static_cast(std::numeric_limits::min()), \ + static_cast(std::numeric_limits::max())); \ + } + +#define CAST_DOUBLE2LL_RZ_TEST_DEF(kern_name, T) \ + CAST_KERNEL_DEF(kern_name, T, double) \ + CAST_F2I_RZ_REF_DEF(kern_name, T, double) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T (*ref)(double) = kern_name##_ref; \ + UnaryDoublePrecisionBruteForceTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ + static_cast(std::numeric_limits::min()), \ + static_cast(std::numeric_limits::max())); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2ll_rd` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function + * `std::floor`. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2LL_TEST_DEF(__double2ll_rd, long long int, std::floor) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2ll_rn` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function + * `std::rint`. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2LL_TEST_DEF(__double2ll_rn, long long int, std::rint) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2ll_ru` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function + * `std::ceil`. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2LL_TEST_DEF(__double2ll_ru, long long int, std::ceil) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2ll_rz` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function which + * performs cast to long long int. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2LL_RZ_TEST_DEF(__double2ll_rz, long long int) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __double2ll_[rd,rn,ru,rz]. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double2ll_Negative_RTC") { NegativeTestRTCWrapper<12>(kDouble2LL); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2ull_rd` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function + * `std::floor`. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2LL_TEST_DEF(__double2ull_rd, unsigned long long int, std::floor) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2ull_rn` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function + * `std::rint`. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2LL_TEST_DEF(__double2ull_rn, unsigned long long int, std::rint) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2ull_ru` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function + * `std::ceil`. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2LL_TEST_DEF(__double2ull_ru, unsigned long long int, std::ceil) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2ull_rz` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function which + * performs cast to unsigned long long int. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2LL_RZ_TEST_DEF(__double2ull_rz, unsigned long long int) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __double2ull_[rd,rn,ru,rz]. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double2ull_Negative_RTC") { NegativeTestRTCWrapper<12>(kDouble2ULL); } + +#define CAST_DOUBLE2FLOAT_TEST_DEF(kern_name, round_dir) \ + CAST_KERNEL_DEF(kern_name, float, double) \ + CAST_RND_REF_DEF(kern_name, float, double, round_dir) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + float (*ref)(double) = kern_name##_ref; \ + CastDoublePrecisionTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +#define CAST_DOUBLE2FLOAT_RN_TEST_DEF(kern_name) \ + CAST_KERNEL_DEF(kern_name, float, double) \ + CAST_REF_DEF(kern_name, float, double) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + float (*ref)(double) = kern_name##_ref; \ + CastDoublePrecisionTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2float_rd` against a table of difficult values, followed by a + * large number of randomly generated values. The results are compared against reference function + * which performs cast to float with rounding mode FE_DOWNWARD. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2FLOAT_TEST_DEF(__double2float_rd, FE_DOWNWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2float_rn` against a table of difficult values, followed by a + * large number of randomly generated values. The results are compared against reference function + * which performs cast to float. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2FLOAT_RN_TEST_DEF(__double2float_rn) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2float_ru` against a table of difficult values, followed by a + * large number of randomly generated values. The results are compared against reference function + * which performs cast to float with rounding mode FE_UPWARD. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2FLOAT_TEST_DEF(__double2float_ru, FE_UPWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2float_rz` against a table of difficult values, followed by a + * large number of randomly generated values. The results are compared against reference function + * which performs cast to float with rounding mode FE_TOWARDZERO. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_DOUBLE2FLOAT_TEST_DEF(__double2float_rz, FE_TOWARDZERO) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __double2float_[rd,rn,ru,rz]. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double2float_Negative_RTC") { NegativeTestRTCWrapper<12>(kDouble2Float); } + +CAST_KERNEL_DEF(__double2hiint, int, double) + +int __double2hiint_ref(double arg) { + int tmp[2]; + memcpy(tmp, &arg, sizeof(tmp)); + return tmp[1]; +} + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2hiint` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function which + * performs copy of higher part of double value to int variable. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double2hiint_Positive") { + int (*ref)(double) = __double2hiint_ref; + CastDoublePrecisionTest(__double2hiint_kernel, ref, EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __double2hiint. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double2hiint_Negative_RTC") { NegativeTestRTCWrapper<3>(kDouble2Hiint); } + +CAST_KERNEL_DEF(__double2loint, int, double) + +int __double2loint_ref(double arg) { + int tmp[2]; + memcpy(tmp, &arg, sizeof(tmp)); + return tmp[0]; +} + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double2loint` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function which + * performs copy of lower part of double value to int variable. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double2loint_Positive") { + int (*ref)(double) = __double2loint_ref; + CastDoublePrecisionTest(__double2loint_kernel, ref, EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __double2loint. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double2loint_Negative_RTC") { NegativeTestRTCWrapper<3>(kDouble2Loint); } + +CAST_KERNEL_DEF(__double_as_longlong, long long int, double) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__double_as_longlong` against a table of difficult values, followed by a + * large number of randomly generated values. The results are compared against reference function + * which performs copy of double value to long long int variable. + * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double_as_longlong_Positive") { + long long int (*ref)(double) = type2_as_type1_ref; + CastDoublePrecisionTest(__double_as_longlong_kernel, ref, + EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __double_as_longlong. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_double_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___double_as_longlong_Negative_RTC") { + NegativeTestRTCWrapper<3>(kDoubleAsLonglong); +} \ No newline at end of file diff --git a/catch/unit/math/casting_double_negative_kernels.cc b/catch/unit/math/casting_double_negative_kernels.cc new file mode 100644 index 0000000000..8386107aed --- /dev/null +++ b/catch/unit/math/casting_double_negative_kernels.cc @@ -0,0 +1,55 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL(func_name, T) \ + __global__ void func_name##_kernel_v1(T* result, double* x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v2(T* result, Dummy x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v3(Dummy* result, double x) { *result = func_name(x); } + +NEGATIVE_KERNELS_SHELL(__double2int_rd, int) +NEGATIVE_KERNELS_SHELL(__double2int_rn, int) +NEGATIVE_KERNELS_SHELL(__double2int_ru, int) +NEGATIVE_KERNELS_SHELL(__double2int_rz, int) +NEGATIVE_KERNELS_SHELL(__double2uint_rd, unsigned int) +NEGATIVE_KERNELS_SHELL(__double2uint_rn, unsigned int) +NEGATIVE_KERNELS_SHELL(__double2uint_ru, unsigned int) +NEGATIVE_KERNELS_SHELL(__double2uint_rz, unsigned int) +NEGATIVE_KERNELS_SHELL(__double2ll_rd, long long int) +NEGATIVE_KERNELS_SHELL(__double2ll_rn, long long int) +NEGATIVE_KERNELS_SHELL(__double2ll_ru, long long int) +NEGATIVE_KERNELS_SHELL(__double2ll_rz, long long int) +NEGATIVE_KERNELS_SHELL(__double2ull_rd, unsigned long long int) +NEGATIVE_KERNELS_SHELL(__double2ull_rn, unsigned long long int) +NEGATIVE_KERNELS_SHELL(__double2ull_ru, unsigned long long int) +NEGATIVE_KERNELS_SHELL(__double2ull_rz, unsigned long long int) +NEGATIVE_KERNELS_SHELL(__double2float_rd, float) +NEGATIVE_KERNELS_SHELL(__double2float_rn, float) +NEGATIVE_KERNELS_SHELL(__double2float_ru, float) +NEGATIVE_KERNELS_SHELL(__double2float_rz, float) +NEGATIVE_KERNELS_SHELL(__double2hiint, int) +NEGATIVE_KERNELS_SHELL(__double2loint, int) +NEGATIVE_KERNELS_SHELL(__double_as_longlong, long long int) \ No newline at end of file diff --git a/catch/unit/math/casting_double_negative_kernels_rtc.hh b/catch/unit/math/casting_double_negative_kernels_rtc.hh new file mode 100644 index 0000000000..440f4cad9d --- /dev/null +++ b/catch/unit/math/casting_double_negative_kernels_rtc.hh @@ -0,0 +1,157 @@ +/* +Copyright (c) 2021 Advanced Micro 
Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the double type casting negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kDouble2Int{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void double2int_rd_kernel_v1(int* result, double* x) { *result = __double2int_rd(x); } + __global__ void double2int_rd_kernel_v2(int* result, Dummy x) { *result = __double2int_rd(x); } + __global__ void double2int_rd_kernel_v3(Dummy* result, double x) { *result = __double2int_rd(x); } + __global__ void double2int_rn_kernel_v1(int* result, double* x) { *result = __double2int_rn(x); } + __global__ void double2int_rn_kernel_v2(int* result, Dummy x) { *result = __double2int_rn(x); } + __global__ void double2int_rn_kernel_v3(Dummy* result, double x) { *result = __double2int_rn(x); } + __global__ void double2int_ru_kernel_v1(int* result, double* x) { *result = __double2int_ru(x); } + __global__ void double2int_ru_kernel_v2(int* result, Dummy x) { *result = __double2int_ru(x); } + __global__ void double2int_ru_kernel_v3(Dummy* result, double x) { *result = __double2int_ru(x); } + __global__ void double2int_rz_kernel_v1(int* result, double* x) { *result = __double2int_rz(x); } + __global__ void double2int_rz_kernel_v2(int* result, Dummy x) { *result = __double2int_rz(x); } + __global__ void double2int_rz_kernel_v3(Dummy* result, double x) { *result = __double2int_rz(x); } +)"}; + +static constexpr auto kDouble2Uint{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void double2uint_rd_kernel_v1(unsigned int* result, double* x) { *result = __double2uint_rd(x); } + __global__ void double2uint_rd_kernel_v2(unsigned int* result, Dummy x) { *result = __double2uint_rd(x); } + __global__ void double2uint_rd_kernel_v3(Dummy* result, double x) { *result = __double2uint_rd(x); } + __global__ void double2uint_rn_kernel_v1(unsigned int* result, double* x) { *result = __double2uint_rn(x); } + __global__ void double2uint_rn_kernel_v2(unsigned int* result, Dummy x) { *result = __double2uint_rn(x); } + 
__global__ void double2uint_rn_kernel_v3(Dummy* result, double x) { *result = __double2uint_rn(x); } + __global__ void double2uint_ru_kernel_v1(unsigned int* result, double* x) { *result = __double2uint_ru(x); } + __global__ void double2uint_ru_kernel_v2(unsigned int* result, Dummy x) { *result = __double2uint_ru(x); } + __global__ void double2uint_ru_kernel_v3(Dummy* result, double x) { *result = __double2uint_ru(x); } + __global__ void double2uint_rz_kernel_v1(unsigned int* result, double* x) { *result = __double2uint_rz(x); } + __global__ void double2uint_rz_kernel_v2(unsigned int* result, Dummy x) { *result = __double2uint_rz(x); } + __global__ void double2uint_rz_kernel_v3(Dummy* result, double x) { *result = __double2uint_rz(x); } +)"}; + +static constexpr auto kDouble2LL{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void double2ll_rd_kernel_v1(long long int* result, double* x) { *result = __double2ll_rd(x); } + __global__ void double2ll_rd_kernel_v2(long long int* result, Dummy x) { *result = __double2ll_rd(x); } + __global__ void double2ll_rd_kernel_v3(Dummy* result, double x) { *result = __double2ll_rd(x); } + __global__ void double2ll_rn_kernel_v1(long long int* result, double* x) { *result = __double2ll_rn(x); } + __global__ void double2ll_rn_kernel_v2(long long int* result, Dummy x) { *result = __double2ll_rn(x); } + __global__ void double2ll_rn_kernel_v3(Dummy* result, double x) { *result = __double2ll_rn(x); } + __global__ void double2ll_ru_kernel_v1(long long int* result, double* x) { *result = __double2ll_ru(x); } + __global__ void double2ll_ru_kernel_v2(long long int* result, Dummy x) { *result = __double2ll_ru(x); } + __global__ void double2ll_ru_kernel_v3(Dummy* result, double x) { *result = __double2ll_ru(x); } + __global__ void double2ll_rz_kernel_v1(long long int* result, double* x) { *result = __double2ll_rz(x); } + __global__ void double2ll_rz_kernel_v2(long long int* result, Dummy x) { 
*result = __double2ll_rz(x); } + __global__ void double2ll_rz_kernel_v3(Dummy* result, double x) { *result = __double2ll_rz(x); } +)"}; + +static constexpr auto kDouble2ULL{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void double2ull_rd_kernel_v1(unsigned long long int* result, double* x) { *result = __double2ull_rd(x); } + __global__ void double2ull_rd_kernel_v2(unsigned long long int* result, Dummy x) { *result = __double2ull_rd(x); } + __global__ void double2ull_rd_kernel_v3(Dummy* result, double x) { *result = __double2ull_rd(x); } + __global__ void double2ull_rn_kernel_v1(unsigned long long int* result, double* x) { *result = __double2ull_rn(x); } + __global__ void double2ull_rn_kernel_v2(unsigned long long int* result, Dummy x) { *result = __double2ull_rn(x); } + __global__ void double2ull_rn_kernel_v3(Dummy* result, double x) { *result = __double2ull_rn(x); } + __global__ void double2ull_ru_kernel_v1(unsigned long long int* result, double* x) { *result = __double2ull_ru(x); } + __global__ void double2ull_ru_kernel_v2(unsigned long long int* result, Dummy x) { *result = __double2ull_ru(x); } + __global__ void double2ull_ru_kernel_v3(Dummy* result, double x) { *result = __double2ull_ru(x); } + __global__ void double2ull_rz_kernel_v1(unsigned long long int* result, double* x) { *result = __double2ull_rz(x); } + __global__ void double2ull_rz_kernel_v2(unsigned long long int* result, Dummy x) { *result = __double2ull_rz(x); } + __global__ void double2ull_rz_kernel_v3(Dummy* result, double x) { *result = __double2ull_rz(x); } +)"}; + +static constexpr auto kDouble2Float{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void double2float_rd_kernel_v1(float* result, double* x) { *result = __double2float_rd(x); } + __global__ void double2float_rd_kernel_v2(float* result, Dummy x) { *result = __double2float_rd(x); } + __global__ void double2float_rd_kernel_v3(Dummy* 
result, double x) { *result = __double2float_rd(x); } + __global__ void double2float_rn_kernel_v1(float* result, double* x) { *result = __double2float_rn(x); } + __global__ void double2float_rn_kernel_v2(float* result, Dummy x) { *result = __double2float_rn(x); } + __global__ void double2float_rn_kernel_v3(Dummy* result, double x) { *result = __double2float_rn(x); } + __global__ void double2float_ru_kernel_v1(float* result, double* x) { *result = __double2float_ru(x); } + __global__ void double2float_ru_kernel_v2(float* result, Dummy x) { *result = __double2float_ru(x); } + __global__ void double2float_ru_kernel_v3(Dummy* result, double x) { *result = __double2float_ru(x); } + __global__ void double2float_rz_kernel_v1(float* result, double* x) { *result = __double2float_rz(x); } + __global__ void double2float_rz_kernel_v2(float* result, Dummy x) { *result = __double2float_rz(x); } + __global__ void double2float_rz_kernel_v3(Dummy* result, double x) { *result = __double2float_rz(x); } +)"}; + +static constexpr auto kDouble2Hiint{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void double2hiint_kernel_v1(int* result, double* x) { *result = __double2hiint(x); } + __global__ void double2hiint_kernel_v2(int* result, Dummy x) { *result = __double2hiint(x); } + __global__ void double2hiint_kernel_v3(Dummy* result, double x) { *result = __double2hiint(x); } +)"}; + +static constexpr auto kDouble2Loint{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void double2loint_kernel_v1(int* result, double* x) { *result = __double2loint(x); } + __global__ void double2loint_kernel_v2(int* result, Dummy x) { *result = __double2loint(x); } + __global__ void double2loint_kernel_v3(Dummy* result, double x) { *result = __double2loint(x); } +)"}; + +static constexpr auto kDoubleAsLonglong{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void 
double_as_longlong_kernel_v1(long long int* result, double* x) { *result = __double_as_longlong(x); } + __global__ void double_as_longlong_kernel_v2(long long int* result, Dummy x) { *result = __double_as_longlong(x); } + __global__ void double_as_longlong_kernel_v3(Dummy* result, double x) { *result = __double_as_longlong(x); } +)"}; From bff6c461ee050f8fb6fd3a2295871f6bc6c7cf98 Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Wed, 24 Jan 2024 21:05:16 +0530 Subject: [PATCH 20/71] EXSWHTEC-310 - Implement tests for float type casting intrinsics #284 Change-Id: Ia5e5362d81c2570f1ff7e112ab1df5e0f7c68f05 --- catch/unit/math/CMakeLists.txt | 5 + catch/unit/math/casting_float_funcs.cc | 440 ++++++++++++++++++ .../math/casting_float_negative_kernels.cc | 50 ++ .../casting_float_negative_kernels_rtc.hh | 126 +++++ catch/unit/math/math_common.hh | 2 +- 5 files changed, 622 insertions(+), 1 deletion(-) create mode 100644 catch/unit/math/casting_float_funcs.cc create mode 100644 catch/unit/math/casting_float_negative_kernels.cc create mode 100644 catch/unit/math/casting_float_negative_kernels_rtc.hh diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt index 347d4e10ef..b3e5a937d1 100644 --- a/catch/unit/math/CMakeLists.txt +++ b/catch/unit/math/CMakeLists.txt @@ -30,6 +30,7 @@ set(TEST_SRC log_funcs.cc special_funcs.cc casting_double_funcs.cc + casting_float_funcs.cc ) if(HIP_PLATFORM MATCHES "nvidia") @@ -106,3 +107,7 @@ add_test(NAME Unit_Device_casting_double_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} casting_double_negative_kernels.cc 69) +add_test(NAME Unit_Device_casting_float_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + casting_float_negative_kernels.cc 54) diff --git a/catch/unit/math/casting_float_funcs.cc b/catch/unit/math/casting_float_funcs.cc 
new file mode 100644 index 0000000000..f5e92e218b --- /dev/null +++ b/catch/unit/math/casting_float_funcs.cc @@ -0,0 +1,440 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "casting_common.hh" +#include "casting_float_negative_kernels_rtc.hh" + +/** + * @addtogroup CastingFloatType CastingFloatType + * @{ + * @ingroup MathTest + */ + +#define CAST_FLOAT2INT_TEST_DEF(kern_name, T, ref_func) \ + CAST_KERNEL_DEF(kern_name, T, float) \ + CAST_F2I_REF_DEF(kern_name, T, float, ref_func) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T (*ref)(float) = kern_name##_ref; \ + UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ + std::numeric_limits::lowest(), \ + std::numeric_limits::max()); \ + } + +#define CAST_FLOAT2INT_RZ_TEST_DEF(kern_name, T) \ + CAST_KERNEL_DEF(kern_name, T, float) \ + CAST_F2I_RZ_REF_DEF(kern_name, T, float) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T (*ref)(float) = kern_name##_ref; \ + UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ + std::numeric_limits::lowest(), \ + std::numeric_limits::max()); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2int_rd` for all possible inputs. The results are compared against + * reference function `std::floor`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2INT_TEST_DEF(__float2int_rd, int, std::floor) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2int_rn` for all possible inputs. The results are compared against + * reference function `std::rint`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2INT_TEST_DEF(__float2int_rn, int, std::rint) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2int_ru` for all possible inputs. 
The results are compared against + * reference function `std::ceil`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2INT_TEST_DEF(__float2int_ru, int, std::ceil) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2int_rz` for all possible inputs. The results are compared against + * reference function `std::trunc`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2INT_TEST_DEF(__float2int_rz, int, std::trunc) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __float2int_[rd,rn,ru,rz]. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float2int_Negative_RTC") { NegativeTestRTCWrapper<12>(kFloat2Int); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2uint_rd` for all possible inputs. The results are compared + * against reference function `std::floor`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2INT_TEST_DEF(__float2uint_rd, unsigned int, std::floor) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2uint_rn` for all possible inputs. The results are compared + * against reference function `std::rint`. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2INT_TEST_DEF(__float2uint_rn, unsigned int, std::rint) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2uint_ru` for all possible inputs. The results are compared + * against reference function `std::ceil`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2INT_TEST_DEF(__float2uint_ru, unsigned int, std::ceil) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2uint_rz` for all possible inputs. + * The results are compared against reference function which + * performs cast to unsigned int. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2INT_RZ_TEST_DEF(__float2uint_rz, unsigned int) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __float2uint_[rd,rn,ru,rz]. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float2uint_Negative_RTC") { NegativeTestRTCWrapper<12>(kFloat2Uint); } + +#define CAST_FLOAT2LL_TEST_DEF(kern_name, T, ref_func) \ + CAST_KERNEL_DEF(kern_name, T, float) \ + CAST_F2I_REF_DEF(kern_name, T, float, ref_func) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T (*ref)(float) = kern_name##_ref; \ + UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ + static_cast(std::numeric_limits::min()), \ + static_cast(std::numeric_limits::max())); \ + } + +#define CAST_FLOAT2LL_RZ_TEST_DEF(kern_name, T) \ + CAST_KERNEL_DEF(kern_name, T, float) \ + CAST_F2I_RZ_REF_DEF(kern_name, T, float) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T (*ref)(float) = kern_name##_ref; \ + UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ + static_cast(std::numeric_limits::min()), \ + static_cast(std::numeric_limits::max())); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2ll_rd` for all possible inputs between lowest and maximal long + * long int value. The results are compared against reference function `std::floor`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2LL_TEST_DEF(__float2ll_rd, long long int, std::floor) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2ll_rn` for all possible inputs between lowest and maximal long + * long int value. The results are compared against reference function `std::rint`. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2LL_TEST_DEF(__float2ll_rn, long long int, std::rint) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2ll_ru` for all possible inputs between lowest and maximal long + * long int value. The results are compared against reference function `std::ceil`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2LL_TEST_DEF(__float2ll_ru, long long int, std::ceil) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2ll_rz` for all possible inputs between lowest and maximal long + * long int value. The results are compared against reference function which performs cast to long + * long int. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2LL_RZ_TEST_DEF(__float2ll_rz, long long int) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __float2ll_[rd,rn,ru,rz]. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float2ll_Negative_RTC") { NegativeTestRTCWrapper<12>(kFloat2LL); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2ull_rd` for all possible inputs between lowest and maximal + * unsigned long long int value. The results are compared against reference function `std::floor`. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2LL_TEST_DEF(__float2ull_rd, unsigned long long int, std::floor) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2ull_rn` for all possible inputs between lowest and maximal + * unsigned long long int value. The results are compared against reference function `std::rint`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2LL_TEST_DEF(__float2ull_rn, unsigned long long int, std::rint) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2ull_ru` for all possible inputs between lowest and maximal + * unsigned long long int value. The results are compared against reference function `std::ceil`. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2LL_TEST_DEF(__float2ull_ru, unsigned long long int, std::ceil) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float2ull_rz` for all possible inputs between lowest and maximal + * unsigned long long int value. The results are compared against reference function which performs + * cast to unsigned long long int. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2LL_RZ_TEST_DEF(__float2ull_rz, unsigned long long int) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __float2ull_[rd,rn,ru,rz]. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float2ull_Negative_RTC") { NegativeTestRTCWrapper<12>(kFloat2ULL); } + +CAST_KERNEL_DEF(__float_as_int, int, float) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float_as_int` for all possible inputs. The results are compared against + * reference function which performs copy of float value to int variable. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float_as_int_Positive") { + int (*ref)(float) = type2_as_type1_ref; + UnarySinglePrecisionTest(__float_as_int_kernel, ref, EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __float_as_int. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float_as_int_Negative_RTC") { NegativeTestRTCWrapper<3>(kFloatAsInt); } + +CAST_KERNEL_DEF(__float_as_uint, unsigned int, float) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__float_as_uint` for all possible inputs. The results are compared + * against reference function which performs copy of float value to unsigned int variable. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float_as_uint_Positive") { + unsigned int (*ref)(float) = type2_as_type1_ref; + UnarySinglePrecisionTest(__float_as_uint_kernel, ref, EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __float_as_uint. + * + * Test source + * ------------------------ + * - unit/math/casting_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float_as_uint_Negative_RTC") { NegativeTestRTCWrapper<3>(kFloatAsUint); } diff --git a/catch/unit/math/casting_float_negative_kernels.cc b/catch/unit/math/casting_float_negative_kernels.cc new file mode 100644 index 0000000000..eecbd6dd7e --- /dev/null +++ b/catch/unit/math/casting_float_negative_kernels.cc @@ -0,0 +1,50 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL(func_name, T) \ + __global__ void func_name##_kernel_v1(T* result, float* x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v2(T* result, Dummy x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v3(Dummy* result, float x) { *result = func_name(x); } + +NEGATIVE_KERNELS_SHELL(__float2int_rd, int) +NEGATIVE_KERNELS_SHELL(__float2int_rn, int) +NEGATIVE_KERNELS_SHELL(__float2int_ru, int) +NEGATIVE_KERNELS_SHELL(__float2int_rz, int) +NEGATIVE_KERNELS_SHELL(__float2uint_rd, unsigned int) +NEGATIVE_KERNELS_SHELL(__float2uint_rn, unsigned int) +NEGATIVE_KERNELS_SHELL(__float2uint_ru, unsigned int) +NEGATIVE_KERNELS_SHELL(__float2uint_rz, unsigned int) +NEGATIVE_KERNELS_SHELL(__float2ll_rd, long long int) +NEGATIVE_KERNELS_SHELL(__float2ll_rn, long long int) +NEGATIVE_KERNELS_SHELL(__float2ll_ru, long long int) +NEGATIVE_KERNELS_SHELL(__float2ll_rz, long long int) +NEGATIVE_KERNELS_SHELL(__float2ull_rd, unsigned long long int) +NEGATIVE_KERNELS_SHELL(__float2ull_rn, unsigned long long int) +NEGATIVE_KERNELS_SHELL(__float2ull_ru, unsigned long long int) +NEGATIVE_KERNELS_SHELL(__float2ull_rz, unsigned long long int) +NEGATIVE_KERNELS_SHELL(__float_as_int, int) +NEGATIVE_KERNELS_SHELL(__float_as_uint, unsigned int) \ No newline at end of file diff --git a/catch/unit/math/casting_float_negative_kernels_rtc.hh b/catch/unit/math/casting_float_negative_kernels_rtc.hh new file mode 100644 index 0000000000..45fba3b0e7 --- /dev/null +++ b/catch/unit/math/casting_float_negative_kernels_rtc.hh @@ -0,0 +1,126 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, 
Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the float type casting negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kFloat2Int{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void float2int_rd_kernel_v1(int* result, float* x) { *result = __float2int_rd(x); } + __global__ void float2int_rd_kernel_v2(int* result, Dummy x) { *result = __float2int_rd(x); } + __global__ void float2int_rd_kernel_v3(Dummy* result, float x) { *result = __float2int_rd(x); } + __global__ void float2int_rn_kernel_v1(int* result, float* x) { *result = __float2int_rn(x); } + __global__ void float2int_rn_kernel_v2(int* result, Dummy x) { *result = __float2int_rn(x); } + __global__ void float2int_rn_kernel_v3(Dummy* result, float x) { *result = __float2int_rn(x); } + __global__ void float2int_ru_kernel_v1(int* result, float* x) { *result = __float2int_ru(x); } + __global__ void float2int_ru_kernel_v2(int* result, Dummy x) { *result = __float2int_ru(x); } + __global__ void float2int_ru_kernel_v3(Dummy* result, float x) { *result = __float2int_ru(x); } + __global__ void float2int_rz_kernel_v1(int* result, float* x) { *result = __float2int_rz(x); } + __global__ void float2int_rz_kernel_v2(int* result, Dummy x) { *result = __float2int_rz(x); } + __global__ void float2int_rz_kernel_v3(Dummy* result, float x) { *result = __float2int_rz(x); } +)"}; + +static constexpr auto kFloat2Uint{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void float2uint_rd_kernel_v1(unsigned int* result, float* x) { *result = __float2uint_rd(x); } + __global__ void float2uint_rd_kernel_v2(unsigned int* result, Dummy x) { *result = __float2uint_rd(x); } + __global__ void float2uint_rd_kernel_v3(Dummy* result, float x) { *result = __float2uint_rd(x); } + __global__ void float2uint_rn_kernel_v1(unsigned int* result, float* x) { *result = __float2uint_rn(x); } + __global__ void float2uint_rn_kernel_v2(unsigned int* result, Dummy x) { *result = __float2uint_rn(x); } + __global__ void float2uint_rn_kernel_v3(Dummy* 
result, float x) { *result = __float2uint_rn(x); } + __global__ void float2uint_ru_kernel_v1(unsigned int* result, float* x) { *result = __float2uint_ru(x); } + __global__ void float2uint_ru_kernel_v2(unsigned int* result, Dummy x) { *result = __float2uint_ru(x); } + __global__ void float2uint_ru_kernel_v3(Dummy* result, float x) { *result = __float2uint_ru(x); } + __global__ void float2uint_rz_kernel_v1(unsigned int* result, float* x) { *result = __float2uint_rz(x); } + __global__ void float2uint_rz_kernel_v2(unsigned int* result, Dummy x) { *result = __float2uint_rz(x); } + __global__ void float2uint_rz_kernel_v3(Dummy* result, float x) { *result = __float2uint_rz(x); } +)"}; + +static constexpr auto kFloat2LL{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void float2ll_rd_kernel_v1(long long int* result, float* x) { *result = __float2ll_rd(x); } + __global__ void float2ll_rd_kernel_v2(long long int* result, Dummy x) { *result = __float2ll_rd(x); } + __global__ void float2ll_rd_kernel_v3(Dummy* result, float x) { *result = __float2ll_rd(x); } + __global__ void float2ll_rn_kernel_v1(long long int* result, float* x) { *result = __float2ll_rn(x); } + __global__ void float2ll_rn_kernel_v2(long long int* result, Dummy x) { *result = __float2ll_rn(x); } + __global__ void float2ll_rn_kernel_v3(Dummy* result, float x) { *result = __float2ll_rn(x); } + __global__ void float2ll_ru_kernel_v1(long long int* result, float* x) { *result = __float2ll_ru(x); } + __global__ void float2ll_ru_kernel_v2(long long int* result, Dummy x) { *result = __float2ll_ru(x); } + __global__ void float2ll_ru_kernel_v3(Dummy* result, float x) { *result = __float2ll_ru(x); } + __global__ void float2ll_rz_kernel_v1(long long int* result, float* x) { *result = __float2ll_rz(x); } + __global__ void float2ll_rz_kernel_v2(long long int* result, Dummy x) { *result = __float2ll_rz(x); } + __global__ void float2ll_rz_kernel_v3(Dummy* result, float x) { 
*result = __float2ll_rz(x); } +)"}; + +static constexpr auto kFloat2ULL{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void float2ull_rd_kernel_v1(unsigned long long int* result, float* x) { *result = __float2ull_rd(x); } + __global__ void float2ull_rd_kernel_v2(unsigned long long int* result, Dummy x) { *result = __float2ull_rd(x); } + __global__ void float2ull_rd_kernel_v3(Dummy* result, float x) { *result = __float2ull_rd(x); } + __global__ void float2ull_rn_kernel_v1(unsigned long long int* result, float* x) { *result = __float2ull_rn(x); } + __global__ void float2ull_rn_kernel_v2(unsigned long long int* result, Dummy x) { *result = __float2ull_rn(x); } + __global__ void float2ull_rn_kernel_v3(Dummy* result, float x) { *result = __float2ull_rn(x); } + __global__ void float2ull_ru_kernel_v1(unsigned long long int* result, float* x) { *result = __float2ull_ru(x); } + __global__ void float2ull_ru_kernel_v2(unsigned long long int* result, Dummy x) { *result = __float2ull_ru(x); } + __global__ void float2ull_ru_kernel_v3(Dummy* result, float x) { *result = __float2ull_ru(x); } + __global__ void float2ull_rz_kernel_v1(unsigned long long int* result, float* x) { *result = __float2ull_rz(x); } + __global__ void float2ull_rz_kernel_v2(unsigned long long int* result, Dummy x) { *result = __float2ull_rz(x); } + __global__ void float2ull_rz_kernel_v3(Dummy* result, float x) { *result = __float2ull_rz(x); } +)"}; + +static constexpr auto kFloatAsInt{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void float_as_int_kernel_v1(int* result, float* x) { *result = __float_as_int(x); } + __global__ void float_as_int_kernel_v2(int* result, Dummy x) { *result = __float_as_int(x); } + __global__ void float_as_int_kernel_v3(Dummy* result, float x) { *result = __float_as_int(x); } +)"}; + +static constexpr auto kFloatAsUint{R"( + class Dummy { + public: + __device__ Dummy() {} + 
__device__ ~Dummy() {} + }; + __global__ void float_as_uint_kernel_v1(unsigned int* result, float* x) { *result = __float_as_uint(x); } + __global__ void float_as_uint_kernel_v2(unsigned int* result, Dummy x) { *result = __float_as_uint(x); } + __global__ void float_as_uint_kernel_v3(Dummy* result, float x) { *result = __float_as_uint(x); } +)"}; diff --git a/catch/unit/math/math_common.hh b/catch/unit/math/math_common.hh index 010780474f..738dbf66c0 100644 --- a/catch/unit/math/math_common.hh +++ b/catch/unit/math/math_common.hh @@ -215,7 +215,7 @@ template void NegativeTestRTCWrapper(const char* program_source) HIPRTC_CHECK( hiprtcCreateProgram(&program, program_source, "math_test_rtc.cc", 0, nullptr, nullptr)); #if HT_AMD - std::string args = std::string("-ferror-limit=100"); + std::string args = std::string("-ferror-limit=200"); const char* options[] = {args.c_str()}; hiprtcResult result{hiprtcCompileProgram(program, 1, options)}; #else From f9cf87fe602d3ff6ec6553823edd5d5484d0657e Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Wed, 24 Jan 2024 22:01:49 +0530 Subject: [PATCH 21/71] EXSWHTEC-311 - Implement tests for integer type casting intrinsics #285 Change-Id: I6e6bee38dad6948d46ba2ce0d5d2e3b27c150d35 --- catch/include/cmd_options.hh | 4 + catch/unit/math/CMakeLists.txt | 5 + catch/unit/math/casting_int_funcs.cc | 735 ++++++++++++++++++ .../unit/math/casting_int_negative_kernels.cc | 79 ++ .../math/casting_int_negative_kernels_rtc.hh | 215 +++++ catch/unit/math/math_common.hh | 11 + 6 files changed, 1049 insertions(+) create mode 100644 catch/unit/math/casting_int_funcs.cc create mode 100644 catch/unit/math/casting_int_negative_kernels.cc create mode 100644 catch/unit/math/casting_int_negative_kernels_rtc.hh diff --git a/catch/include/cmd_options.hh b/catch/include/cmd_options.hh index 666f34ea82..71f21006e0 100644 --- a/catch/include/cmd_options.hh +++ b/catch/include/cmd_options.hh @@ -37,3 +37,7 @@ struct CmdOptions { }; extern CmdOptions cmd_options; 
+<<<<<<< HEAD +======= + +>>>>>>> c08a2a5d (Merge branch 'develop' into casting_int_tests) diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt index b3e5a937d1..973e875abe 100644 --- a/catch/unit/math/CMakeLists.txt +++ b/catch/unit/math/CMakeLists.txt @@ -31,6 +31,7 @@ set(TEST_SRC special_funcs.cc casting_double_funcs.cc casting_float_funcs.cc + casting_int_funcs.cc ) if(HIP_PLATFORM MATCHES "nvidia") @@ -111,3 +112,7 @@ add_test(NAME Unit_Device_casting_float_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} casting_float_negative_kernels.cc 54) +add_test(NAME Unit_Device_casting_int_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + casting_int_negative_kernels.cc 92) diff --git a/catch/unit/math/casting_int_funcs.cc b/catch/unit/math/casting_int_funcs.cc new file mode 100644 index 0000000000..49e8ae7463 --- /dev/null +++ b/catch/unit/math/casting_int_funcs.cc @@ -0,0 +1,735 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "casting_common.hh" +#include "casting_int_negative_kernels_rtc.hh" + +/** + * @addtogroup CastingIntTypes CastingIntTypes + * @{ + * @ingroup MathTest + */ + +#define CAST_INT2FLOAT_TEST_DEF(kern_name, T1, T2, round_dir) \ + CAST_KERNEL_DEF(kern_name, T1, T2) \ + CAST_RND_REF_DEF(kern_name, T1, T2, round_dir) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T1 (*ref)(T2) = kern_name##_ref; \ + CastIntRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +#define CAST_INT2FLOAT_RN_TEST_DEF(kern_name, T1, T2) \ + CAST_KERNEL_DEF(kern_name, T1, T2) \ + CAST_REF_DEF(kern_name, T1, T2) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T1 (*ref)(T2) = kern_name##_ref; \ + CastIntRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2float_rd` for all possible inputs. The results are compared against + * reference function which performs cast to float with FE_DOWNWARD rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_TEST_DEF(__int2float_rd, float, int, FE_DOWNWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2float_rn` for all possible inputs. The results are compared against + * reference function which performs cast to float. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_RN_TEST_DEF(__int2float_rn, float, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2float_ru` for all possible inputs. The results are compared against + * reference function which performs cast to float with FE_UPWARD rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_TEST_DEF(__int2float_ru, float, int, FE_UPWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2float_rz` for all possible inputs. The results are compared against + * reference function which performs cast to float with FE_TOWARDZERO rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_TEST_DEF(__int2float_rz, float, int, FE_TOWARDZERO) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __int2float_[rd,rn,ru,rz]. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___int2float_Negative_RTC") { NegativeTestRTCWrapper<12>(kInt2Float); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__uint2float_rd` for all possible inputs. The results are compared + * against reference function which performs cast to float with FE_DOWNWARD rounding mode.
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_TEST_DEF(__uint2float_rd, float, unsigned int, FE_DOWNWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__uint2float_rn` for all possible inputs. The results are compared + * against reference function which performs cast to float. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_RN_TEST_DEF(__uint2float_rn, float, unsigned int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__uint2float_ru` for all possible inputs. The results are compared + * against reference function which performs cast to float with FE_UPWARD rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_TEST_DEF(__uint2float_ru, float, unsigned int, FE_UPWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__uint2float_rz` for all possible inputs. The results are compared + * against reference function which performs cast to float with FE_TOWARDZERO rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_TEST_DEF(__uint2float_rz, float, unsigned int, FE_TOWARDZERO) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __uint2float_[rd,rn,ru,rz]. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___uint2float_Negative_RTC") { NegativeTestRTCWrapper<12>(kUint2Float); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2double_rn` for all possible inputs. The results are compared + * against reference function which performs cast to double. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_RN_TEST_DEF(__int2double_rn, double, int) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __int2double_rn. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___int2double_Negative_RTC") { NegativeTestRTCWrapper<3>(kInt2Double); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__uint2double_rn` for all possible inputs. The results are compared + * against reference function which performs cast to double. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2FLOAT_RN_TEST_DEF(__uint2double_rn, double, unsigned int) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __uint2double_rn. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___uint2double_Negative_RTC") { NegativeTestRTCWrapper<3>(kUint2Double); } + +#define CAST_LL2FLOAT_TEST_DEF(kern_name, T1, T2, round_dir) \ + CAST_KERNEL_DEF(kern_name, T1, T2) \ + CAST_RND_REF_DEF(kern_name, T1, T2, round_dir) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T1 (*ref)(T2) = kern_name##_ref; \ + CastIntBruteForceTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +#define CAST_LL2FLOAT_RN_TEST_DEF(kern_name, T1, T2) \ + CAST_KERNEL_DEF(kern_name, T1, T2) \ + CAST_REF_DEF(kern_name, T1, T2) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Positive") { \ + T1 (*ref)(T2) = kern_name##_ref; \ + CastIntBruteForceTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2float_rd` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to float with FE_DOWNWARD + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ll2float_rd, float, long long int, FE_DOWNWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2float_rn` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to float. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_RN_TEST_DEF(__ll2float_rn, float, long long int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2float_ru` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to float with FE_UPWARD + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ll2float_ru, float, long long int, FE_UPWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2float_rz` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to float with FE_TOWARDZERO + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ll2float_rz, float, long long int, FE_TOWARDZERO) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __ll2float_[rd,rn,ru,rz]. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___ll2float_Negative_RTC") { NegativeTestRTCWrapper<12>(kLL2Float); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2float_rd` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to float with FE_DOWNWARD + * rounding mode. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ull2float_rd, float, unsigned long long int, FE_DOWNWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2float_rn` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to float. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_RN_TEST_DEF(__ull2float_rn, float, unsigned long long int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2float_ru` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to float with FE_UPWARD + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ull2float_ru, float, unsigned long long int, FE_UPWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2float_rz` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to float with FE_TOWARDZERO + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ull2float_rz, float, unsigned long long int, FE_TOWARDZERO) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __ull2float_[rd,rn,ru,rz]. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___ull2float_Negative_RTC") { NegativeTestRTCWrapper<12>(kULL2Float); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2double_rd` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to double with FE_DOWNWARD + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ll2double_rd, double, long long int, FE_DOWNWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2double_rn` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to double. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_RN_TEST_DEF(__ll2double_rn, double, long long int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2double_ru` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to double with FE_UPWARD + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ll2double_ru, double, long long int, FE_UPWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ll2double_rz` against a large number of randomly generated values. 
The + * results are compared against reference function which performs cast to double with FE_TOWARDZERO + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ll2double_rz, double, long long int, FE_TOWARDZERO) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __ll2double_[rd,rn,ru,rz]. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___ll2double_Negative_RTC") { NegativeTestRTCWrapper<12>(kLL2Double); } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2double_rd` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to double with FE_DOWNWARD + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ull2double_rd, double, unsigned long long int, FE_DOWNWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2double_rn` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to double. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_RN_TEST_DEF(__ull2double_rn, double, unsigned long long int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2double_ru` against a large number of randomly generated values. 
The + * results are compared against reference function which performs cast to double with FE_UPWARD + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ull2double_ru, double, unsigned long long int, FE_UPWARD) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__ull2double_rz` against a large number of randomly generated values. The + * results are compared against reference function which performs cast to double with FE_TOWARDZERO + * rounding mode. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2FLOAT_TEST_DEF(__ull2double_rz, double, unsigned long long int, FE_TOWARDZERO) + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __ull2double_[rd,rn,ru,rz]. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___ull2double_Negative_RTC") { NegativeTestRTCWrapper<12>(kULL2Double); } + +CAST_KERNEL_DEF(__int_as_float, float, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int_as_float` for all possible inputs. The results are compared against + * reference function which performs copy of int value to float variable. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___int_as_float_Positive") { + float (*ref)(int) = type2_as_type1_ref; + CastIntRangeTest(__int_as_float_kernel, ref, EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __int_as_float. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___int_as_float_Negative_RTC") { NegativeTestRTCWrapper<3>(kIntAsFloat); } + +CAST_KERNEL_DEF(__uint_as_float, float, unsigned int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__uint_as_float` for all possible inputs. The results are compared + * against reference function which performs copy of unsigned int value to float variable. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___uint_as_float_Positive") { + float (*ref)(unsigned int) = type2_as_type1_ref; + CastIntRangeTest(__uint_as_float_kernel, ref, EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __uint_as_float. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___uint_as_float_Negative_RTC") { NegativeTestRTCWrapper<3>(kUintAsFloat); } + +CAST_KERNEL_DEF(__longlong_as_double, double, long long int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__longlong_as_double` against a large number of randomly generated + * values. The results are compared against reference function which performs copy of long long int + * value to double variable. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___longlong_as_double_Positive") { + double (*ref)(long long int) = type2_as_type1_ref; + CastIntBruteForceTest(__longlong_as_double_kernel, ref, EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __longlong_as_double. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___longlong_as_double_Negative_RTC") { + NegativeTestRTCWrapper<3>(kLonglongAsDouble); +} + +__global__ void __hiloint2double_kernel(double* const ys, const size_t num_xs, int* const x1s, + int* const x2s) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + ys[i] = __hiloint2double(x1s[i], x2s[i]); + } +} + +double __hiloint2double_ref(int hi, int lo) { + uint64_t tmp0 = (static_cast(hi) << 32ull) | static_cast(lo); + double tmp1; + memcpy(&tmp1, &tmp0, sizeof(tmp0)); + + return tmp1; +} + +/** + * Test Description + * ------------------------ + * - Tests that checks `__hiloint2double` for all possible inputs for hi value. The results are + * compared against reference function which performs copy of hi int value to higher part of double + * variable and copy of lo int value to lower part of double variable. + * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___hiloint2double_Positive") { + double (*ref)(int, int) = __hiloint2double_ref; + CastBinaryIntRangeTest(__hiloint2double_kernel, ref, EqValidatorBuilderFactory()); +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass argument of invalid type for __hiloint2double. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___hiloint2double_Negative_RTC") { NegativeTestRTCWrapper<5>(kHilo2Double); } \ No newline at end of file diff --git a/catch/unit/math/casting_int_negative_kernels.cc b/catch/unit/math/casting_int_negative_kernels.cc new file mode 100644 index 0000000000..3f2586d738 --- /dev/null +++ b/catch/unit/math/casting_int_negative_kernels.cc @@ -0,0 +1,79 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define NEGATIVE_KERNELS_SHELL_ONE_ARG(func_name, T1, T2) \ + __global__ void func_name##_kernel_v1(T1* result, T2* x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v2(T1* result, Dummy x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v3(Dummy* result, T2 x) { *result = func_name(x); } + +#define NEGATIVE_KERNELS_SHELL_TWO_ARGS(func_name, T1, T2) \ + __global__ void func_name##_kernel_v1(T1* result, T2* x, T2 y) { \ + *result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v2(T1* result, T2 x, T2* y) { \ + *result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v3(T1* result, Dummy x, T2 y) { \ + *result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v4(T1* result, T2 x, Dummy y) { \ + *result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v5(Dummy* result, T2 x, T2 y) { \ + *result = func_name(x, y); \ + } + +NEGATIVE_KERNELS_SHELL_ONE_ARG(__int2float_rd, float, int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__int2float_rn, float, int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__int2float_ru, float, int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__int2float_rz, float, int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__uint2float_rd, float, unsigned int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__uint2float_rn, float, unsigned int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__uint2float_ru, float, unsigned int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__uint2float_rz, float, unsigned int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ll2float_rd, float, long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ll2float_rn, float, long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ll2float_ru, float, long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ll2float_rz, float, long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ull2float_rd, float, unsigned long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ull2float_rn, float, unsigned long long int) 
+NEGATIVE_KERNELS_SHELL_ONE_ARG(__ull2float_ru, float, unsigned long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ull2float_rz, float, unsigned long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__int2double_rn, double, int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__uint2double_rn, double, unsigned int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ll2double_rd, double, long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ll2double_rn, double, long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ll2double_ru, double, long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ll2double_rz, double, long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ull2double_rd, double, unsigned long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ull2double_rn, double, unsigned long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ull2double_ru, double, unsigned long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__ull2double_rz, double, unsigned long long int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__int_as_float, float, int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__uint_as_float, float, unsigned int) +NEGATIVE_KERNELS_SHELL_ONE_ARG(__longlong_as_double, double, long long int) +NEGATIVE_KERNELS_SHELL_TWO_ARGS(__hiloint2double, double, int) \ No newline at end of file diff --git a/catch/unit/math/casting_int_negative_kernels_rtc.hh b/catch/unit/math/casting_int_negative_kernels_rtc.hh new file mode 100644 index 0000000000..acdc621f8a --- /dev/null +++ b/catch/unit/math/casting_int_negative_kernels_rtc.hh @@ -0,0 +1,215 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the int/long long type casting negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kInt2Float{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void int2float_rd_kernel_v1(float* result, int* x) { *result = __int2float_rd(x); } + __global__ void int2float_rd_kernel_v2(float* result, Dummy x) { *result = __int2float_rd(x); } + __global__ void int2float_rd_kernel_v3(Dummy* result, int x) { *result = __int2float_rd(x); } + __global__ void int2float_rn_kernel_v1(float* result, int* x) { *result = __int2float_rn(x); } + __global__ void int2float_rn_kernel_v2(float* result, Dummy x) { *result = __int2float_rn(x); } + __global__ void int2float_rn_kernel_v3(Dummy* result, int x) { *result = __int2float_rn(x); } + __global__ void int2float_ru_kernel_v1(float* result, int* x) { *result = __int2float_ru(x); } + __global__ void int2float_ru_kernel_v2(float* result, Dummy x) { *result = __int2float_ru(x); } + __global__ void int2float_ru_kernel_v3(Dummy* result, int x) { *result = __int2float_ru(x); } + __global__ void int2float_rz_kernel_v1(float* result, int* x) { *result = __int2float_rz(x); } + __global__ void int2float_rz_kernel_v2(float* result, Dummy x) { *result = __int2float_rz(x); } + __global__ void int2float_rz_kernel_v3(Dummy* result, int x) { *result = __int2float_rz(x); } +)"}; + +static constexpr auto kUint2Float{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void uint2float_rd_kernel_v1(float* result, unsigned int* x) { *result = __uint2float_rd(x); } + __global__ void uint2float_rd_kernel_v2(float* result, Dummy x) { *result = __uint2float_rd(x); } + __global__ void uint2float_rd_kernel_v3(Dummy* result, unsigned int x) { *result = __uint2float_rd(x); } + __global__ void uint2float_rn_kernel_v1(float* result, unsigned int* x) { *result = __uint2float_rn(x); } + __global__ void uint2float_rn_kernel_v2(float* result, Dummy x) { *result = __uint2float_rn(x); } + __global__ void uint2float_rn_kernel_v3(Dummy* result, 
unsigned int x) { *result = __uint2float_rn(x); } + __global__ void uint2float_ru_kernel_v1(float* result, unsigned int* x) { *result = __uint2float_ru(x); } + __global__ void uint2float_ru_kernel_v2(float* result, Dummy x) { *result = __uint2float_ru(x); } + __global__ void uint2float_ru_kernel_v3(Dummy* result, unsigned int x) { *result = __uint2float_ru(x); } + __global__ void uint2float_rz_kernel_v1(float* result, unsigned int* x) { *result = __uint2float_rz(x); } + __global__ void uint2float_rz_kernel_v2(float* result, Dummy x) { *result = __uint2float_rz(x); } + __global__ void uint2float_rz_kernel_v3(Dummy* result, unsigned int x) { *result = __uint2float_rz(x); } +)"}; + +static constexpr auto kLL2Float{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void ll2float_rd_kernel_v1(float* result, long long int* x) { *result = __ll2float_rd(x); } + __global__ void ll2float_rd_kernel_v2(float* result, Dummy x) { *result = __ll2float_rd(x); } + __global__ void ll2float_rd_kernel_v3(Dummy* result, long long int x) { *result = __ll2float_rd(x); } + __global__ void ll2float_rn_kernel_v1(float* result, long long int* x) { *result = __ll2float_rn(x); } + __global__ void ll2float_rn_kernel_v2(float* result, Dummy x) { *result = __ll2float_rn(x); } + __global__ void ll2float_rn_kernel_v3(Dummy* result, long long int x) { *result = __ll2float_rn(x); } + __global__ void ll2float_ru_kernel_v1(float* result, long long int* x) { *result = __ll2float_ru(x); } + __global__ void ll2float_ru_kernel_v2(float* result, Dummy x) { *result = __ll2float_ru(x); } + __global__ void ll2float_ru_kernel_v3(Dummy* result, long long int x) { *result = __ll2float_ru(x); } + __global__ void ll2float_rz_kernel_v1(float* result, long long int* x) { *result = __ll2float_rz(x); } + __global__ void ll2float_rz_kernel_v2(float* result, Dummy x) { *result = __ll2float_rz(x); } + __global__ void ll2float_rz_kernel_v3(Dummy* result, long long int x) { 
*result = __ll2float_rz(x); } +)"}; + +static constexpr auto kULL2Float{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void ull2float_rd_kernel_v1(float* result, unsigned long long int* x) { *result = __ull2float_rd(x); } + __global__ void ull2float_rd_kernel_v2(float* result, Dummy x) { *result = __ull2float_rd(x); } + __global__ void ull2float_rd_kernel_v3(Dummy* result, unsigned long long int x) { *result = __ull2float_rd(x); } + __global__ void ull2float_rn_kernel_v1(float* result, unsigned long long int* x) { *result = __ull2float_rn(x); } + __global__ void ull2float_rn_kernel_v2(float* result, Dummy x) { *result = __ull2float_rn(x); } + __global__ void ull2float_rn_kernel_v3(Dummy* result, unsigned long long int x) { *result = __ull2float_rn(x); } + __global__ void ull2float_ru_kernel_v1(float* result, unsigned long long int* x) { *result = __ull2float_ru(x); } + __global__ void ull2float_ru_kernel_v2(float* result, Dummy x) { *result = __ull2float_ru(x); } + __global__ void ull2float_ru_kernel_v3(Dummy* result, unsigned long long int x) { *result = __ull2float_ru(x); } + __global__ void ull2float_rz_kernel_v1(float* result, unsigned long long int* x) { *result = __ull2float_rz(x); } + __global__ void ull2float_rz_kernel_v2(float* result, Dummy x) { *result = __ull2float_rz(x); } + __global__ void ull2float_rz_kernel_v3(Dummy* result, unsigned long long int x) { *result = __ull2float_rz(x); } +)"}; + +static constexpr auto kIntAsFloat{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void int_as_float_kernel_v1(float* result, int* x) { *result = __int_as_float(x); } + __global__ void int_as_float_kernel_v2(float* result, Dummy x) { *result = __int_as_float(x); } + __global__ void int_as_float_kernel_v3(Dummy* result, int x) { *result = __int_as_float(x); } +)"}; + +static constexpr auto kUintAsFloat{R"( + class Dummy { + public: + __device__ Dummy() {} + 
__device__ ~Dummy() {} + }; + __global__ void uint_as_float_kernel_v1(float* result, unsigned int* x) { *result = __uint_as_float(x); } + __global__ void uint_as_float_kernel_v2(float* result, Dummy x) { *result = __uint_as_float(x); } + __global__ void uint_as_float_kernel_v3(Dummy* result, unsigned int x) { *result = __uint_as_float(x); } +)"}; + +static constexpr auto kInt2Double{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void int2double_rn_kernel_v1(double* result, int* x) { *result = __int2double_rn(x); } + __global__ void int2double_rn_kernel_v2(double* result, Dummy x) { *result = __int2double_rn(x); } + __global__ void int2double_rn_kernel_v3(Dummy* result, int x) { *result = __int2double_rn(x); } +)"}; + +static constexpr auto kUint2Double{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void uint2double_rn_kernel_v1(double* result, unsigned int* x) { *result = __uint2double_rn(x); } + __global__ void uint2double_rn_kernel_v2(double* result, Dummy x) { *result = __uint2double_rn(x); } + __global__ void uint2double_rn_kernel_v3(Dummy* result, unsigned int x) { *result = __uint2double_rn(x); } +)"}; + + +static constexpr auto kLL2Double{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void ll2double_rd_kernel_v1(double* result, long long int* x) { *result = __ll2double_rd(x); } + __global__ void ll2double_rd_kernel_v2(double* result, Dummy x) { *result = __ll2double_rd(x); } + __global__ void ll2double_rd_kernel_v3(Dummy* result, long long int x) { *result = __ll2double_rd(x); } + __global__ void ll2double_rn_kernel_v1(double* result, long long int* x) { *result = __ll2double_rn(x); } + __global__ void ll2double_rn_kernel_v2(double* result, Dummy x) { *result = __ll2double_rn(x); } + __global__ void ll2double_rn_kernel_v3(Dummy* result, long long int x) { *result = __ll2double_rn(x); } + __global__ void 
ll2double_ru_kernel_v1(double* result, long long int* x) { *result = __ll2double_ru(x); } + __global__ void ll2double_ru_kernel_v2(double* result, Dummy x) { *result = __ll2double_ru(x); } + __global__ void ll2double_ru_kernel_v3(Dummy* result, long long int x) { *result = __ll2double_ru(x); } + __global__ void ll2double_rz_kernel_v1(double* result, long long int* x) { *result = __ll2double_rz(x); } + __global__ void ll2double_rz_kernel_v2(double* result, Dummy x) { *result = __ll2double_rz(x); } + __global__ void ll2double_rz_kernel_v3(Dummy* result, long long int x) { *result = __ll2double_rz(x); } +)"}; + +static constexpr auto kULL2Double{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void ull2double_rd_kernel_v1(double* result, unsigned long long int* x) { *result = __ull2double_rd(x); } + __global__ void ull2double_rd_kernel_v2(double* result, Dummy x) { *result = __ull2double_rd(x); } + __global__ void ull2double_rd_kernel_v3(Dummy* result, unsigned long long int x) { *result = __ull2double_rd(x); } + __global__ void ull2double_rn_kernel_v1(double* result, unsigned long long int* x) { *result = __ull2double_rn(x); } + __global__ void ull2double_rn_kernel_v2(double* result, Dummy x) { *result = __ull2double_rn(x); } + __global__ void ull2double_rn_kernel_v3(Dummy* result, unsigned long long int x) { *result = __ull2double_rn(x); } + __global__ void ull2double_ru_kernel_v1(double* result, unsigned long long int* x) { *result = __ull2double_ru(x); } + __global__ void ull2double_ru_kernel_v2(double* result, Dummy x) { *result = __ull2double_ru(x); } + __global__ void ull2double_ru_kernel_v3(Dummy* result, unsigned long long int x) { *result = __ull2double_ru(x); } + __global__ void ull2double_rz_kernel_v1(double* result, unsigned long long int* x) { *result = __ull2double_rz(x); } + __global__ void ull2double_rz_kernel_v2(double* result, Dummy x) { *result = __ull2double_rz(x); } + __global__ void 
ull2double_rz_kernel_v3(Dummy* result, unsigned long long int x) { *result = __ull2double_rz(x); } +)"}; + +static constexpr auto kLonglongAsDouble{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void longlong_as_double_kernel_v1(double* result, long long int* x) { *result = __longlong_as_double(x); } + __global__ void longlong_as_double_kernel_v2(double* result, Dummy x) { *result = __longlong_as_double(x); } + __global__ void longlong_as_double_kernel_v3(Dummy* result, long long int x) { *result = __longlong_as_double(x); } +)"}; + +static constexpr auto kHilo2Double{R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + __global__ void hiloint2double_kernel_v1(double* result, int* x, int y) { *result = __hiloint2double(x, y); } + __global__ void hiloint2double_kernel_v2(double* result, int x, int* y) { *result = __hiloint2double(x, y); } + __global__ void hiloint2double_kernel_v3(double* result, Dummy x, int y) { *result = __hiloint2double(x, y); } + __global__ void hiloint2double_kernel_v4(double* result, int x, Dummy y) { *result = __hiloint2double(x, y); } + __global__ void hiloint2double_kernel_v5(Dummy* result, int x, int y) { *result = __hiloint2double(x, y); } +)"}; + + diff --git a/catch/unit/math/math_common.hh b/catch/unit/math/math_common.hh index 738dbf66c0..0cd30db404 100644 --- a/catch/unit/math/math_common.hh +++ b/catch/unit/math/math_common.hh @@ -7,8 +7,15 @@ in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +<<<<<<< HEAD The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+======= + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +>>>>>>> c08a2a5d (Merge branch 'develop' into casting_int_tests) THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -100,7 +107,11 @@ template class MathTest { template void RunImpl(const ValidatorBuilder& validator_builder, const size_t grid_dim, const size_t block_dim, RT (*const ref_func)(RTs...), const size_t num_args, +<<<<<<< HEAD std::index_sequence is, const Ts*... xss) { +======= + std::index_sequence, const Ts*... xss) { +>>>>>>> c08a2a5d (Merge branch 'develop' into casting_int_tests) const auto xss_tup = std::make_tuple(xss...); constexpr auto f = [](auto dst, auto src, size_t size) { From fa5ba557a5ea7edbb15932a6ca40e584dc55fb4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 17:54:17 +0100 Subject: [PATCH 22/71] EXSWHTEC-300 - Extend tests for atomic bitwise operations #288 Change-Id: I499a1c57400f13ebdf056d093228501f4eb2cb31 --- catch/unit/atomics/CMakeLists.txt | 3 + catch/unit/atomics/__hip_atomic_fetch_and.cc | 187 +++++++++++++++++++ catch/unit/atomics/__hip_atomic_fetch_or.cc | 187 +++++++++++++++++++ catch/unit/atomics/__hip_atomic_fetch_xor.cc | 187 +++++++++++++++++++ catch/unit/atomics/bitwise_common.hh | 113 ++++++++--- 5 files changed, 654 insertions(+), 23 deletions(-) create mode 100644 catch/unit/atomics/__hip_atomic_fetch_and.cc create mode 100644 catch/unit/atomics/__hip_atomic_fetch_or.cc create mode 100644 catch/unit/atomics/__hip_atomic_fetch_xor.cc diff --git a/catch/unit/atomics/CMakeLists.txt b/catch/unit/atomics/CMakeLists.txt index f18abbf3e5..bfe6e6bf59 100644 --- a/catch/unit/atomics/CMakeLists.txt +++ 
b/catch/unit/atomics/CMakeLists.txt @@ -35,6 +35,9 @@ set(TEST_SRC unsafeAtomicMax.cc atomicExch.cc atomicExch_system.cc + __hip_atomic_fetch_and.cc + __hip_atomic_fetch_or.cc + __hip_atomic_fetch_xor.cc ) if(HIP_PLATFORM MATCHES "nvidia") diff --git a/catch/unit/atomics/__hip_atomic_fetch_and.cc b/catch/unit/atomics/__hip_atomic_fetch_and.cc new file mode 100644 index 0000000000..51fd37bf59 --- /dev/null +++ b/catch/unit/atomics/__hip_atomic_fetch_and.cc @@ -0,0 +1,187 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup __hip_atomic_fetch_and __hip_atomic_fetch_and + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic AND with memory scope WAVEFRONT from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic AND with memory scope WAVEFRONT from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic AND with memory scope WAVEFRONT from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic AND with memory scope WORKGROUP from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic AND with memory scope WORKGROUP from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic AND with memory scope WORKGROUP from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_and.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} diff --git a/catch/unit/atomics/__hip_atomic_fetch_or.cc b/catch/unit/atomics/__hip_atomic_fetch_or.cc new file mode 100644 index 0000000000..000df50f80 --- /dev/null +++ b/catch/unit/atomics/__hip_atomic_fetch_or.cc @@ -0,0 +1,187 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup __hip_atomic_fetch_or __hip_atomic_fetch_or + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic OR with memory scope WAVEFRONT from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic OR with memory scope WAVEFRONT from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic OR with memory scope WAVEFRONT from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic OR with memory scope WORKGROUP from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic OR with memory scope WORKGROUP from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic OR with memory scope WORKGROUP from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_or.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} diff --git a/catch/unit/atomics/__hip_atomic_fetch_xor.cc b/catch/unit/atomics/__hip_atomic_fetch_xor.cc new file mode 100644 index 0000000000..0f3f3f3743 --- /dev/null +++ b/catch/unit/atomics/__hip_atomic_fetch_xor.cc @@ -0,0 +1,187 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "bitwise_common.hh" + +#include + +/** + * @addtogroup __hip_atomic_fetch_xor __hip_atomic_fetch_xor + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic XOR with memory scope WAVEFRONT from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_xor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic XOR with memory scope WAVEFRONT from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_xor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic XOR with memory scope WAVEFRONT from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_xor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic XOR with memory scope WORKGROUP from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_xor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + Bitwise::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic XOR with memory scope WORKGROUP from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_xor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic XOR with memory scope WORKGROUP from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_xor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + Bitwise::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} diff --git a/catch/unit/atomics/bitwise_common.hh b/catch/unit/atomics/bitwise_common.hh index 9e71c99cb0..887d25d4f9 100644 --- a/catch/unit/atomics/bitwise_common.hh +++ b/catch/unit/atomics/bitwise_common.hh @@ -22,10 +22,10 @@ THE SOFTWARE. 
#pragma once +#include #include #include #include -#include namespace cg = cooperative_groups; @@ -37,6 +37,9 @@ enum class AtomicOperation { kOrSystem, kXor, kXorSystem, + kBuiltinAnd, + kBuiltinOr, + kBuiltinXor }; constexpr auto kMask = 0xAAAA; @@ -52,7 +55,7 @@ __host__ __device__ TestType GetTestValue() { return kTestValue; } -template +template __device__ TestType PerformAtomicOperation(TestType* const mem) { const auto mask = kMask; @@ -68,10 +71,17 @@ __device__ TestType PerformAtomicOperation(TestType* const mem) { return atomicXor(mem, mask); } else if constexpr (operation == AtomicOperation::kXorSystem) { return atomicXor_system(mem, mask); + } else if constexpr (operation == AtomicOperation::kBuiltinAnd) { + return __hip_atomic_fetch_and(mem, mask, __ATOMIC_RELAXED, memory_scope); + } else if constexpr (operation == AtomicOperation::kBuiltinOr) { + return __hip_atomic_fetch_or(mem, mask, __ATOMIC_RELAXED, memory_scope); + } else if constexpr (operation == AtomicOperation::kBuiltinXor) { + return __hip_atomic_fetch_xor(mem, mask, __ATOMIC_RELAXED, memory_scope); } } -template +template __global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) { __shared__ TestType shared_mem; @@ -84,7 +94,7 @@ __global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) __syncthreads(); } - old_vals[tid] = PerformAtomicOperation(mem); + old_vals[tid] = PerformAtomicOperation(mem); if constexpr (use_shared_mem) { __syncthreads(); @@ -99,7 +109,16 @@ __host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned return reinterpret_cast(byte_ptr + idx * pitch); } -template +__device__ void GenerateMemoryTraffic(uint8_t* const begin_addr, uint8_t* const end_addr) { + for (volatile uint8_t* addr = begin_addr; addr != end_addr; ++addr) { + uint8_t val = *addr; + val ^= 0xAB; + *addr = val; + } +} + +template __global__ void TestKernel(TestType* const global_mem, TestType* const old_vals, const unsigned int width, 
const unsigned pitch) { extern __shared__ uint8_t shared_mem[]; @@ -116,8 +135,18 @@ __global__ void TestKernel(TestType* const global_mem, TestType* const old_vals, __syncthreads(); } - old_vals[tid] = - PerformAtomicOperation(PitchedOffset(mem, pitch, tid % width)); + const auto n = cooperative_groups::this_grid().size() - width; + + TestType* atomic_addr = PitchedOffset(mem, pitch, tid % width); + + if (tid < n) { + old_vals[tid] = PerformAtomicOperation( + PitchedOffset(mem, pitch, tid % width)); + } else { + uint8_t* const begin_addr = reinterpret_cast(atomic_addr + 1); + uint8_t* const end_addr = reinterpret_cast(atomic_addr) + pitch; + GenerateMemoryTraffic(begin_addr, end_addr); + } if constexpr (use_shared_mem) { __syncthreads(); @@ -157,13 +186,16 @@ std::tuple, std::vector> TestKernelHostRef(const auto& res = res_vals[tid % p.width]; old_vals.push_back(res); - if constexpr (operation == AtomicOperation::kAnd || operation == AtomicOperation::kAndSystem) { + if constexpr (operation == AtomicOperation::kAnd || operation == AtomicOperation::kAndSystem || + operation == AtomicOperation::kBuiltinAnd) { res = res & mask; } else if constexpr (operation == AtomicOperation::kOr || - operation == AtomicOperation::kOrSystem) { + operation == AtomicOperation::kOrSystem || + operation == AtomicOperation::kBuiltinOr) { res = res | mask; } else if constexpr (operation == AtomicOperation::kXor || - operation == AtomicOperation::kXorSystem) { + operation == AtomicOperation::kXorSystem || + operation == AtomicOperation::kBuiltinXor) { res = res ^ mask; } } @@ -188,19 +220,21 @@ void Verify(const TestParams& p, std::vector& res_vals, std::vector +template void LaunchKernel(const TestParams& p, hipStream_t stream, TestType* const mem_ptr, TestType* const old_vals) { const auto shared_mem_size = use_shared_mem ? 
p.width * p.pitch : 0u; if (p.width == 1 && p.pitch == sizeof(TestType)) - TestKernel + TestKernel <<>>(mem_ptr, old_vals); else - TestKernel + TestKernel <<>>(mem_ptr, old_vals, p.width, p.pitch); } -template +template void TestCore(const TestParams& p) { const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType); std::vector> old_vals_devs; @@ -232,7 +266,8 @@ void TestCore(const TestParams& p) { for (auto j = 0u; j < p.kernel_count; ++j) { const auto& stream = streams[i * p.kernel_count + j].stream(); const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount(); - LaunchKernel(p, stream, mem_dev.ptr(), old_vals); + LaunchKernel(p, stream, mem_dev.ptr(), + old_vals); } } @@ -247,17 +282,48 @@ void TestCore(const TestParams& p) { Verify(p, res_vals, old_vals); } -template +inline dim3 GenerateThreadDimensions() { return GENERATE(dim3(16), dim3(1024)); } + +inline dim3 GenerateBlockDimensions() { + int sm_count = 0; + HIP_CHECK(hipDeviceGetAttribute(&sm_count, hipDeviceAttributeMultiprocessorCount, 0)); + return GENERATE_COPY(dim3(sm_count), dim3(sm_count + sm_count / 2)); +} + +template void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) { TestParams params; params.num_devices = 1; params.kernel_count = 1; - params.threads = GENERATE(dim3(1023)); + if constexpr ((operation == AtomicOperation::kBuiltinAnd || + operation == AtomicOperation::kBuiltinOr || + operation == AtomicOperation::kBuiltinXor) && + memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) { + params.threads = 1; + } else if constexpr ((operation == AtomicOperation::kBuiltinAnd || + operation == AtomicOperation::kBuiltinOr || + operation == AtomicOperation::kBuiltinXor) && + memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + params.threads = dim3(warp_size); + } else { + params.threads = GenerateThreadDimensions(); + } params.width = width; 
params.pitch = pitch; SECTION("Global memory") { - params.blocks = GENERATE(dim3(3)); + if constexpr ((operation == AtomicOperation::kBuiltinAnd || + operation == AtomicOperation::kBuiltinOr || + operation == AtomicOperation::kBuiltinXor) && + (memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD || + memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT || + memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP)) { + params.blocks = dim3(1); + } else { + params.blocks = GenerateBlockDimensions(); + } using LA = LinearAllocs; for (const auto alloc_type : {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { @@ -288,8 +354,8 @@ void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsig TestParams params; params.num_devices = 1; params.kernel_count = kernel_count; - params.blocks = GENERATE(dim3(3)); - params.threads = GENERATE(dim3(1023)); + params.blocks = GenerateBlockDimensions(); + params.threads = GenerateThreadDimensions(); params.width = width; params.pitch = pitch; @@ -329,8 +395,8 @@ void MultipleDeviceMultipleKernelTest(const unsigned int num_devices, TestParams params; params.num_devices = num_devices; params.kernel_count = kernel_count; - params.blocks = GENERATE(dim3(3)); - params.threads = GENERATE(dim3(1023)); + params.blocks = GenerateBlockDimensions(); + params.threads = GenerateThreadDimensions(); params.width = width; params.pitch = pitch; @@ -338,8 +404,9 @@ void MultipleDeviceMultipleKernelTest(const unsigned int num_devices, for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { params.alloc_type = alloc_type; DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { - TestCore(params); + TestCore(params); } } } + } // namespace Bitwise From 9d52facc34ab428195c41e0eef92433d95acc295 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 17:43:01 +0100 Subject: [PATCH 23/71] 
EXSWHTEC-301 - Extend tests for atomic min/max operations #289 Change-Id: Ie9690b7986b22be9b6abdbd33ebf0e2d5fb56d66 --- catch/unit/atomics/CMakeLists.txt | 2 + catch/unit/atomics/__hip_atomic_fetch_max.cc | 187 +++++++++++++++++++ catch/unit/atomics/__hip_atomic_fetch_min.cc | 187 +++++++++++++++++++ catch/unit/atomics/min_max_common.hh | 103 +++++++--- 4 files changed, 458 insertions(+), 21 deletions(-) create mode 100644 catch/unit/atomics/__hip_atomic_fetch_max.cc create mode 100644 catch/unit/atomics/__hip_atomic_fetch_min.cc diff --git a/catch/unit/atomics/CMakeLists.txt b/catch/unit/atomics/CMakeLists.txt index bfe6e6bf59..bb3b8db915 100644 --- a/catch/unit/atomics/CMakeLists.txt +++ b/catch/unit/atomics/CMakeLists.txt @@ -33,6 +33,8 @@ set(TEST_SRC unsafeAtomicMin.cc safeAtomicMax.cc unsafeAtomicMax.cc + __hip_atomic_fetch_min.cc + __hip_atomic_fetch_max.cc atomicExch.cc atomicExch_system.cc __hip_atomic_fetch_and.cc diff --git a/catch/unit/atomics/__hip_atomic_fetch_max.cc b/catch/unit/atomics/__hip_atomic_fetch_max.cc new file mode 100644 index 0000000000..cc42309333 --- /dev/null +++ b/catch/unit/atomics/__hip_atomic_fetch_max.cc @@ -0,0 +1,187 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup __hip_atomic_fetch_max __hip_atomic_fetch_max + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MAX with memory scope WAVEFRONT from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_max.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Wavefront_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MAX with memory scope WAVEFRONT from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_max.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Wavefront_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MAX with memory scope WAVEFRONT from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_max.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Wavefront_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MAX with memory scope WORKGROUP from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_max.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Workgroup_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MAX with memory scope WORKGROUP from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_max.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Workgroup_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MAX with memory scope WORKGROUP from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_max.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Workgroup_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} \ No newline at end of file diff --git a/catch/unit/atomics/__hip_atomic_fetch_min.cc b/catch/unit/atomics/__hip_atomic_fetch_min.cc new file mode 100644 index 0000000000..f09a3732f9 --- /dev/null +++ b/catch/unit/atomics/__hip_atomic_fetch_min.cc @@ -0,0 +1,187 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup __hip_atomic_fetch_min __hip_atomic_fetch_min + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MIN with memory scope WAVEFRONT from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_min.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Wavefront_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MIN with memory scope WAVEFRONT from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_min.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Wavefront_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MIN with memory scope WAVEFRONT from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_min.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Wavefront_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MIN with memory scope WORKGROUP from multiple threads on the same + * address. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_min.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Workgroup_SameAddress", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MIN with memory scope WORKGROUP from multiple threads on adjacent + * addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_min.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Workgroup_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs a builtin atomic MIN with memory scope WORKGROUP from multiple threads on scattered + * addresses. + * - Uses only one device and launches one kernel. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_min.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Workgroup_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} \ No newline at end of file diff --git a/catch/unit/atomics/min_max_common.hh b/catch/unit/atomics/min_max_common.hh index c171c6f3c6..bd2934f4ee 100644 --- a/catch/unit/atomics/min_max_common.hh +++ b/catch/unit/atomics/min_max_common.hh @@ -39,6 +39,9 @@ enum class AtomicOperation { kUnsafeMin, kSafeMax, kUnsafeMax + kUnsafeMax, + kBuiltinMin, + kBuiltinMax }; constexpr auto kIntegerTestValue = 5; @@ -58,7 +61,7 @@ __host__ __device__ TestType GetTestValue() { return test_value + 2; } -template +template __device__ TestType PerformAtomicOperation(TestType* const mem) { const auto val = GetTestValue(); @@ -78,10 +81,15 @@ __device__ TestType PerformAtomicOperation(TestType* const mem) { return unsafeAtomicMax(mem, val); } else if constexpr (operation == AtomicOperation::kSafeMax) { return safeAtomicMax(mem, val); + } else if constexpr (operation == AtomicOperation::kBuiltinMin) { + return __hip_atomic_fetch_min(mem, val, __ATOMIC_RELAXED, memory_scope); + } else if constexpr (operation == AtomicOperation::kBuiltinMax) { + return __hip_atomic_fetch_max(mem, val, __ATOMIC_RELAXED, memory_scope); } } -template +template __global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) { __shared__ TestType shared_mem; @@ -94,7 +102,7 @@ __global__ void TestKernel(TestType* const 
global_mem, TestType* const old_vals) __syncthreads(); } - old_vals[tid] = PerformAtomicOperation(mem); + old_vals[tid] = PerformAtomicOperation(mem); if constexpr (use_shared_mem) { __syncthreads(); @@ -109,7 +117,16 @@ __host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned return reinterpret_cast(byte_ptr + idx * pitch); } -template +__device__ void GenerateMemoryTraffic(uint8_t* const begin_addr, uint8_t* const end_addr) { + for (volatile uint8_t* addr = begin_addr; addr != end_addr; ++addr) { + uint8_t val = *addr; + val ^= 0xAB; + *addr = val; + } +} + +template __global__ void TestKernel(TestType* const global_mem, TestType* const old_vals, const unsigned int width, const unsigned pitch) { extern __shared__ uint8_t shared_mem[]; @@ -126,8 +143,18 @@ __global__ void TestKernel(TestType* const global_mem, TestType* const old_vals, __syncthreads(); } - old_vals[tid] = - PerformAtomicOperation(PitchedOffset(mem, pitch, tid % width)); + const auto n = cooperative_groups::this_grid().size() - width; + + TestType* atomic_addr = PitchedOffset(mem, pitch, tid % width); + + if (tid < n) { + old_vals[tid] = PerformAtomicOperation( + PitchedOffset(mem, pitch, tid % width)); + } else { + uint8_t* const begin_addr = reinterpret_cast(atomic_addr + 1); + uint8_t* const end_addr = reinterpret_cast(atomic_addr) + pitch; + GenerateMemoryTraffic(begin_addr, end_addr); + } if constexpr (use_shared_mem) { __syncthreads(); @@ -172,12 +199,14 @@ std::tuple, std::vector> TestKernelHostRef(const if constexpr (operation == AtomicOperation::kMin || operation == AtomicOperation::kMinSystem || operation == AtomicOperation::kUnsafeMin || - operation == AtomicOperation::kSafeMin) { + operation == AtomicOperation::kSafeMin || + operation == AtomicOperation::kBuiltinMin) { res = std::min(res, val); } else if constexpr (operation == AtomicOperation::kMax || operation == AtomicOperation::kMaxSystem || operation == AtomicOperation::kUnsafeMax || - operation == 
AtomicOperation::kSafeMax) { + operation == AtomicOperation::kSafeMax || + operation == AtomicOperation::kBuiltinMax) { res = std::max(res, val); } } @@ -202,19 +231,21 @@ void Verify(const TestParams& p, std::vector& res_vals, std::vector +template void LaunchKernel(const TestParams& p, hipStream_t stream, TestType* const mem_ptr, TestType* const old_vals) { const auto shared_mem_size = use_shared_mem ? p.width * p.pitch : 0u; if (p.width == 1 && p.pitch == sizeof(TestType)) - TestKernel + TestKernel <<>>(mem_ptr, old_vals); else - TestKernel + TestKernel <<>>(mem_ptr, old_vals, p.width, p.pitch); } -template +template void TestCore(const TestParams& p) { const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType); std::vector> old_vals_devs; @@ -247,7 +278,8 @@ void TestCore(const TestParams& p) { for (auto j = 0u; j < p.kernel_count; ++j) { const auto& stream = streams[i * p.kernel_count + j].stream(); const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount(); - LaunchKernel(p, stream, mem_dev.ptr(), old_vals); + LaunchKernel(p, stream, mem_dev.ptr(), + old_vals); } } @@ -262,17 +294,45 @@ void TestCore(const TestParams& p) { Verify(p, res_vals, old_vals); } -template +inline dim3 GenerateThreadDimensions() { return GENERATE(dim3(16), dim3(1024)); } + +inline dim3 GenerateBlockDimensions() { + int sm_count = 0; + HIP_CHECK(hipDeviceGetAttribute(&sm_count, hipDeviceAttributeMultiprocessorCount, 0)); + return GENERATE_COPY(dim3(sm_count), dim3(sm_count + sm_count / 2)); +} + +template void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) { TestParams params; params.num_devices = 1; params.kernel_count = 1; - params.threads = GENERATE(dim3(1023)); + if constexpr ((operation == AtomicOperation::kBuiltinMin || + operation == AtomicOperation::kBuiltinMax) && + memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) { + params.threads = 1; + } else if constexpr ((operation == AtomicOperation::kBuiltinMin || + 
operation == AtomicOperation::kBuiltinMax) && + memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + params.threads = dim3(warp_size); + } else { + params.threads = GenerateThreadDimensions(); + } params.width = width; params.pitch = pitch; SECTION("Global memory") { - params.blocks = GENERATE(dim3(3)); + if constexpr ((operation == AtomicOperation::kBuiltinMin || + operation == AtomicOperation::kBuiltinMax) && + (memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD || + memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT || + memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP)) { + params.blocks = dim3(1); + } else { + params.blocks = GenerateBlockDimensions(); + } using LA = LinearAllocs; for (const auto alloc_type : {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { @@ -303,8 +363,8 @@ void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsig TestParams params; params.num_devices = 1; params.kernel_count = kernel_count; - params.blocks = GENERATE(dim3(3)); - params.threads = GENERATE(dim3(1023)); + params.blocks = GenerateThreadDimensions(); + params.threads = GenerateBlockDimensions(); params.width = width; params.pitch = pitch; @@ -344,8 +404,8 @@ void MultipleDeviceMultipleKernelTest(const unsigned int num_devices, TestParams params; params.num_devices = num_devices; params.kernel_count = kernel_count; - params.blocks = GENERATE(dim3(3)); - params.threads = GENERATE(dim3(1023)); + params.blocks = GenerateThreadDimensions(); + params.threads = GenerateBlockDimensions(); params.width = width; params.pitch = pitch; @@ -353,8 +413,9 @@ void MultipleDeviceMultipleKernelTest(const unsigned int num_devices, for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { params.alloc_type = alloc_type; DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { - TestCore(params); + 
TestCore(params); } } } + } // namespace MinMax From 73e8f26cf464f14eeb478de300775454d01020d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 17:11:02 +0100 Subject: [PATCH 24/71] EXSWHTEC-298 - Extend tests for atomic exchange operations #290 Change-Id: I19b4cb6177f3caa74c3f889de107e349ebb1d37c --- catch/unit/atomics/CMakeLists.txt | 1 + catch/unit/atomics/__hip_atomic_exchange.cc | 136 ++++++++++++++++++++ catch/unit/atomics/atomicExch_common.hh | 75 ++++++++--- 3 files changed, 194 insertions(+), 18 deletions(-) create mode 100644 catch/unit/atomics/__hip_atomic_exchange.cc diff --git a/catch/unit/atomics/CMakeLists.txt b/catch/unit/atomics/CMakeLists.txt index bb3b8db915..ed622195a9 100644 --- a/catch/unit/atomics/CMakeLists.txt +++ b/catch/unit/atomics/CMakeLists.txt @@ -40,6 +40,7 @@ set(TEST_SRC __hip_atomic_fetch_and.cc __hip_atomic_fetch_or.cc __hip_atomic_fetch_xor.cc + __hip_atomic_exchange.cc ) if(HIP_PLATFORM MATCHES "nvidia") diff --git a/catch/unit/atomics/__hip_atomic_exchange.cc b/catch/unit/atomics/__hip_atomic_exchange.cc new file mode 100644 index 0000000000..a518aaafbe --- /dev/null +++ b/catch/unit/atomics/__hip_atomic_exchange.cc @@ -0,0 +1,136 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "atomicExch_common.hh" + +/** + * @addtogroup __hip_atomic_exchange __hip_atomic_exchange + * @{ + * @ingroup AtomicsTest + * ________________________ + * Test cases from other modules: + * - @ref Unit_AtomicBuiltins_Negative_Parameters_RTC + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * exchange into a runtime determined memory location. Each thread will exchange its own grid wide + * linear index + offset into the memory location, storing the return value into a separate output + * array slot corresponding to it. Once complete, the union of output array and exchange memory is + * validated to contain all values in the range [0, number_of_threads + + * number_of_exchange_memory_slots). Several memory access patterns are tested: + * -# All threads exchange to a single memory location + * -# Each thread exchanges into an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the exchange elements are spread out by L1 cache line size bytes. 
+ * + * - The test is run for: + * - All overloads of atomicExch + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated exchange memory + * - Exchange memory located in shared memory + * - WAVEFRONT memory scope + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_exchange.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_exchange_Positive_Wavefront", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + AtomicExchSingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + AtomicExchSingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + AtomicExchSingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * exchange into a runtime determined memory location. Each thread will exchange its own grid wide + * linear index + offset into the memory location, storing the return value into a separate output + * array slot corresponding to it. Once complete, the union of output array and exchange memory is + * validated to contain all values in the range [0, number_of_threads + + * number_of_exchange_memory_slots). 
Several memory access patterns are tested: + * -# All threads exchange to a single memory location + * -# Each thread exchanges into an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the exchange elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicExch + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated exchange memory + * - Exchange memory located in shared memory + * - WORKGROUP memory scope + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_exchange.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_exchange_Positive_Workgroup", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + AtomicExchSingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + AtomicExchSingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + AtomicExchSingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} \ No newline at end of file diff --git a/catch/unit/atomics/atomicExch_common.hh b/catch/unit/atomics/atomicExch_common.hh index 1b4add5253..90e78edcc2 100644 --- a/catch/unit/atomics/atomicExch_common.hh +++ b/catch/unit/atomics/atomicExch_common.hh @@ -22,24 +22,26 @@ THE SOFTWARE. 
#pragma once -#include - +#include #include #include #include -#include -enum class AtomicScopes { device, system }; +enum class AtomicScopes { device, system, builtin }; -template __device__ T perform_atomic_exch(T* address, T val) { +template +__device__ T perform_atomic_exch(T* address, T val) { if constexpr (scope == AtomicScopes::device) { return atomicExch(address, val); } else if (scope == AtomicScopes::system) { return atomicExch_system(address, val); + } else if (scope == AtomicScopes::builtin) { + return __hip_atomic_exchange(address, val, __ATOMIC_RELAXED, memory_scope); } } -template +template __global__ void atomic_exch_kernel_compile_time(T* const global_mem, T* const old_vals) { __shared__ T shared_mem; @@ -52,7 +54,7 @@ __global__ void atomic_exch_kernel_compile_time(T* const global_mem, T* const ol __syncthreads(); } - old_vals[tid] = perform_atomic_exch(mem, static_cast(tid + 1)); + old_vals[tid] = perform_atomic_exch(mem, static_cast(tid + 1)); if constexpr (use_shared_mem) { __syncthreads(); @@ -67,7 +69,16 @@ __host__ __device__ T* pitched_offset(T* const ptr, const unsigned int pitch, return reinterpret_cast(byte_ptr + idx * pitch); } -template +__device__ void generate_memory_traffic(uint8_t* const begin_addr, uint8_t* const end_addr) { + for (volatile uint8_t* addr = begin_addr; addr != end_addr; ++addr) { + uint8_t val = *addr; + val ^= 0xAB; + *addr = val; + } +} + +template __global__ void atomic_exch_kernel(T* const global_mem, T* const old_vals, const unsigned int width, const unsigned pitch, const T base_val = 0) { extern __shared__ uint8_t shared_mem[]; @@ -84,8 +95,18 @@ __global__ void atomic_exch_kernel(T* const global_mem, T* const old_vals, const __syncthreads(); } - old_vals[tid] = perform_atomic_exch(pitched_offset(mem, pitch, tid % width), - base_val + static_cast(tid + width)); + const auto n = cooperative_groups::this_grid().size() - width; + + T* atomic_addr = pitched_offset(mem, pitch, tid % width); + + if (tid < n) { + 
old_vals[tid] = perform_atomic_exch( + pitched_offset(mem, pitch, tid % width), base_val + static_cast(tid + width)); + } else { + uint8_t* const begin_addr = reinterpret_cast(atomic_addr + 1); + uint8_t* const end_addr = reinterpret_cast(atomic_addr) + pitch; + generate_memory_traffic(begin_addr, end_addr); + } if constexpr (use_shared_mem) { __syncthreads(); @@ -255,14 +276,16 @@ class AtomicExchCRTP { } }; -template +template class AtomicExch : public AtomicExchCRTP, T, use_shared_mem, scope> { public: void LaunchKernel(const unsigned int shared_mem_size, const hipStream_t stream, T* const mem, T* const old_vals, const T base_val, const AtomicExchParams& p) const { - atomic_exch_kernel<<>>( - mem, old_vals, p.width, p.pitch, base_val); + atomic_exch_kernel + <<>>(mem, old_vals, p.width, p.pitch, + base_val); } void ValidateResults(std::vector& old_vals) const { @@ -281,23 +304,39 @@ inline dim3 GenerateAtomicExchBlockDimensions() { return GENERATE_COPY(dim3(sm_count), dim3(sm_count + sm_count / 2)); } -template +template void AtomicExchSingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) { AtomicExchParams params; params.num_devices = 1; params.kernel_count = 1; - params.threads = GenerateAtomicExchThreadDimensions(); + if constexpr (scope == AtomicScopes::builtin && memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) { + params.threads = 1; + } else if constexpr (scope == AtomicScopes::builtin && + memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + params.threads = dim3(warp_size); + } else { + params.threads = GenerateAtomicExchThreadDimensions(); + } params.width = width; params.pitch = pitch; SECTION("Global memory") { - params.blocks = GenerateAtomicExchBlockDimensions(); + if constexpr (scope == AtomicScopes::builtin && + (memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD || + memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT || + 
memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP)) { + params.blocks = dim3(1); + } else { + params.blocks = GenerateAtomicExchBlockDimensions(); + } using LA = LinearAllocs; for (const auto alloc_type : {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { params.alloc_type = alloc_type; DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { - AtomicExch().run(params); + AtomicExch().run(params); } } } @@ -305,7 +344,7 @@ void AtomicExchSingleDeviceSingleKernelTest(const unsigned int width, const unsi SECTION("Shared memory") { params.blocks = dim3(1); params.alloc_type = LinearAllocs::hipMalloc; - AtomicExch().run(params); + AtomicExch().run(params); } } From 0972ef88aacbec249a0193377e7887ef5981dd74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 16:45:02 +0100 Subject: [PATCH 25/71] EXSWHTEC-297 - Introduce build dependencies for builtin atomic operations tests #292 Change-Id: Ie16d2175e330522c226cde9f30a1f326782a4551 --- catch/include/hip_test_defgroups.hh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/catch/include/hip_test_defgroups.hh b/catch/include/hip_test_defgroups.hh index 3fb6c774ef..1ab256ec10 100644 --- a/catch/include/hip_test_defgroups.hh +++ b/catch/include/hip_test_defgroups.hh @@ -39,6 +39,13 @@ THE SOFTWARE. * @} */ +/** + * @defgroup AtomicsTest Device Atomics + * @{ + * This section describes tests for the Device Atomic APIs. 
+ * @} + */ + /** * @defgroup DeviceLanguageTest Device Language * @{ From 8f112a639a8163b074ba265ec81bcd4244f944b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 17:06:06 +0100 Subject: [PATCH 26/71] EXSWHTEC-296 - Implement negative RTC tests for builtin atomic operations #291 Change-Id: I240e606ea2b112837301ec7afe2f6501b47071ef --- catch/unit/atomics/CMakeLists.txt | 1 + catch/unit/atomics/atomic_builtins.cc | 97 +++ .../atomics/atomic_builtins_kernels_rtc.hh | 590 ++++++++++++++++++ 3 files changed, 688 insertions(+) create mode 100644 catch/unit/atomics/atomic_builtins.cc create mode 100644 catch/unit/atomics/atomic_builtins_kernels_rtc.hh diff --git a/catch/unit/atomics/CMakeLists.txt b/catch/unit/atomics/CMakeLists.txt index ed622195a9..1c40746dff 100644 --- a/catch/unit/atomics/CMakeLists.txt +++ b/catch/unit/atomics/CMakeLists.txt @@ -35,6 +35,7 @@ set(TEST_SRC unsafeAtomicMax.cc __hip_atomic_fetch_min.cc __hip_atomic_fetch_max.cc + atomic_builtins.cc atomicExch.cc atomicExch_system.cc __hip_atomic_fetch_and.cc diff --git a/catch/unit/atomics/atomic_builtins.cc b/catch/unit/atomics/atomic_builtins.cc new file mode 100644 index 0000000000..c5ade6b30a --- /dev/null +++ b/catch/unit/atomics/atomic_builtins.cc @@ -0,0 +1,97 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +#include "atomic_builtins_kernels_rtc.hh" + +/** + * @addtogroup __hip_atomic_fetch_add __hip_atomic_fetch_add + * @{ + * @ingroup AtomicsTest + */ + +void AtomicBuiltinsRTCWrapper(const char* program_source, int expected_errors_num, + int expected_warnings_num) { + hiprtcProgram program{}; + HIPRTC_CHECK(hiprtcCreateProgram(&program, program_source, "atomics_builtins_kernels.cc", 0, + nullptr, nullptr)); + + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + int warning_count{0}; + + std::string error_message{"error:"}; + std::string warning_message{"warning:"}; + + size_t npos_e = log.find(error_message, 0); + while (npos_e != std::string::npos) { + ++error_count; + npos_e = log.find(error_message, npos_e + 1); + } + + size_t npos_w = log.find(warning_message, 0); + while (npos_w != std::string::npos) { + ++warning_count; + npos_w = log.find(warning_message, npos_w + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_errors_num); + REQUIRE(warning_count == expected_warnings_num); +} + +/** + * Test Description + * ------------------------ + * - Compiles atomic builtins while passing parameters that shall cause: + * -# Compiler warnings + * -# Compiler errors + * Test 
source + * ------------------------ + * - unit/atomics/atomic_builtins.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_AtomicBuiltins_Negative_Parameters_RTC") { + AtomicBuiltinsRTCWrapper(kBuiltinStore, 5, 5); + AtomicBuiltinsRTCWrapper(kBuiltinLoad, 4, 4); + /* Begin: Should be 5 errors, 6 warnings for both. See EXSWHTEC-309*/ + AtomicBuiltinsRTCWrapper(kBuiltinCompExWeak, 5, 2); + AtomicBuiltinsRTCWrapper(kBuiltinCompExStrong, 5, 2); + /* End. */ + AtomicBuiltinsRTCWrapper(kBuiltinExchange, 5, 2); + AtomicBuiltinsRTCWrapper(kBuiltinFetchAdd, 5, 2); + AtomicBuiltinsRTCWrapper(kBuiltinFetchAnd, 7, 2); + AtomicBuiltinsRTCWrapper(kBuiltinFetchOr, 7, 2); + AtomicBuiltinsRTCWrapper(kBuiltinFetchXor, 7, 2); + AtomicBuiltinsRTCWrapper(kBuiltinFetchMax, 5, 2); + AtomicBuiltinsRTCWrapper(kBuiltinFetchMin, 5, 2); +} diff --git a/catch/unit/atomics/atomic_builtins_kernels_rtc.hh b/catch/unit/atomics/atomic_builtins_kernels_rtc.hh new file mode 100644 index 0000000000..1339eaaa45 --- /dev/null +++ b/catch/unit/atomics/atomic_builtins_kernels_rtc.hh @@ -0,0 +1,590 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Positive and negative kernels used for the builtin atomic Test Cases that are using RTC. +*/ + +static constexpr auto kBuiltinStore{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void StoreCompileKernel(int* x) { + __hip_atomic_store(x, 1, __ATOMIC_RELAXED, kMemScope); + __hip_atomic_store(x, 1, __ATOMIC_RELEASE, kMemScope); + __hip_atomic_store(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + __hip_atomic_store(reinterpret_cast(x), 1, kMemOrder, kMemScope); + __hip_atomic_store(*x, 1, kMemOrder, kMemScope); + __hip_atomic_store(x, 1, __ATOMIC_CONSUME, kMemScope); + __hip_atomic_store(x, 1, __ATOMIC_ACQUIRE, kMemScope); + __hip_atomic_store(x, 1, __ATOMIC_ACQ_REL, kMemScope); + __hip_atomic_store(x, 1, -1, kMemScope); + __hip_atomic_store(x, 1, 10, kMemScope); + __hip_atomic_store(x, 1, kMemOrder, -1); + __hip_atomic_store(x, 1, kMemOrder, 10); + + Dummy dummy_a{}; + Dummy dummy_b{}; + __hip_atomic_store(&dummy_a, dummy_b, kMemOrder, kMemScope); + + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + __hip_atomic_store(&dummytc_a, dummytc_b, kMemOrder, kMemScope); + } +)"}; + +static constexpr auto kBuiltinLoad{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + 
__device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void LoadCompileKernel(int* x, int* y) { + *y = __hip_atomic_load(x, __ATOMIC_RELAXED, kMemScope); + *y = __hip_atomic_load(x, __ATOMIC_CONSUME, kMemScope); + *y = __hip_atomic_load(x, __ATOMIC_ACQUIRE, kMemScope); + *y = __hip_atomic_load(x, __ATOMIC_SEQ_CST, kMemScope); + + *y = __hip_atomic_load(*x, kMemOrder, kMemScope); + *y = __hip_atomic_load(x, __ATOMIC_RELEASE, kMemScope); + *y = __hip_atomic_load(x, __ATOMIC_ACQ_REL, kMemScope); + *y = __hip_atomic_load(x, -1, kMemScope); + *y = __hip_atomic_load(x, 10, kMemScope); + *y = __hip_atomic_load(x, kMemOrder, -1); + *y = __hip_atomic_load(x, kMemOrder, 10); + + Dummy dummy_a{}; + Dummy dummy_b{}; + dummy_a = __hip_atomic_load(&dummy_b, kMemOrder, kMemScope); + + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + dummytc_a = __hip_atomic_load(&dummytc_b, kMemOrder, kMemScope); + } +)"}; + +static constexpr auto kBuiltinCompExWeak{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void CompareWeakCompileKernel(int* x, int* expected) { + bool res{false}; + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, 
__ATOMIC_CONSUME, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST, + kMemScope); + + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, __ATOMIC_RELEASE, kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, __ATOMIC_ACQ_REL, kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, + kMemScope); + 
res = __hip_atomic_compare_exchange_weak(reinterpret_cast(x), expected, 1, kMemOrder, + kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_weak(*x, expected, 1, kMemOrder, kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, -1, kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, 10, kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, -1, kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, 10, kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, kMemOrder, -1); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, kMemOrder, 10); + + Dummy dummy_a{}; + Dummy dummy_b{}; + Dummy dummy_c{}; + res = __hip_atomic_compare_exchange_weak(&dummy_a, &dummy_b, dummy_c, kMemOrder, kMemOrder, + kMemScope); + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + DummyTC dummytc_c{}; + res = __hip_atomic_compare_exchange_weak(&dummytc_a, &dummytc_b, dummytc_c, kMemOrder, kMemOrder, + kMemScope); + } +)"}; + +static constexpr auto kBuiltinCompExStrong{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void CompareStrongCompileKernel(int* x, int* expected) { + bool res{false}; + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, 
__ATOMIC_CONSUME, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST, + kMemScope); + + res = + __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, __ATOMIC_RELEASE, kMemScope); + res = + __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, __ATOMIC_ACQ_REL, kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, + kMemScope); + res = __hip_atomic_compare_exchange_strong(reinterpret_cast(x), 
expected, 1, + kMemOrder, kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_strong(*x, expected, 1, kMemOrder, kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, -1, kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, 10, kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, -1, kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, 10, kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, kMemOrder, -1); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, kMemOrder, 10); + + Dummy dummy_a{}; + Dummy dummy_b{}; + Dummy dummy_c{}; + res = __hip_atomic_compare_exchange_strong(&dummy_a, &dummy_b, dummy_c, kMemOrder, kMemOrder, + kMemScope); + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + DummyTC dummytc_c{}; + res = __hip_atomic_compare_exchange_strong(&dummytc_a, &dummytc_b, dummytc_c, kMemOrder, + kMemOrder, kMemScope); + } +)"}; + +static constexpr auto kBuiltinExchange{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void ExchangeCompileKernel(int* x) { + int old{}; + old = __hip_atomic_exchange(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_exchange(x, 1, 
__ATOMIC_SEQ_CST, kMemScope); + + old = __hip_atomic_exchange(reinterpret_cast(x), 1, kMemOrder, kMemScope); + old = __hip_atomic_exchange(*x, 1, kMemOrder, kMemScope); + old = __hip_atomic_exchange(x, 1, -1, kMemScope); + old = __hip_atomic_exchange(x, 1, 10, kMemScope); + old = __hip_atomic_exchange(x, 1, kMemOrder, -1); + old = __hip_atomic_exchange(x, 1, kMemOrder, 10); + + Dummy dummy_a{}; + Dummy dummy_b{}; + dummy_b = __hip_atomic_exchange(&dummy_a, dummy_b, kMemOrder, kMemScope); + + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + dummytc_b = __hip_atomic_exchange(&dummytc_a, dummytc_b, kMemOrder, kMemScope); + } +)"}; + +static constexpr auto kBuiltinFetchAdd{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void FetchAddCompileKernel(int* x) { + int old{}; + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + old = __hip_atomic_fetch_add(reinterpret_cast(x), 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_add(*x, 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_add(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_add(x, 1, 10, kMemScope); + old = __hip_atomic_fetch_add(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_add(x, 1, kMemOrder, 10); + + Dummy 
dummy{}; + old = __hip_atomic_fetch_add(&dummy, 1, kMemOrder, kMemScope); + } +)"}; + +static constexpr auto kBuiltinFetchAnd{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void FetchAndCompileKernel(int* x) { + int old{}; + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + old = __hip_atomic_fetch_and(reinterpret_cast(x), 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_and(*x, 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_and(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_and(x, 1, 10, kMemScope); + old = __hip_atomic_fetch_and(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_and(x, 1, kMemOrder, 10); + + Dummy dummy{}; + old = __hip_atomic_fetch_and(&dummy, 1, kMemOrder, kMemScope); + float float_var{1.5f}; + old = __hip_atomic_fetch_and(&float_var, 1, kMemOrder, kMemScope); + double double_var{1.5}; + old = __hip_atomic_fetch_and(&double_var, 1, kMemOrder, kMemScope); + } +)"}; + +static constexpr auto kBuiltinFetchOr{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = 
default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void FetchOrCompileKernel(int* x) { + int old{}; + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + old = __hip_atomic_fetch_or(reinterpret_cast(x), 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_or(*x, 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_or(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_or(x, 1, 10, kMemScope); + old = __hip_atomic_fetch_or(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_or(x, 1, kMemOrder, 10); + + Dummy dummy{}; + old = __hip_atomic_fetch_or(&dummy, 1, kMemOrder, kMemScope); + float float_var{1.5f}; + old = __hip_atomic_fetch_or(&float_var, 1, kMemOrder, kMemScope); + double double_var{1.5}; + old = __hip_atomic_fetch_or(&double_var, 1, kMemOrder, kMemScope); + } +)"}; + +static auto constexpr kBuiltinFetchXor{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void FetchXorCompileKernel(int* x) { + int old{}; + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_RELAXED, 
kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + old = __hip_atomic_fetch_xor(reinterpret_cast(x), 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_xor(*x, 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, 10, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_xor(x, 1, kMemOrder, 10); + + Dummy dummy{}; + old = __hip_atomic_fetch_xor(&dummy, 1, kMemOrder, kMemScope); + float float_var{1.5f}; + old = __hip_atomic_fetch_xor(&float_var, 1, kMemOrder, kMemScope); + double double_var{1.5}; + old = __hip_atomic_fetch_xor(&double_var, 1, kMemOrder, kMemScope); + } +)"}; + +static constexpr auto kBuiltinFetchMax{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void FetchMaxCompileKernel(int* x) { + int old{}; + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + old = 
__hip_atomic_fetch_max(reinterpret_cast(x), 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_max(*x, 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_max(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_max(x, 1, 10, kMemScope); + old = __hip_atomic_fetch_max(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_max(x, 1, kMemOrder, 10); + + Dummy dummy{}; + old = __hip_atomic_fetch_max(&dummy, 1, kMemOrder, kMemScope); + } +)"}; + +static constexpr auto kBuiltinFetchMin{R"( + constexpr int kMemOrder = __ATOMIC_RELAXED; + constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + + class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; + }; + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void FetchMinCompileKernel(int* x) { + int old{}; + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + old = __hip_atomic_fetch_min(reinterpret_cast(x), 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_min(*x, 1, kMemOrder, kMemScope); + old = __hip_atomic_fetch_min(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_min(x, 1, 10, kMemScope); + old = __hip_atomic_fetch_min(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_min(x, 1, kMemOrder, 10); + + Dummy dummy{}; + old = __hip_atomic_fetch_min(&dummy, 1, kMemOrder, kMemScope); + } +)"}; From 536c20b62d2366271f9344396ed591aee01abf21 Mon Sep 17 00:00:00 2001 From: Milos Mozetic Date: Thu, 28 Dec 2023 
18:35:58 +0100 Subject: [PATCH 27/71] EXSWHTEC-314 - Implement unit tests for short vector types #297 Change-Id: I609548b79ae45530b0e82c532f93cf1f411343e4 --- catch/include/hip_test_defgroups.hh | 3 + catch/unit/vector_types/CMakeLists.txt | 58 ++ .../vector_types/negative_bitwise_double.cc | 25 + .../vector_types/negative_bitwise_float.cc | 25 + .../negative_bitwise_float_double_rtc.hh | 272 +++++++ ...calculate_assign_with_unsigned_value_1D.cc | 26 + ...calculate_assign_with_unsigned_value_2D.cc | 26 + ...calculate_assign_with_unsigned_value_3D.cc | 26 + ...calculate_assign_with_unsigned_value_4D.cc | 26 + ...negative_calculate_assign_with_value_1D.cc | 26 + ...negative_calculate_assign_with_value_2D.cc | 26 + ...negative_calculate_assign_with_value_3D.cc | 26 + ...negative_calculate_assign_with_value_4D.cc | 26 + ...egative_calculate_assign_with_value_rtc.hh | 750 ++++++++++++++++++ .../vector_types/negative_macros_common.hh | 78 ++ .../vector_types/negative_negate_unsigned.cc | 41 + .../negative_negate_unsigned_rtc.hh | 150 ++++ .../vector_types/vector_operations_common.hh | 338 ++++++++ catch/unit/vector_types/vector_types.cc | 357 +++++++++ .../unit/vector_types/vector_types_common.hh | 187 +++++ 20 files changed, 2492 insertions(+) create mode 100644 catch/unit/vector_types/negative_bitwise_double.cc create mode 100644 catch/unit/vector_types/negative_bitwise_float.cc create mode 100644 catch/unit/vector_types/negative_bitwise_float_double_rtc.hh create mode 100644 catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_1D.cc create mode 100644 catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_2D.cc create mode 100644 catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_3D.cc create mode 100644 catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_4D.cc create mode 100644 catch/unit/vector_types/negative_calculate_assign_with_value_1D.cc create mode 100644 
catch/unit/vector_types/negative_calculate_assign_with_value_2D.cc create mode 100644 catch/unit/vector_types/negative_calculate_assign_with_value_3D.cc create mode 100644 catch/unit/vector_types/negative_calculate_assign_with_value_4D.cc create mode 100644 catch/unit/vector_types/negative_calculate_assign_with_value_rtc.hh create mode 100644 catch/unit/vector_types/negative_macros_common.hh create mode 100644 catch/unit/vector_types/negative_negate_unsigned.cc create mode 100644 catch/unit/vector_types/negative_negate_unsigned_rtc.hh create mode 100644 catch/unit/vector_types/vector_operations_common.hh create mode 100644 catch/unit/vector_types/vector_types.cc create mode 100644 catch/unit/vector_types/vector_types_common.hh diff --git a/catch/include/hip_test_defgroups.hh b/catch/include/hip_test_defgroups.hh index 1ab256ec10..58ecdae020 100644 --- a/catch/include/hip_test_defgroups.hh +++ b/catch/include/hip_test_defgroups.hh @@ -388,4 +388,7 @@ TEST_CASE("Unit_Kernel_Launch_bounds_Negative_Parameters_ParseError") {} /** * End doxygen group DeviceLanguageTest. * @} + * @defgroup VectorTypeTest Vector types + * @{ + * This section describes tests for the Vector type functions and operators. 
*/ diff --git a/catch/unit/vector_types/CMakeLists.txt b/catch/unit/vector_types/CMakeLists.txt index 49619275f3..f3aa0fb1af 100644 --- a/catch/unit/vector_types/CMakeLists.txt +++ b/catch/unit/vector_types/CMakeLists.txt @@ -21,9 +21,67 @@ # Common Tests - Test independent of all platforms set(TEST_SRC + vector_types.cc dim3.cc ) hip_add_exe_to_target(NAME VectorTypesTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests) + +if(HIP_PLATFORM MATCHES "amd") + add_test(NAME Unit_NegateUnsigned_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + negative_negate_unsigned.cc 40) + + add_test(NAME Unit_BitwiseFloat_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + negative_bitwise_float.cc 96) + + add_test(NAME Unit_BitwiseDouble_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + negative_bitwise_double.cc 96) + + add_test(NAME Unit_CalculateAssign1D_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + negative_calculate_assign_with_value_1D.cc 60) + + add_test(NAME Unit_CalculateAssign2D_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + negative_calculate_assign_with_value_2D.cc 60) + + add_test(NAME Unit_CalculateAssign3D_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + negative_calculate_assign_with_value_3D.cc 60) + + add_test(NAME Unit_CalculateAssign4D_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py 
+ ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + negative_calculate_assign_with_value_4D.cc 60) + + add_test(NAME Unit_CalculateAssignUnsigned1D_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + negative_calculate_assign_with_unsigned_value_1D.cc 60) + + add_test(NAME Unit_CalculateAssignUnsigned2D_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + negative_calculate_assign_with_unsigned_value_2D.cc 60) + + add_test(NAME Unit_CalculateAssignUnsigned3D_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + negative_calculate_assign_with_unsigned_value_3D.cc 60) + + add_test(NAME Unit_CalculateAssignUnsigned4D_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + negative_calculate_assign_with_unsigned_value_4D.cc 60) +endif() diff --git a/catch/unit/vector_types/negative_bitwise_double.cc b/catch/unit/vector_types/negative_bitwise_double.cc new file mode 100644 index 0000000000..e3a88bfe28 --- /dev/null +++ b/catch/unit/vector_types/negative_bitwise_double.cc @@ -0,0 +1,25 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(double1) +BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(double2) +BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(double3) +BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(double4) diff --git a/catch/unit/vector_types/negative_bitwise_float.cc b/catch/unit/vector_types/negative_bitwise_float.cc new file mode 100644 index 0000000000..cc7185f098 --- /dev/null +++ b/catch/unit/vector_types/negative_bitwise_float.cc @@ -0,0 +1,25 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(float1) +BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(float2) +BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(float3) +BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(float4) diff --git a/catch/unit/vector_types/negative_bitwise_float_double_rtc.hh b/catch/unit/vector_types/negative_bitwise_float_double_rtc.hh new file mode 100644 index 0000000000..bf5eed0424 --- /dev/null +++ b/catch/unit/vector_types/negative_bitwise_float_double_rtc.hh @@ -0,0 +1,272 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +static constexpr auto kBitwiseFloat{R"( + __global__ void BitwiseDevice(float1* vector1_dev_ptr, float1* vector2_dev_ptr) { \ + float1 vector1_dev = *vector1_dev_ptr; \ + float1 vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(float1& vector1_host, float1& vector2_host) { \ + vector1_host = ~vector1_host; \ + vector1_host %= vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } + + __global__ void BitwiseDevice(float2* vector1_dev_ptr, float2* vector2_dev_ptr) { \ + float2 vector1_dev = *vector1_dev_ptr; \ + float2 vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(float2& vector1_host, float2& vector2_host) { \ + vector1_host = ~vector1_host; \ + vector1_host %= 
vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } + + __global__ void BitwiseDevice(float3* vector1_dev_ptr, float3* vector2_dev_ptr) { \ + float3 vector1_dev = *vector1_dev_ptr; \ + float3 vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(float3& vector1_host, float3& vector2_host) { \ + vector1_host = ~vector1_host; \ + vector1_host %= vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } + + __global__ void BitwiseDevice(float4* vector1_dev_ptr, float4* vector2_dev_ptr) { \ + float4 vector1_dev = *vector1_dev_ptr; \ + float4 vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + 
vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(float4& vector1_host, float4& vector2_host) { \ + vector1_host = ~vector1_host; \ + vector1_host %= vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } +)"}; + +static constexpr auto kBitwiseDouble{R"( + __global__ void BitwiseDevice(double1* vector1_dev_ptr, double1* vector2_dev_ptr) { \ + double1 vector1_dev = *vector1_dev_ptr; \ + double1 vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(double1& vector1_host, double1& vector2_host) { \ + vector1_host = ~vector1_host; \ + vector1_host %= vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> 
vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } + + __global__ void BitwiseDevice(double2* vector1_dev_ptr, double2* vector2_dev_ptr) { \ + double2 vector1_dev = *vector1_dev_ptr; \ + double2 vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(double2& vector1_host, double2& vector2_host) { \ + vector1_host = ~vector1_host; \ + vector1_host %= vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } + + __global__ void BitwiseDevice(double3* vector1_dev_ptr, double3* vector2_dev_ptr) { \ + double3 vector1_dev = *vector1_dev_ptr; \ + double3 vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(double3& vector1_host, double3& vector2_host) { \ + vector1_host = ~vector1_host; 
\ + vector1_host %= vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } + + __global__ void BitwiseDevice(double4* vector1_dev_ptr, double4* vector2_dev_ptr) { \ + double4 vector1_dev = *vector1_dev_ptr; \ + double4 vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(double4& vector1_host, double4& vector2_host) { \ + vector1_host = ~vector1_host; \ + vector1_host %= vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } +)"}; diff --git a/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_1D.cc b/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_1D.cc new file mode 100644 index 0000000000..f95f2e5f0d --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_1D.cc @@ -0,0 +1,26 @@ +/* +Copyright 
(c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(uchar1) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ushort1) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(uint1) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ulong1) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ulonglong1) diff --git a/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_2D.cc b/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_2D.cc new file mode 100644 index 0000000000..9f69fc18e7 --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_2D.cc @@ -0,0 +1,26 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(uchar2) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ushort2) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(uint2) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ulong2) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ulonglong2) diff --git a/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_3D.cc b/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_3D.cc new file mode 100644 index 0000000000..99f9d8130e --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_3D.cc @@ -0,0 +1,26 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(uchar3) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ushort3) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(uint3) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ulong3) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ulonglong3) diff --git a/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_4D.cc b/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_4D.cc new file mode 100644 index 0000000000..301ae99c61 --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_unsigned_value_4D.cc @@ -0,0 +1,26 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(uchar4) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ushort4) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(uint4) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ulong4) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(ulonglong4) diff --git a/catch/unit/vector_types/negative_calculate_assign_with_value_1D.cc b/catch/unit/vector_types/negative_calculate_assign_with_value_1D.cc new file mode 100644 index 0000000000..09d7267186 --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_value_1D.cc @@ -0,0 +1,26 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(char1) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(short1) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(int1) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(long1) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(longlong1) diff --git a/catch/unit/vector_types/negative_calculate_assign_with_value_2D.cc b/catch/unit/vector_types/negative_calculate_assign_with_value_2D.cc new file mode 100644 index 0000000000..173d5fcb4e --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_value_2D.cc @@ -0,0 +1,26 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(char2) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(short2) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(int2) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(long2) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(longlong2) diff --git a/catch/unit/vector_types/negative_calculate_assign_with_value_3D.cc b/catch/unit/vector_types/negative_calculate_assign_with_value_3D.cc new file mode 100644 index 0000000000..8cd3145aa8 --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_value_3D.cc @@ -0,0 +1,26 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(char3) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(short3) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(int3) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(long3) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(longlong3) diff --git a/catch/unit/vector_types/negative_calculate_assign_with_value_4D.cc b/catch/unit/vector_types/negative_calculate_assign_with_value_4D.cc new file mode 100644 index 0000000000..9679d17548 --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_value_4D.cc @@ -0,0 +1,26 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "negative_macros_common.hh" + +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(char4) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(short4) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(int4) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(long4) +CALCULATE_ASSIGN_VECTOR_FUNCTIONS(longlong4) diff --git a/catch/unit/vector_types/negative_calculate_assign_with_value_rtc.hh b/catch/unit/vector_types/negative_calculate_assign_with_value_rtc.hh new file mode 100644 index 0000000000..ee921a016c --- /dev/null +++ b/catch/unit/vector_types/negative_calculate_assign_with_value_rtc.hh @@ -0,0 +1,750 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +static constexpr auto kCalculateAssignChar{R"( + __global__ void CalculateAssignDevice(char1* vector_dev_ptr, decltype(char1().x) value) { + char1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(char1& vector_host, decltype(char1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(uchar1* vector_dev_ptr, decltype(uchar1().x) value) { + uchar1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(uchar1& vector_host, decltype(uchar1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(char2* vector_dev_ptr, decltype(char2().x) value) { + char2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(char2& vector_host, decltype(char2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(uchar2* vector_dev_ptr, decltype(uchar2().x) value) { + uchar2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(uchar2& vector_host, decltype(uchar2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= 
value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(char3* vector_dev_ptr, decltype(char3().x) value) { + char3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(char3& vector_host, decltype(char3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(uchar3* vector_dev_ptr, decltype(uchar3().x) value) { + uchar3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(uchar3& vector_host, decltype(uchar3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(char4* vector_dev_ptr, decltype(char4().x) value) { + char4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(char4& vector_host, decltype(char4().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(uchar4* vector_dev_ptr, decltype(uchar4().x) value) { + uchar4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(uchar4& vector_host, decltype(uchar4().x) value) { + vector_host %= value; + vector_host ^= 
value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } +)"}; + +static constexpr auto kCalculateAssignShort{R"( + __global__ void CalculateAssignDevice(short1* vector_dev_ptr, decltype(short1().x) value) { + short1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(short1& vector_host, decltype(short1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ushort1* vector_dev_ptr, decltype(ushort1().x) value) { + ushort1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ushort1& vector_host, decltype(ushort1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(short2* vector_dev_ptr, decltype(short2().x) value) { + short2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(short2& vector_host, decltype(short2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ushort2* vector_dev_ptr, decltype(ushort2().x) value) { + ushort2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void 
CalculateAssignHost(ushort2& vector_host, decltype(ushort2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(short3* vector_dev_ptr, decltype(short3().x) value) { + short3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(short3& vector_host, decltype(short3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ushort3* vector_dev_ptr, decltype(ushort3().x) value) { + ushort3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ushort3& vector_host, decltype(ushort3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(short4* vector_dev_ptr, decltype(short4().x) value) { + short4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(short4& vector_host, decltype(short4().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ushort4* vector_dev_ptr, decltype(ushort4().x) value) { + ushort4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= 
value; + vector_dev <<= value; + } + void CalculateAssignHost(ushort4& vector_host, decltype(ushort4().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } +)"}; + +static constexpr auto kCalculateAssignInt{R"( + __global__ void CalculateAssignDevice(int1* vector_dev_ptr, decltype(int1().x) value) { + int1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(int1& vector_host, decltype(int1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(uint1* vector_dev_ptr, decltype(uint1().x) value) { + uint1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(uint1& vector_host, decltype(uint1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(int2* vector_dev_ptr, decltype(int2().x) value) { + int2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(int2& vector_host, decltype(int2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(uint2* vector_dev_ptr, decltype(uint2().x) value) { + uint2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + 
vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(uint2& vector_host, decltype(uint2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(int3* vector_dev_ptr, decltype(int3().x) value) { + int3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(int3& vector_host, decltype(int3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(uint3* vector_dev_ptr, decltype(uint3().x) value) { + uint3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(uint3& vector_host, decltype(uint3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(int4* vector_dev_ptr, decltype(int4().x) value) { + int4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(int4& vector_host, decltype(int4().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(uint4* vector_dev_ptr, decltype(uint4().x) value) { + uint4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + 
vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(uint4& vector_host, decltype(uint4().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } +)"}; + +static constexpr auto kCalculateAssignLong{R"( + __global__ void CalculateAssignDevice(long1* vector_dev_ptr, decltype(long1().x) value) { + long1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(long1& vector_host, decltype(long1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ulong1* vector_dev_ptr, decltype(ulong1().x) value) { + ulong1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ulong1& vector_host, decltype(ulong1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(long2* vector_dev_ptr, decltype(long2().x) value) { + long2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(long2& vector_host, decltype(long2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ulong2* vector_dev_ptr, decltype(ulong2().x) value) { + ulong2 
vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ulong2& vector_host, decltype(ulong2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(long3* vector_dev_ptr, decltype(long3().x) value) { + long3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(long3& vector_host, decltype(long3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ulong3* vector_dev_ptr, decltype(ulong3().x) value) { + ulong3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ulong3& vector_host, decltype(ulong3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(long4* vector_dev_ptr, decltype(long4().x) value) { + long4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(long4& vector_host, decltype(long4().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ulong4* vector_dev_ptr, decltype(ulong4().x) 
value) { + ulong4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ulong4& vector_host, decltype(ulong4().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } +)"}; + +static constexpr auto kCalculateAssignLongLong{R"( + __global__ void CalculateAssignDevice(longlong1* vector_dev_ptr, decltype(longlong1().x) value) { + longlong1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(longlong1& vector_host, decltype(longlong1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ulonglong1* vector_dev_ptr, decltype(ulonglong1().x) value) { + ulonglong1 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ulonglong1& vector_host, decltype(ulonglong1().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(longlong2* vector_dev_ptr, decltype(longlong2().x) value) { + longlong2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(longlong2& vector_host, decltype(longlong2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + 
vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ulonglong2* vector_dev_ptr, decltype(ulonglong2().x) value) { + ulonglong2 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ulonglong2& vector_host, decltype(ulonglong2().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(longlong3* vector_dev_ptr, decltype(longlong3().x) value) { + longlong3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(longlong3& vector_host, decltype(longlong3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ulonglong3* vector_dev_ptr, decltype(ulonglong3().x) value) { + ulonglong3 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ulonglong3& vector_host, decltype(ulonglong3().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(longlong4* vector_dev_ptr, decltype(longlong4().x) value) { + longlong4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(longlong4& vector_host, decltype(longlong4().x) value) { 
+ vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } + + __global__ void CalculateAssignDevice(ulonglong4* vector_dev_ptr, decltype(ulonglong4().x) value) { + ulonglong4 vector_dev = *vector_dev_ptr; + vector_dev %= value; + vector_dev ^= value; + vector_dev |= value; + vector_dev &= value; + vector_dev >>= value; + vector_dev <<= value; + } + void CalculateAssignHost(ulonglong4& vector_host, decltype(ulonglong4().x) value) { + vector_host %= value; + vector_host ^= value; + vector_host |= value; + vector_host &= value; + vector_host >>= value; + vector_host <<= value; + } +)"}; diff --git a/catch/unit/vector_types/negative_macros_common.hh b/catch/unit/vector_types/negative_macros_common.hh new file mode 100644 index 0000000000..2967d44800 --- /dev/null +++ b/catch/unit/vector_types/negative_macros_common.hh @@ -0,0 +1,78 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#define NEGATE_UNSIGNED_VECTOR_FUNCTIONS(type) \ + __global__ void NegateDevice(type* vector_dev_ptr) { \ + type vector_dev = *vector_dev_ptr; \ + vector_dev = -vector_dev; \ + } \ + void NegateHost(type& vector_host) { vector_host = -vector_host; } + +#define BITWISE_FLOATING_POINT_VECTOR_FUNCTIONS(type) \ + __global__ void BitwiseDevice(type* vector1_dev_ptr, type* vector2_dev_ptr) { \ + type vector1_dev = *vector1_dev_ptr; \ + type vector2_dev = *vector2_dev_ptr; \ + vector1_dev = ~vector1_dev; \ + vector1_dev %= vector2_dev; \ + vector1_dev ^= vector2_dev; \ + vector1_dev |= vector2_dev; \ + vector1_dev &= vector2_dev; \ + vector1_dev >>= vector2_dev; \ + vector1_dev <<= vector2_dev; \ + vector1_dev = vector1_dev ^ vector2_dev; \ + vector1_dev = vector1_dev | vector2_dev; \ + vector1_dev = vector1_dev & vector2_dev; \ + vector1_dev = vector1_dev >> vector2_dev; \ + vector1_dev = vector1_dev << vector2_dev; \ + } \ + void BitwiseHost(type& vector1_host, type& vector2_host) { \ + vector1_host = ~vector1_host; \ + vector1_host %= vector2_host; \ + vector1_host ^= vector2_host; \ + vector1_host |= vector2_host; \ + vector1_host &= vector2_host; \ + vector1_host >>= vector2_host; \ + vector1_host <<= vector2_host; \ + vector1_host = vector1_host ^ vector2_host; \ + vector1_host = vector1_host | vector2_host; \ + vector1_host = vector1_host & vector2_host; \ + vector1_host = vector1_host >> vector2_host; \ + vector1_host = vector1_host << vector2_host; \ + } + +#define CALCULATE_ASSIGN_VECTOR_FUNCTIONS(type) \ + __global__ void CalculateAssignDevice(type* vector_dev_ptr, decltype(type().x) value) { \ + type vector_dev = *vector_dev_ptr; \ + vector_dev %= value; \ + vector_dev ^= value; \ + 
vector_dev |= value; \ + vector_dev &= value; \ + vector_dev >>= value; \ + vector_dev <<= value; \ + } \ + void CalculateAssignHost(type& vector_host, decltype(type().x) value) { \ + vector_host %= value; \ + vector_host ^= value; \ + vector_host |= value; \ + vector_host &= value; \ + vector_host >>= value; \ + vector_host <<= value; \ + } diff --git a/catch/unit/vector_types/negative_negate_unsigned.cc b/catch/unit/vector_types/negative_negate_unsigned.cc new file mode 100644 index 0000000000..c8bd39c7a8 --- /dev/null +++ b/catch/unit/vector_types/negative_negate_unsigned.cc @@ -0,0 +1,41 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "negative_macros_common.hh" + +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(uchar1) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(uchar2) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(uchar3) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(uchar4) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ushort1) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ushort2) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ushort3) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ushort4) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(uint1) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(uint2) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(uint3) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(uint4) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ulong1) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ulong2) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ulong3) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ulong4) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ulonglong1) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ulonglong2) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ulonglong3) +NEGATE_UNSIGNED_VECTOR_FUNCTIONS(ulonglong4) diff --git a/catch/unit/vector_types/negative_negate_unsigned_rtc.hh b/catch/unit/vector_types/negative_negate_unsigned_rtc.hh new file mode 100644 index 0000000000..c393570a09 --- /dev/null +++ b/catch/unit/vector_types/negative_negate_unsigned_rtc.hh @@ -0,0 +1,150 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +static constexpr auto kNegateUnsignedChar{R"( + __global__ void NegateDevice(uchar1* vector_dev_ptr) { + uchar1 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(uchar1& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(uchar2* vector_dev_ptr) { + uchar2 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(uchar2& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(uchar3* vector_dev_ptr) { + uchar3 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(uchar3& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(uchar4* vector_dev_ptr) { + uchar4 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(uchar4& vector_host) { vector_host = -vector_host; } +)"}; + +static constexpr auto kNegateUnsignedShort{R"( + __global__ void NegateDevice(ushort1* vector_dev_ptr) { + ushort1 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ushort1& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(ushort2* vector_dev_ptr) { + ushort2 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ushort2& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(ushort3* vector_dev_ptr) { + ushort3 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ushort3& vector_host) { vector_host = 
-vector_host; } + + __global__ void NegateDevice(ushort4* vector_dev_ptr) { + ushort4 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ushort4& vector_host) { vector_host = -vector_host; } +)"}; + +static constexpr auto kNegateUnsignedInt{R"( + __global__ void NegateDevice(uint1* vector_dev_ptr) { + uint1 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(uint1& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(uint2* vector_dev_ptr) { + uint2 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(uint2& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(uint3* vector_dev_ptr) { + uint3 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(uint3& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(uint4* vector_dev_ptr) { + uint4 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(uint4& vector_host) { vector_host = -vector_host; } +)"}; + +static constexpr auto kNegateUnsignedLong{R"( + __global__ void NegateDevice(ulong1* vector_dev_ptr) { + ulong1 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ulong1& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(ulong2* vector_dev_ptr) { + ulong2 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ulong2& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(ulong3* vector_dev_ptr) { + ulong3 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ulong3& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(ulong4* vector_dev_ptr) { + ulong4 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ulong4& vector_host) { vector_host = -vector_host; } +)"}; + +static constexpr auto kNegateUnsignedLongLong{R"( + __global__ void 
NegateDevice(ulonglong1* vector_dev_ptr) { + ulonglong1 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ulonglong1& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(ulonglong2* vector_dev_ptr) { + ulonglong2 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ulonglong2& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(ulonglong3* vector_dev_ptr) { + ulonglong3 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ulonglong3& vector_host) { vector_host = -vector_host; } + + __global__ void NegateDevice(ulonglong4* vector_dev_ptr) { + ulonglong4 vector_dev = *vector_dev_ptr; + vector_dev = -vector_dev; + } + void NegateHost(ulonglong4& vector_host) { vector_host = -vector_host; } +)"}; diff --git a/catch/unit/vector_types/vector_operations_common.hh b/catch/unit/vector_types/vector_operations_common.hh new file mode 100644 index 0000000000..9684e0ecb1 --- /dev/null +++ b/catch/unit/vector_types/vector_operations_common.hh @@ -0,0 +1,338 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "vector_types_common.hh" + +enum class VectorOperation { + kIncrementPrefix, + kIncrementPostfix, + kDecrementPrefix, + kDecrementPostfix, + kAddAssign, + kSubtractAssign, + kMultiplyAssign, + kDivideAssign, + kNegate, + kBitwiseNot, + kModuloAssign, + kBitwiseXorAssign, + kBitwiseOrAssign, + kBitwiseAndAssign, + kRightShiftAssign, + kLeftShiftAssign, + kAdd, + kSubtract, + kMultiply, + kDivide, + kEqual, + kNotEqual, + kModulo, + kBitwiseXor, + kBitwiseOr, + kBitwiseAnd, + kRightShift, + kLeftShift +}; + +inline std::string to_string(VectorOperation operation) { + switch (operation) { + case VectorOperation::kIncrementPrefix: + return "increment (prefix)"; + case VectorOperation::kIncrementPostfix: + return "increment (postfix)"; + case VectorOperation::kDecrementPrefix: + return "decrement (prefix)"; + case VectorOperation::kDecrementPostfix: + return "decrement (postfix)"; + case VectorOperation::kAddAssign: + return "add and assign"; + case VectorOperation::kSubtractAssign: + return "subtract and assign"; + case VectorOperation::kMultiplyAssign: + return "multiply and assign"; + case VectorOperation::kDivideAssign: + return "divide and assign"; + case VectorOperation::kNegate: + return "negate"; + case VectorOperation::kBitwiseNot: + return "bitwise not"; + case VectorOperation::kModuloAssign: + return "modulo and assign"; + case VectorOperation::kBitwiseXorAssign: + return "bitwise XOR and assign"; + case VectorOperation::kBitwiseOrAssign: + return "bitwise OR and assign"; + case VectorOperation::kBitwiseAndAssign: + return "bitwise AND and assign"; + case VectorOperation::kRightShiftAssign: + return "right shift and assign"; + case VectorOperation::kLeftShiftAssign: + return "left 
shift and assign"; + case VectorOperation::kAdd: + return "add"; + case VectorOperation::kSubtract: + return "subtract"; + case VectorOperation::kMultiply: + return "multiply"; + case VectorOperation::kDivide: + return "divide"; + case VectorOperation::kEqual: + return "equal"; + case VectorOperation::kNotEqual: + return "not equal"; + case VectorOperation::kModulo: + return "modulo"; + case VectorOperation::kBitwiseXor: + return "bitwise XOR"; + case VectorOperation::kBitwiseOr: + return "bitwise OR"; + case VectorOperation::kBitwiseAnd: + return "bitwise AND"; + case VectorOperation::kRightShift: + return "right shift"; + case VectorOperation::kLeftShift: + return "left shift"; + default: + return "Unknown"; + } +} + +template +void SanityCheck(VectorOperation operation, T vector, decltype(T().x) value1, + decltype(T().x) value2) { + if (operation == VectorOperation::kIncrementPrefix) { + ++value1; + } else if (operation == VectorOperation::kIncrementPostfix) { + value1++; + } else if (operation == VectorOperation::kDecrementPrefix) { + --value1; + } else if (operation == VectorOperation::kDecrementPostfix) { + value1--; + } else if (operation == VectorOperation::kAddAssign) { + value1 += value2; + } else if (operation == VectorOperation::kSubtractAssign) { + value1 -= value2; + } else if (operation == VectorOperation::kMultiplyAssign) { + value1 *= value2; + } else if (operation == VectorOperation::kDivideAssign) { + value1 /= value2; + } else if (operation == VectorOperation::kAdd) { + value1 = value1 + value2; + } else if (operation == VectorOperation::kSubtract) { + value1 = value1 - value2; + } else if (operation == VectorOperation::kMultiply) { + value1 = value1 * value2; + } else if (operation == VectorOperation::kDivide) { + value1 = value1 / value2; + } else if (operation == VectorOperation::kEqual) { + value1 = (value1 == value2) ? 2 * value1 : 3 * value1; + } else if (operation == VectorOperation::kNotEqual) { + value1 = (value1 != value2) ? 
2 * value1 : 3 * value1; + } else { + if constexpr (std::is_signed_v) { + if (operation == VectorOperation::kNegate) { + value1 = -value1; + } + } + if constexpr (std::is_integral_v) { + if (operation == VectorOperation::kBitwiseNot) { + value1 = ~value1; + } else if (operation == VectorOperation::kModuloAssign) { + value1 %= value2; + } else if (operation == VectorOperation::kBitwiseXorAssign) { + value1 ^= value2; + } else if (operation == VectorOperation::kBitwiseOrAssign) { + value1 |= value2; + } else if (operation == VectorOperation::kBitwiseAndAssign) { + value1 &= value2; + } else if (operation == VectorOperation::kRightShiftAssign) { + value1 >>= value2; + } else if (operation == VectorOperation::kLeftShiftAssign) { + value1 <<= value2; + } else if (operation == VectorOperation::kModulo) { + value1 = value1 % value2; + } else if (operation == VectorOperation::kBitwiseXor) { + value1 = value1 ^ value2; + } else if (operation == VectorOperation::kBitwiseOr) { + value1 = value1 | value2; + } else if (operation == VectorOperation::kBitwiseAnd) { + value1 = value1 & value2; + } else if (operation == VectorOperation::kRightShift) { + value1 = value1 >> value2; + } else if (operation == VectorOperation::kLeftShift) { + value1 = value1 << value2; + } + } + } + SanityCheck(vector, value1); +} + +template +__device__ __host__ void PerformVectorOperation(VectorOperation operation, T* vector1, + const T& vector2) { + if (operation == VectorOperation::kIncrementPrefix) { + ++(*vector1); + } else if (operation == VectorOperation::kIncrementPostfix) { + (*vector1)++; + } else if (operation == VectorOperation::kDecrementPrefix) { + --(*vector1); + } else if (operation == VectorOperation::kDecrementPostfix) { + (*vector1)--; + } else if (operation == VectorOperation::kAddAssign) { + *vector1 += vector2; + } else if (operation == VectorOperation::kSubtractAssign) { + *vector1 -= vector2; + } else if (operation == VectorOperation::kMultiplyAssign) { + *vector1 *= vector2; + 
} else if (operation == VectorOperation::kDivideAssign) { + *vector1 /= vector2; + } else if (operation == VectorOperation::kAdd) { + *vector1 = *vector1 + vector2; + } else if (operation == VectorOperation::kSubtract) { + *vector1 = *vector1 - vector2; + } else if (operation == VectorOperation::kMultiply) { + *vector1 = *vector1 * vector2; + } else if (operation == VectorOperation::kDivide) { + *vector1 = *vector1 / vector2; + } else if (operation == VectorOperation::kEqual) { + *vector1 = (*vector1 == vector2) ? 2 * *vector1 : 3 * *vector1; + } else if (operation == VectorOperation::kNotEqual) { + *vector1 = (*vector1 != vector2) ? 2 * *vector1 : 3 * *vector1; + } else { + if constexpr (std::is_signed_v) { + if (operation == VectorOperation::kNegate) { + *vector1 = -(*vector1); + } + } + if constexpr (std::is_integral_v) { + if (operation == VectorOperation::kBitwiseNot) { + *vector1 = ~(*vector1); + } else if (operation == VectorOperation::kModuloAssign) { + *vector1 %= vector2; + } else if (operation == VectorOperation::kBitwiseXorAssign) { + *vector1 ^= vector2; + } else if (operation == VectorOperation::kBitwiseOrAssign) { + *vector1 |= vector2; + } else if (operation == VectorOperation::kBitwiseAndAssign) { + *vector1 &= vector2; + } else if (operation == VectorOperation::kRightShiftAssign) { + *vector1 >>= vector2; + } else if (operation == VectorOperation::kLeftShiftAssign) { + *vector1 <<= vector2; + } else if (operation == VectorOperation::kModulo) { + *vector1 = *vector1 % vector2; + } else if (operation == VectorOperation::kBitwiseXor) { + *vector1 = *vector1 ^ vector2; + } else if (operation == VectorOperation::kBitwiseOr) { + *vector1 = *vector1 | vector2; + } else if (operation == VectorOperation::kBitwiseAnd) { + *vector1 = *vector1 & vector2; + } else if (operation == VectorOperation::kRightShift) { + *vector1 = *vector1 >> vector2; + } else if (operation == VectorOperation::kLeftShift) { + *vector1 = *vector1 << vector2; + } + } + } +} + 
+template +__device__ __host__ void PerformVectorOperation(VectorOperation operation, T* vector, + decltype(T().x) value) { + if (operation == VectorOperation::kAddAssign) { + *vector += value; + } else if (operation == VectorOperation::kSubtractAssign) { + *vector -= value; + } else if (operation == VectorOperation::kMultiplyAssign) { + *vector *= value; + } else if (operation == VectorOperation::kDivideAssign) { + *vector /= value; + } else if (operation == VectorOperation::kAdd) { + *vector = *vector + value; + } else if (operation == VectorOperation::kSubtract) { + *vector = *vector - value; + } else if (operation == VectorOperation::kMultiply) { + *vector = *vector * value; + } else if (operation == VectorOperation::kDivide) { + *vector = *vector / value; + } else if (operation == VectorOperation::kEqual) { + *vector = (*vector == value) ? 2 * *vector : 3 * *vector; + } else if (operation == VectorOperation::kNotEqual) { + *vector = (*vector != value) ? 2 * *vector : 3 * *vector; + } else { + if constexpr (std::is_integral_v) { + if (operation == VectorOperation::kModulo) { + *vector = *vector % value; + } else if (operation == VectorOperation::kBitwiseXor) { + *vector = *vector ^ value; + } else if (operation == VectorOperation::kBitwiseOr) { + *vector = *vector | value; + } else if (operation == VectorOperation::kBitwiseAnd) { + *vector = *vector & value; + } else if (operation == VectorOperation::kRightShift) { + *vector = *vector >> value; + } else if (operation == VectorOperation::kLeftShift) { + *vector = *vector << value; + } + } + } +} + +template +T PerformVectorOperationHost(VectorOperation operation, decltype(T().x) value1, + decltype(T().x) value2) { + T vector1{}; + MakeVectorType(&vector1, value1); + + if constexpr (two_vectors) { + T vector2{}; + MakeVectorType(&vector2, value2); + PerformVectorOperation(operation, &vector1, vector2); + } else { + PerformVectorOperation(operation, &vector1, value2); + } + + return vector1; +} + +template 
+__global__ void VectorOperationKernel(VectorOperation operation, T* vector1, decltype(T().x) value1, + decltype(T().x) value2) { + MakeVectorType(vector1, value1); + if constexpr (two_vectors) { + T vector2{}; + MakeVectorType(&vector2, value2); + PerformVectorOperation(operation, vector1, vector2); + } else { + PerformVectorOperation(operation, vector1, value2); + } +} + +template +T PerformVectorOperationDevice(VectorOperation operation, decltype(T().x) value1, + decltype(T().x) value2) { + T vector_h{}; + T* vector_d; + HIP_CHECK(hipMalloc(&vector_d, sizeof(T))); + HIP_CHECK(hipMemcpy(vector_d, &vector_h, sizeof(T), hipMemcpyHostToDevice)); + VectorOperationKernel<<<1, 1, 0, 0>>>(operation, vector_d, value1, value2); + HIP_CHECK(hipMemcpy(&vector_h, vector_d, sizeof(T), hipMemcpyDeviceToHost)); + HIP_CHECK(hipFree(vector_d)); + return vector_h; +} diff --git a/catch/unit/vector_types/vector_types.cc b/catch/unit/vector_types/vector_types.cc new file mode 100644 index 0000000000..2a4b5eda0c --- /dev/null +++ b/catch/unit/vector_types/vector_types.cc @@ -0,0 +1,357 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "vector_operations_common.hh" +#if HT_AMD +#include "negative_negate_unsigned_rtc.hh" +#include "negative_bitwise_float_double_rtc.hh" +#include "negative_calculate_assign_with_value_rtc.hh" +#endif + +/** + * @addtogroup make_vector make_vector + * @{ + * @ingroup VectorTypeTest + */ + +/** + * Test Description + * ------------------------ + * - Creates vectors for all supported types: + * -# make_char1, make_char2, make_char3, make_char4 + * -# make_uchar1, make_uchar2, make_uchar3, make_uchar4 + * -# make_short1, make_short2, make_short3, make_short4 + * -# make_ushort1, make_ushort2, make_ushort3, make_ushort4 + * -# make_int1, make_int2, make_int3, make_int4 + * -# make_uint1, make_uint2, make_uint3, make_uint4 + * -# make_long1, make_long2, make_long3, make_long4 + * -# make_ulong1, make_ulong2, make_ulong3, make_ulong4 + * -# make_longlong1, make_longlong2, make_longlong3, make_longlong4 + * -# make_ulonglong1, make_ulonglong2, make_ulonglong3, make_ulonglong4 + * -# make_float1, make_float2, make_float3, make_float4 + * -# make_double1, make_double2, make_double3, make_double4 + * - Checks that each vector type is created as expected + * - Calls make function from the host side + * Test source + * ------------------------ + * - unit/vector_types/vector_types.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_make_vector_SanityCheck_Basic_Host", "", char1, uchar1, char2, uchar2, + char3, uchar3, char4, uchar4, short1, ushort1, short2, ushort2, short3, ushort3, + short4, ushort4, int1, uint1, int2, uint2, int3, uint3, int4, uint4, long1, + ulong1, long2, ulong2, long3, ulong3, long4, ulong4, longlong1, ulonglong1, + longlong2, 
ulonglong2, longlong3, ulonglong3, longlong4, ulonglong4, float1, + float2, float3, float4, double1, double2, double3, double4) { + auto value = GetTestValue(0); + TestType vector = MakeVectorTypeHost(value); + SanityCheck(vector, value); +} + +/** + * Test Description + * ------------------------ + * - Creates vectors for all supported types: + * -# make_char1, make_char2, make_char3, make_char4 + * -# make_uchar1, make_uchar2, make_uchar3, make_uchar4 + * -# make_short1, make_short2, make_short3, make_short4 + * -# make_ushort1, make_ushort2, make_ushort3, make_ushort4 + * -# make_int1, make_int2, make_int3, make_int4 + * -# make_uint1, make_uint2, make_uint4, make_uint4 + * -# make_long1, make_long2, make_long3, make_long4 + * -# make_ulong1, make_ulong2, make_ulong3, make_ulong4 + * -# make_longlong1, make_longlong2, make_longlong3, make_longlong4 + * -# make_ulonglong1, make_ulonglong2, make_ulonglong3, make_ulonglong4 + * -# make_float1, make_float2, make_float3, make_float4 + * -# make_double1, make_double2, make_double3, make_double4 + * - Checks that each vector type is created as expected + * - Calls make function from the device side + * Test source + * ------------------------ + * - unit/vector_types/vector_types.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_make_vector_SanityCheck_Basic_Device", "", char1, uchar1, char2, uchar2, + char3, uchar3, char4, uchar4, short1, ushort1, short2, ushort2, short3, ushort3, + short4, ushort4, int1, uint1, int2, uint2, int3, uint3, int4, uint4, long1, + ulong1, long2, ulong2, long3, ulong3, long4, ulong4, longlong1, ulonglong1, + longlong2, ulonglong2, longlong3, ulonglong3, longlong4, ulonglong4, float1, + float2, float3, float4, double1, double2, double3, double4) { + auto value = GetTestValue(0); + TestType vector = MakeVectorTypeDevice(value); + SanityCheck(vector, value); +} + +#if HT_AMD +/** + * Test Description + * ------------------------ + * - 
Performs supported operations between all supported vector types + * - Checks that the operators are overloaded as expected by comparing results to the manually + * calculated ones + * - Calls operations from the host side + * Test source + * ------------------------ + * - unit/vector_types/vector_types.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_VectorAndVectorOperations_SanityCheck_Basic_Host", "", char1, uchar1, + char2, uchar2, char3, uchar3, char4, uchar4, short1, ushort1, short2, ushort2, + short3, ushort3, short4, ushort4, int1, uint1, int2, uint2, int3, uint3, int4, + uint4, long1, ulong1, long2, ulong2, long3, ulong3, long4, ulong4, longlong1, + ulonglong1, longlong2, ulonglong2, longlong3, ulonglong3, longlong4, ulonglong4, + float1, float2, float3, float4, double1, double2, double3, double4) { + auto value1 = GetTestValue(0); + auto value2 = GetTestValue(1); + + for (const auto operation : {VectorOperation::kIncrementPrefix, + VectorOperation::kIncrementPostfix, + VectorOperation::kDecrementPrefix, + VectorOperation::kDecrementPostfix, + VectorOperation::kAddAssign, + VectorOperation::kSubtractAssign, + VectorOperation::kMultiplyAssign, + VectorOperation::kDivideAssign, + VectorOperation::kNegate, + VectorOperation::kBitwiseNot, + VectorOperation::kModuloAssign, + VectorOperation::kBitwiseXorAssign, + VectorOperation::kBitwiseOrAssign, + VectorOperation::kBitwiseAndAssign, + VectorOperation::kRightShiftAssign, + VectorOperation::kLeftShiftAssign, + VectorOperation::kAdd, + VectorOperation::kSubtract, + VectorOperation::kMultiply, + VectorOperation::kDivide, + VectorOperation::kEqual, + VectorOperation::kNotEqual, + VectorOperation::kModulo, + VectorOperation::kBitwiseXor, + VectorOperation::kBitwiseOr, + VectorOperation::kBitwiseAnd, + VectorOperation::kRightShift, + VectorOperation::kLeftShift}) { + DYNAMIC_SECTION("operation: " << to_string(operation)) { + TestType vector = 
PerformVectorOperationHost(operation, value1, value2); + SanityCheck(operation, vector, value1, value2); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs supported operations between vector and underlying vector type (scalar) + * - Checks that the operators are overloaded as expected by comparing results to the manually + * calculated ones + * - Calls operations from the host side + * Test source + * ------------------------ + * - unit/vector_types/vector_types.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_VectorAndValueTypeOperations_SanityCheck_Basic_Host", "", char1, uchar1, + char2, uchar2, char3, uchar3, char4, uchar4, short1, ushort1, short2, ushort2, + short3, ushort3, short4, ushort4, int1, uint1, int2, uint2, int3, uint3, int4, + uint4, long1, ulong1, long2, ulong2, long3, ulong3, long4, ulong4, longlong1, + ulonglong1, longlong2, ulonglong2, longlong3, ulonglong3, longlong4, ulonglong4, + float1, float2, float3, float4, double1, double2, double3, double4) { + auto value1 = GetTestValue(0); + auto value2 = GetTestValue(1); + + for (const auto operation : + {VectorOperation::kAddAssign, VectorOperation::kSubtractAssign, + VectorOperation::kMultiplyAssign, VectorOperation::kDivideAssign, VectorOperation::kAdd, + VectorOperation::kSubtract, VectorOperation::kMultiply, VectorOperation::kDivide, + VectorOperation::kEqual, VectorOperation::kNotEqual, VectorOperation::kModulo, + VectorOperation::kBitwiseXor, VectorOperation::kBitwiseOr, VectorOperation::kBitwiseAnd, + VectorOperation::kRightShift, VectorOperation::kLeftShift}) { + DYNAMIC_SECTION("operation: " << to_string(operation)) { + TestType vector = PerformVectorOperationHost(operation, value1, value2); + SanityCheck(operation, vector, value1, value2); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs supported operations between all supported vector types + * - Checks that the 
operators are overloaded as expected by comparing results to the manually + * calculated ones + * - Calls operations from the device side + * Test source + * ------------------------ + * - unit/vector_types/vector_types.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_VectorAndVectorOperations_SanityCheck_Basic_Device", "", char1, uchar1, + char2, uchar2, char3, uchar3, char4, uchar4, short1, ushort1, short2, ushort2, + short3, ushort3, short4, ushort4, int1, uint1, int2, uint2, int3, uint3, int4, + uint4, long1, ulong1, long2, ulong2, long3, ulong3, long4, ulong4, longlong1, + ulonglong1, longlong2, ulonglong2, longlong3, ulonglong3, longlong4, ulonglong4, + float1, float2, float3, float4, double1, double2, double3, double4) { + auto value1 = GetTestValue(0); + auto value2 = GetTestValue(1); + + for (const auto operation : {VectorOperation::kIncrementPrefix, + VectorOperation::kIncrementPostfix, + VectorOperation::kDecrementPrefix, + VectorOperation::kDecrementPostfix, + VectorOperation::kAddAssign, + VectorOperation::kSubtractAssign, + VectorOperation::kMultiplyAssign, + VectorOperation::kDivideAssign, + VectorOperation::kNegate, + VectorOperation::kBitwiseNot, + VectorOperation::kModuloAssign, + VectorOperation::kBitwiseXorAssign, + VectorOperation::kBitwiseOrAssign, + VectorOperation::kBitwiseAndAssign, + VectorOperation::kRightShiftAssign, + VectorOperation::kLeftShiftAssign, + VectorOperation::kAdd, + VectorOperation::kSubtract, + VectorOperation::kMultiply, + VectorOperation::kDivide, + VectorOperation::kEqual, + VectorOperation::kNotEqual, + VectorOperation::kModulo, + VectorOperation::kBitwiseXor, + VectorOperation::kBitwiseOr, + VectorOperation::kBitwiseAnd, + VectorOperation::kRightShift, + VectorOperation::kLeftShift}) { + DYNAMIC_SECTION("operation: " << to_string(operation)) { + TestType vector = PerformVectorOperationDevice(operation, value1, value2); + SanityCheck(operation, vector, 
value1, value2); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs supported operations between vector and underlying vector type (scalar) + * - Checks that the operators are overloaded as expected by comparing results to the manually + * calculated ones + * - Calls operations from the device side + * Test source + * ------------------------ + * - unit/vector_types/vector_types.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_VectorAndValueTypeOperations_SanityCheck_Basic_Device", "", char1, uchar1, + char2, uchar2, char3, uchar3, char4, uchar4, short1, ushort1, short2, ushort2, + short3, ushort3, short4, ushort4, int1, uint1, int2, uint2, int3, uint3, int4, + uint4, long1, ulong1, long2, ulong2, long3, ulong3, long4, ulong4, longlong1, + ulonglong1, longlong2, ulonglong2, longlong3, ulonglong3, longlong4, ulonglong4, + float1, float2, float3, float4, double1, double2, double3, double4) { + auto value1 = GetTestValue(0); + auto value2 = GetTestValue(1); + + for (const auto operation : + {VectorOperation::kAddAssign, VectorOperation::kSubtractAssign, + VectorOperation::kMultiplyAssign, VectorOperation::kDivideAssign, VectorOperation::kAdd, + VectorOperation::kSubtract, VectorOperation::kMultiply, VectorOperation::kDivide, + VectorOperation::kEqual, VectorOperation::kNotEqual, VectorOperation::kModulo, + VectorOperation::kBitwiseXor, VectorOperation::kBitwiseOr, VectorOperation::kBitwiseAnd, + VectorOperation::kRightShift, VectorOperation::kLeftShift}) { + DYNAMIC_SECTION("operation: " << to_string(operation)) { + TestType vector = PerformVectorOperationDevice(operation, value1, value2); + SanityCheck(operation, vector, value1, value2); + } + } +} + +template void VectorTypesRTCWrapper(const char* program_source) { + hiprtcProgram program{}; + HIPRTC_CHECK(hiprtcCreateProgram(&program, program_source, "vector_types_kernels.cc", 0, nullptr, + nullptr)); + +#if HT_AMD + 
std::string args = std::string("-ferror-limit=100"); + const char* options[] = {args.c_str()}; + hiprtcResult result{hiprtcCompileProgram(program, 1, options)}; +#else + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; +#endif + + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + std::string error_message{"error:"}; + + size_t npos_e = log.find(error_message, 0); + while (npos_e != std::string::npos) { + ++error_count; + npos_e = log.find(error_message, npos_e + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_errors_num); +} + +/** + * Test Description + * ------------------------ + * - Compiles kernels and host functions with negative scenarios: + * -# Negate (-) operator on the unsigned vectors + * -# Bitwise operators on the floating-point vectors + * -# Calculate-assign operators that are not supported between vector and scalar + * - Utilizes HIP RTC for compilation + * Test source + * ------------------------ + * - unit/vector_types/vector_types.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_VectorOperators_Negative_Parameters_RTC") { + VectorTypesRTCWrapper<8>(kNegateUnsignedChar); + VectorTypesRTCWrapper<8>(kNegateUnsignedShort); + VectorTypesRTCWrapper<8>(kNegateUnsignedInt); + VectorTypesRTCWrapper<8>(kNegateUnsignedLong); + VectorTypesRTCWrapper<8>(kNegateUnsignedLongLong); + VectorTypesRTCWrapper<96>(kBitwiseFloat); + VectorTypesRTCWrapper<96>(kBitwiseDouble); + VectorTypesRTCWrapper<96>(kCalculateAssignChar); + VectorTypesRTCWrapper<96>(kCalculateAssignShort); + VectorTypesRTCWrapper<96>(kCalculateAssignInt); + VectorTypesRTCWrapper<96>(kCalculateAssignLong); + VectorTypesRTCWrapper<96>(kCalculateAssignLongLong); +} +#endif // HT_AMD diff 
--git a/catch/unit/vector_types/vector_types_common.hh b/catch/unit/vector_types/vector_types_common.hh new file mode 100644 index 0000000000..cbaf3f24a6 --- /dev/null +++ b/catch/unit/vector_types/vector_types_common.hh @@ -0,0 +1,187 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +constexpr auto kIntegerTestValueFirst = 42; +constexpr auto kIntegerTestValueSecond = 4; +constexpr auto kFloatingPointTestValueFirst = 42.125; +constexpr auto kFloatingPointTestValueSecond = 4.875; + +template T GetTestValue(int index) { /* index 0 -> first test value, any other index -> second; floating-point T gets the fractional constant, integral T the integer one */ + if (index == 0) { + return std::is_floating_point_v ? static_cast(kFloatingPointTestValueFirst) + : static_cast(kIntegerTestValueFirst); + } else { + return std::is_floating_point_v ? 
static_cast(kFloatingPointTestValueSecond) + : static_cast(kIntegerTestValueSecond); + } +} + +template +typename std::enable_if::type SanityCheck( + T vector, decltype(T().x) expected_value) { + REQUIRE(vector.x == expected_value); +} + +template +typename std::enable_if::type SanityCheck( + T vector, decltype(T().x) expected_value) { + REQUIRE(vector.x == expected_value); + REQUIRE(vector.y == expected_value); +} + +template +typename std::enable_if::type SanityCheck( + T vector, decltype(T().x) expected_value) { + REQUIRE(vector.x == expected_value); + REQUIRE(vector.y == expected_value); + REQUIRE(vector.z == expected_value); +} + +template +typename std::enable_if::type SanityCheck( + T vector, decltype(T().x) expected_value) { + REQUIRE(vector.x == expected_value); + REQUIRE(vector.y == expected_value); + REQUIRE(vector.z == expected_value); + REQUIRE(vector.w == expected_value); +} + +template +__host__ __device__ void MakeVectorType(T* vector_ptr, decltype(T().x) value) { + if constexpr (std::is_same_v) { + *vector_ptr = make_char1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_uchar1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_char2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_uchar2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_char3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_uchar3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_char4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_uchar4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_short1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ushort1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_short2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ushort2(value, 
value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_short3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ushort3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_short4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ushort4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_int1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_uint1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_int2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_uint2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_int3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_uint3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_int4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_uint4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_long1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ulong1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_long2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ulong2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_long3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ulong3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_long4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ulong4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_longlong1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ulonglong1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = 
make_longlong2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ulonglong2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_longlong3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ulonglong3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_longlong4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_ulonglong4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_float1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_float2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_float3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_float4(value, value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_double1(value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_double2(value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_double3(value, value, value); + } else if constexpr (std::is_same_v) { + *vector_ptr = make_double4(value, value, value, value); + } +} + +template T MakeVectorTypeHost(decltype(T().x) value) { + T vector{}; + MakeVectorType(&vector, value); + return vector; +} + +template __global__ void VectorTypeKernel(T* vector, decltype(T().x) value) { + MakeVectorType(vector, value); +} + +template T MakeVectorTypeDevice(decltype(T().x) value) { + T vector_h{}; + T* vector_d; + HIP_CHECK(hipMalloc(&vector_d, sizeof(T))); + HIP_CHECK(hipMemcpy(vector_d, &vector_h, sizeof(T), hipMemcpyHostToDevice)); + VectorTypeKernel<<<1, 1, 0, 0>>>(vector_d, value); + HIP_CHECK(hipMemcpy(&vector_h, vector_d, sizeof(T), hipMemcpyDeviceToHost)); + HIP_CHECK(hipFree(vector_d)); + return vector_h; +} From da67d77a0c0a415b15af32e305d4cc253b89c118 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= 
<109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 14:26:21 +0100 Subject: [PATCH 28/71] EXSWHTEC-317 - Implement tests for 1D texture device functions #366 Change-Id: Iafefada132fb94a1b120d91915e24524e7d766d0 --- catch/unit/texture/CMakeLists.txt | 7 ++ catch/unit/texture/kernels.hh | 52 ++++++++ catch/unit/texture/tex1D.cc | 140 +++++++++++++++++++++ catch/unit/texture/tex1DGrad.cc | 140 +++++++++++++++++++++ catch/unit/texture/tex1DLayered.cc | 149 +++++++++++++++++++++++ catch/unit/texture/tex1DLayeredGrad.cc | 140 +++++++++++++++++++++ catch/unit/texture/tex1DLayeredLod.cc | 140 +++++++++++++++++++++ catch/unit/texture/tex1DLod.cc | 140 +++++++++++++++++++++ catch/unit/texture/tex1Dfetch.cc | 162 +++++++++++++++++++++++++ 9 files changed, 1070 insertions(+) create mode 100644 catch/unit/texture/tex1D.cc create mode 100644 catch/unit/texture/tex1DGrad.cc create mode 100644 catch/unit/texture/tex1DLayered.cc create mode 100644 catch/unit/texture/tex1DLayeredGrad.cc create mode 100644 catch/unit/texture/tex1DLayeredLod.cc create mode 100644 catch/unit/texture/tex1DLod.cc create mode 100644 catch/unit/texture/tex1Dfetch.cc diff --git a/catch/unit/texture/CMakeLists.txt b/catch/unit/texture/CMakeLists.txt index fe4c0a8d1d..73394005b6 100644 --- a/catch/unit/texture/CMakeLists.txt +++ b/catch/unit/texture/CMakeLists.txt @@ -51,6 +51,13 @@ set(TEST_SRC hipMipmappedArrayCreate.cc hipMipmappedArrayDestroy.cc hipMipmappedArrayGetLevel.cc + tex1Dfetch.cc + tex1D.cc + tex1DLayered.cc + tex1DGrad.cc + tex1DLayeredGrad.cc + tex1DLayeredLod.cc + tex1DLod.cc ) if(WIN32) diff --git a/catch/unit/texture/kernels.hh b/catch/unit/texture/kernels.hh index 056dd3f6e0..ac5d73ff40 100644 --- a/catch/unit/texture/kernels.hh +++ b/catch/unit/texture/kernels.hh @@ -33,6 +33,14 @@ __host__ __device__ inline float GetCoordinate(size_t iteration, size_t N, size_ return normalized_coords ? 
x / dim : x; } +template +__global__ void tex1DfetchKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj) { + const auto tid = cg::this_grid().thread_rank(); + if (tid >= N) return; + + out[tid] = tex1Dfetch(tex_obj, tid); /* fetch by integer texel index (tid), not by float coordinate as tex1D does */ +} + template __global__ void tex1DKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, size_t width, size_t num_subdivisions, bool normalized_coords) { @@ -43,6 +51,50 @@ __global__ void tex1DKernel(TexelType* const out, size_t N, hipTextureObject_t t out[tid] = tex1D(tex_obj, x); } +template +__global__ void tex1DLodKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, + size_t width, size_t num_subdivisions, bool normalized_coords, + float level_of_detail) { + const auto tid = cg::this_grid().thread_rank(); + if (tid >= N) return; + + float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords); + out[tid] = tex1DLod(tex_obj, x, level_of_detail); +} + +template +__global__ void tex1DLayeredLodKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, + size_t width, size_t num_subdivisions, bool normalized_coords, + int layer, float level_of_detail) { + const auto tid = cg::this_grid().thread_rank(); + if (tid >= N) return; + + float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords); + out[tid] = tex1DLayeredLod(tex_obj, x, layer, level_of_detail); +} + +template +__global__ void tex1DGradKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, + size_t width, size_t num_subdivisions, bool normalized_coords, + float dx, float dy) { + const auto tid = cg::this_grid().thread_rank(); + if (tid >= N) return; + + float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords); + out[tid] = tex1DGrad(tex_obj, x, dx, dy); +} + +template +__global__ void tex1DLayeredGradKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, + size_t width, size_t 
num_subdivisions, + bool normalized_coords, float dx, float dy, int layer) { + const auto tid = 
cg::this_grid().thread_rank(); + if (tid >= N) return; + + float x = GetCoordinate(tid, N, width, num_subdivisions, normalized_coords); + out[tid] = tex1DLayeredGrad(tex_obj, x, layer, dx, dy); +} + template __global__ void tex2DKernel(TexelType* const out, size_t N_x, size_t N_y, hipTextureObject_t tex_obj, size_t width, size_t height, diff --git a/catch/unit/texture/tex1D.cc b/catch/unit/texture/tex1D.cc new file mode 100644 index 0000000000..de92fca2ee --- /dev/null +++ b/catch/unit/texture/tex1D.cc @@ -0,0 +1,140 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex1D tex1D + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1D` and read mode set to `hipReadModeElementType`. 
The test + * is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex1D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1D_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + tex1DKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Index: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = fixture.tex_h.Tex1D(x, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1D` and read mode set to `hipReadModeNormalizedFloat`. 
The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex1D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1D_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, + unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + tex1DKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Filter mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = + Vec4Map(fixture.tex_h.Tex1D(x, params.tex_desc), NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex1DGrad.cc b/catch/unit/texture/tex1DGrad.cc new file mode 
100644 index 0000000000..5b893d0954 --- /dev/null +++ b/catch/unit/texture/tex1DGrad.cc @@ -0,0 +1,140 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex1DGrad tex1DGrad + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DGrad` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1DGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1DGrad_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + tex1DGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0.5f, 0.5f); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Index: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = fixture.tex_h.Tex1D(x, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DGrad` and read mode set to `hipReadModeNormalizedFloat`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1DGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1DGrad_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + tex1DGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0.5f, 0.5f); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Filter mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = + Vec4Map(fixture.tex_h.Tex1D(x, params.tex_desc), NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex1DLayered.cc b/catch/unit/texture/tex1DLayered.cc new file mode 100644 index 0000000000..e0ad4f707c --- /dev/null +++ b/catch/unit/texture/tex1DLayered.cc @@ -0,0 +1,149 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. 
All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex1DLayered tex1DLayered + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DLayered` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1DLayered_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex1DLayeredKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("i: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + const auto ref_val = fixture.tex_h.Tex1DLayered(x, layer, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DLayered` and read mode set to `hipReadModeNormalizedFloat`. 
+ * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex1DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1DLayered_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex1DLayeredKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("Index: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = Vec4Map(fixture.tex_h.Tex1DLayered(x, layer, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} diff --git 
a/catch/unit/texture/tex1DLayeredGrad.cc b/catch/unit/texture/tex1DLayeredGrad.cc new file mode 100644 index 0000000000..07c9734619 --- /dev/null +++ b/catch/unit/texture/tex1DLayeredGrad.cc @@ -0,0 +1,140 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex1DLayeredGrad tex1DLayeredGrad + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DLayeredGrad` and read mode set to `hipReadModeElementType`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1DLayeredGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1DLayeredGrad_Positive_ReadModeElementType", "", char, unsigned char, + short, unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + tex1DLayeredGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0.5f, 0.5f, 0); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Index: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = fixture.tex_h.Tex1D(x, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DLayeredGrad` and read mode set to + * `hipReadModeNormalizedFloat`. The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1DLayeredGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat", "", char, + unsigned char, short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + tex1DLayeredGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0.5f, 0.5f, 0); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Filter mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = + Vec4Map(fixture.tex_h.Tex1D(x, params.tex_desc), NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex1DLayeredLod.cc b/catch/unit/texture/tex1DLayeredLod.cc new file mode 100644 index 0000000000..874c99c85a --- /dev/null +++ b/catch/unit/texture/tex1DLayeredLod.cc @@ -0,0 +1,140 @@ +/* +Copyright (c) 2023 Advanced 
Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex1DLayeredLod tex1DLayeredLod + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DLayeredLod` and read mode set to `hipReadModeElementType`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1DLayeredLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1DLayeredLod_Positive_ReadModeElementType", "", char, unsigned char, + short, unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + tex1DLayeredLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0, 0); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Index: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = fixture.tex_h.Tex1D(x, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DLayeredLod` and read mode set to + * `hipReadModeNormalizedFloat`. The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1DLayeredLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + tex1DLayeredLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0, 0); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Filter mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = + Vec4Map(fixture.tex_h.Tex1D(x, params.tex_desc), NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex1DLod.cc b/catch/unit/texture/tex1DLod.cc new file mode 100644 index 0000000000..ceee1211b3 --- /dev/null +++ b/catch/unit/texture/tex1DLod.cc @@ -0,0 +1,140 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. 
All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex1DLod tex1DLod + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DLod` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1DLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1DLod_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + tex1DLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Index: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = fixture.tex_h.Tex1D(x, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1DLod` and read mode set to `hipReadModeNormalizedFloat`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex1DLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1DLod_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, + unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(1024, 0, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); + tex1DLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Filter mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = + Vec4Map(fixture.tex_h.Tex1D(x, params.tex_desc), NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex1Dfetch.cc b/catch/unit/texture/tex1Dfetch.cc new file mode 100644 index 0000000000..9354d92a46 --- /dev/null +++ b/catch/unit/texture/tex1Dfetch.cc @@ -0,0 +1,162 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include +#include + +#include "kernels.hh" +#include "utils.hh" +#include "vec4.hh" + +/** + * @addtogroup tex1Dfetch tex1Dfetch + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1Dfetch` and read mode set to `hipReadModeElementType`.
+ * Test source + * ------------------------ + * - unit/texture/tex1Dfetch.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1Dfetch_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + std::vector> tex_h(1024); + for (auto i = 0u; i < tex_h.size(); ++i) { + tex_h[i].x = i + 7; + tex_h[i].y = i + 7; + tex_h[i].z = i + 7; + tex_h[i].w = i + 7; + } + + const auto alloc_size = tex_h.size() * sizeof(vec4); + LinearAllocGuard> tex_alloc_d(LinearAllocs::hipMalloc, alloc_size); + HIP_CHECK(hipMemcpy(tex_alloc_d.ptr(), tex_h.data(), alloc_size, hipMemcpyHostToDevice)); + + hipResourceDesc res_desc; + memset(&res_desc, 0, sizeof(res_desc)); + res_desc.resType = hipResourceTypeLinear; + res_desc.res.linear.devPtr = tex_alloc_d.ptr(); + res_desc.res.linear.desc = hipCreateChannelDesc>(); + res_desc.res.linear.sizeInBytes = alloc_size; + + hipTextureDesc tex_desc; + memset(&tex_desc, 0, sizeof(tex_desc)); + tex_desc.filterMode = hipFilterModePoint; + tex_desc.readMode = hipReadModeElementType; + tex_desc.normalizedCoords = false; + tex_desc.addressMode[0] = hipAddressModeClamp; + + LinearAllocGuard> out_alloc_d(LinearAllocs::hipMalloc, alloc_size); + TextureGuard tex(&res_desc, &tex_desc); + + const auto num_threads = std::min(1024, tex_h.size()); + const auto num_blocks = (tex_h.size() + num_threads - 1) / num_threads; + tex1DfetchKernel> + <<>>(out_alloc_d.ptr(), tex_h.size(), tex.object()); + + std::vector> out_alloc_h(tex_h.size()); + HIP_CHECK(hipMemcpy(out_alloc_h.data(), out_alloc_d.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + for (auto i = 0u; i < out_alloc_h.size(); ++i) { + INFO("Index: " << i); + const auto ref_val = tex_h[i]; + REQUIRE(ref_val.x == out_alloc_h[i].x); + REQUIRE(ref_val.y == out_alloc_h[i].y); + REQUIRE(ref_val.z == out_alloc_h[i].z); + REQUIRE(ref_val.w == 
out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex1Dfetch` and read mode set to `hipReadModeNormalizedFloat`. + * Test source + * ------------------------ + * - unit/texture/tex1Dfetch.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex1Dfetch_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + std::vector> tex_h(1024); + for (auto i = 0u; i < tex_h.size(); ++i) { + tex_h[i].x = i + 7; + tex_h[i].y = i + 7; + tex_h[i].z = i + 7; + tex_h[i].w = i + 7; + } + + const auto alloc_size = tex_h.size() * sizeof(vec4); + LinearAllocGuard> tex_alloc_d(LinearAllocs::hipMalloc, alloc_size); + HIP_CHECK(hipMemcpy(tex_alloc_d.ptr(), tex_h.data(), alloc_size, hipMemcpyHostToDevice)); + + hipResourceDesc res_desc; + memset(&res_desc, 0, sizeof(res_desc)); + res_desc.resType = hipResourceTypeLinear; + res_desc.res.linear.devPtr = tex_alloc_d.ptr(); + res_desc.res.linear.desc = hipCreateChannelDesc>(); + res_desc.res.linear.sizeInBytes = alloc_size; + + hipTextureDesc tex_desc; + memset(&tex_desc, 0, sizeof(tex_desc)); + tex_desc.filterMode = hipFilterModePoint; + tex_desc.readMode = hipReadModeNormalizedFloat; /* matches the normalized reference values checked below */ + tex_desc.normalizedCoords = false; + tex_desc.addressMode[0] = hipAddressModeClamp; + + LinearAllocGuard> out_alloc_d(LinearAllocs::hipMalloc, alloc_size); + TextureGuard tex(&res_desc, &tex_desc); + + const auto num_threads = std::min(1024, tex_h.size()); + const auto num_blocks = (tex_h.size() + num_threads - 1) / num_threads; + tex1DfetchKernel> + <<>>(out_alloc_d.ptr(), tex_h.size(), tex.object()); + + std::vector> out_alloc_h(tex_h.size()); + HIP_CHECK(hipMemcpy(out_alloc_h.data(), out_alloc_d.ptr(), alloc_size, hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + + for (auto i = 0u; i < out_alloc_h.size(); ++i) { + INFO("Index: " << i); + const auto ref_val =
Vec4Map(tex_h[i], NormalizeInteger); + REQUIRE(ref_val.x == out_alloc_h[i].x); + REQUIRE(ref_val.y == out_alloc_h[i].y); + REQUIRE(ref_val.z == out_alloc_h[i].z); + REQUIRE(ref_val.w == out_alloc_h[i].w); + } +} \ No newline at end of file From 37d8529a9bbfdfd9f0cb4708c6c72301423ca733 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 14:22:01 +0100 Subject: [PATCH 29/71] EXSWHTEC-318 - Implement tests for 2D texture device functions #367 Change-Id: I5404eae219d23dc058aa6ef150a0764b06ab6de1 --- catch/unit/texture/CMakeLists.txt | 2 + catch/unit/texture/test_fixture.hh | 3 +- catch/unit/texture/tex2D.cc | 174 ++++++++++++++++++++++ catch/unit/texture/tex2DLayered.cc | 183 ++++++++++++++++++++++++ catch/unit/texture/texture_reference.hh | 11 +- 5 files changed, 363 insertions(+), 10 deletions(-) create mode 100644 catch/unit/texture/tex2D.cc create mode 100644 catch/unit/texture/tex2DLayered.cc diff --git a/catch/unit/texture/CMakeLists.txt b/catch/unit/texture/CMakeLists.txt index 73394005b6..14a74de8f5 100644 --- a/catch/unit/texture/CMakeLists.txt +++ b/catch/unit/texture/CMakeLists.txt @@ -42,6 +42,8 @@ set(TEST_SRC hipTextureObj3DCheckModes.cc hipTextureObj1DCheckSRGBModes.cc hipTextureObj2DCheckSRGBModes.cc + tex2D.cc + tex2DLayered.cc hipTexObjectTests.cc hipTextureObjectTests.cc hipBindTextureToMipmappedArray.cc diff --git a/catch/unit/texture/test_fixture.hh b/catch/unit/texture/test_fixture.hh index 47ac8b73bc..0572fd6fae 100644 --- a/catch/unit/texture/test_fixture.hh +++ b/catch/unit/texture/test_fixture.hh @@ -126,7 +126,8 @@ template struct TextureTestFix SetVec4(host_alloc.ptr()[i], i + test_value_offset); } - hipMemcpy3DParms memcpy_params = {}; + hipMemcpy3DParms memcpy_params; + memset(&memcpy_params, 0, sizeof(hipMemcpy3DParms)); memcpy_params.dstArray = tex_alloc_d.ptr(); memcpy_params.extent = params.LayeredExtent(); memcpy_params.extent.height =
memcpy_params.extent.height ?: 1; diff --git a/catch/unit/texture/tex2D.cc b/catch/unit/texture/tex2D.cc new file mode 100644 index 0000000000..79d6055ede --- /dev/null +++ b/catch/unit/texture/tex2D.cc @@ -0,0 +1,174 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex2D tex2D + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2D` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex2D_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + TextureTestParams params = {0}; + params.extent = make_hipExtent(16, 4, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + tex2DKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, params.tex_desc.normalizedCoords); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("j: " << j); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + auto index = j * params.NumItersX() + i; + + const auto ref_val = fixture.tex_h.Tex2D(x, y, params.tex_desc); + REQUIRE(ref_val.x == 
fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2D` and read mode set to `hipReadModeNormalizedFloat`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex2D_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, + unsigned short) { + TextureTestParams params = {0}; + params.extent = make_hipExtent(16, 4, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + tex2DKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, params.tex_desc.normalizedCoords); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + 
INFO("i: " << i); + INFO("j: " << j); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + auto index = j * params.NumItersX() + i; + + auto ref_val = + Vec4Map(fixture.tex_h.Tex2D(x, y, params.tex_desc), NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex2DLayered.cc b/catch/unit/texture/tex2DLayered.cc new file mode 100644 index 0000000000..b05a2e0a32 --- /dev/null +++ b/catch/unit/texture/tex2DLayered.cc @@ -0,0 +1,183 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex2DLayered tex2DLayered + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DLayered` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex2DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex2DLayered_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + TextureTestParams params = {0}; + params.extent = make_hipExtent(16, 4, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex2DLayeredKernel> + <<>>(fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), + fixture.tex.object(), params.Width(), params.Height(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto j = 0u; j < params.NumItersY(); 
++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("i: " << i); + INFO("j: " << j); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + auto index = j * params.NumItersX() + i; + + const auto ref_val = fixture.tex_h.Tex2DLayered(x, y, layer, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DLayered` and read mode set to `hipReadModeNormalizedFloat`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex2DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex2DLayered_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + TextureTestParams params = {0}; + params.extent = make_hipExtent(16, 4, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex2DLayeredKernel> + <<>>(fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), + fixture.tex.object(), params.Width(), params.Height(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("i: " << i); + INFO("j: " << j); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + auto 
index = j * params.NumItersX() + i; + + auto ref_val = Vec4Map(fixture.tex_h.Tex2DLayered(x, y, layer, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } + } +} \ No newline at end of file diff --git a/catch/unit/texture/texture_reference.hh b/catch/unit/texture/texture_reference.hh index d2ee9159e0..21e08abd0e 100644 --- a/catch/unit/texture/texture_reference.hh +++ b/catch/unit/texture/texture_reference.hh @@ -236,17 +236,10 @@ template class TextureReference { return coord; } - template float FloatToNBitFractional(float x) const { - constexpr size_t mult = 1 << N; - const auto x_trunc = std::trunc(x); - const auto x_frac = std::round((x - x_trunc) * mult) / mult; - return x_trunc + x_frac; - } - std::tuple GetLinearFilteringParams(float coord) const { - const auto coordB = FloatToNBitFractional<8>(coord - 0.5f); + const auto coordB = coord - 0.5f; const auto index = floorf(coordB); - const auto coeff = coordB - index; + const FixedPoint<8> coeff = coordB - index; return {index, coeff}; } From 5fe762ae5deea9a23516c87613c89d8e72d15cec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 14:11:27 +0100 Subject: [PATCH 30/71] EXSWHTEC-319 - Implement tests for 3D texture device functions #368 Change-Id: Ifc7a6feae1a9567df29d35928d8c4328497bfc97 --- catch/hipTestMain/config/config_amd_windows | 9 + catch/unit/texture/CMakeLists.txt | 3 + catch/unit/texture/kernels.hh | 42 +++++ catch/unit/texture/tex3D.cc | 192 ++++++++++++++++++++ catch/unit/texture/tex3DGrad.cc | 191 +++++++++++++++++++ catch/unit/texture/tex3DLod.cc | 191 +++++++++++++++++++ 6 files changed, 628 insertions(+) create mode 100644 catch/unit/texture/tex3D.cc create mode 100644 
catch/unit/texture/tex3DGrad.cc create mode 100644 catch/unit/texture/tex3DLod.cc diff --git a/catch/hipTestMain/config/config_amd_windows b/catch/hipTestMain/config/config_amd_windows index b3b396f6ba..912f7ecef0 100644 --- a/catch/hipTestMain/config/config_amd_windows +++ b/catch/hipTestMain/config/config_amd_windows @@ -211,6 +211,15 @@ "Unit_hipHostMalloc_AllocateUseMoreThanAvailGPUMemory", "=== SWDEV-432250:Below tests failed in stress test on 10/11/23 ===", "Unit_hipVectorTypes_test_on_device", +<<<<<<< HEAD +======= + "Unit_Layered1DTexture_Check_DeviceBufferToFromLayered1DArray - ushort4", + "Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - float4", + "Unit_tex3DLod_Positive_ReadModeElementType", + "Unit_tex3DLod_Positive_ReadModeNormalizedFloat", + "Unit_tex3DGrad_Positive_ReadModeElementType", + "Unit_tex3DGrad_Positive_ReadModeNormalizedFloat", +>>>>>>> 5cdc6efc (Merge branch 'develop' into tex3D_tests) "=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===", "Unit_hiprtc_stdheaders", "NOTE: The following test is disabled due to defect - EXSWHTEC-241", diff --git a/catch/unit/texture/CMakeLists.txt b/catch/unit/texture/CMakeLists.txt index 14a74de8f5..85f653573c 100644 --- a/catch/unit/texture/CMakeLists.txt +++ b/catch/unit/texture/CMakeLists.txt @@ -60,6 +60,9 @@ set(TEST_SRC tex1DLayeredGrad.cc tex1DLayeredLod.cc tex1DLod.cc + tex3D.cc + tex3DLod.cc + tex3DGrad.cc ) if(WIN32) diff --git a/catch/unit/texture/kernels.hh b/catch/unit/texture/kernels.hh index ac5d73ff40..ec4e1449ef 100644 --- a/catch/unit/texture/kernels.hh +++ b/catch/unit/texture/kernels.hh @@ -131,6 +131,48 @@ __global__ void tex3DKernel(TexelType* const out, size_t N_x, size_t N_y, size_t out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = tex3D(tex_obj, x, y, z); } +template +__global__ void tex3DLodKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t 
depth, size_t num_subdivisions, bool normalized_coords, + float level) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; + if (tid_z >= N_z) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); + + out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = tex3DLod(tex_obj, x, y, z, level); +} + +template +__global__ void tex3DGradKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t depth, size_t num_subdivisions, bool normalized_coords, + float4 dx, float4 dy) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; + if (tid_z >= N_z) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); + + out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = tex3DGrad(tex_obj, x, y, z, dx, dy); +} + template __global__ void tex1DLayeredKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, size_t width, size_t num_subdivisions, bool normalized_coords, diff --git a/catch/unit/texture/tex3D.cc b/catch/unit/texture/tex3D.cc new file mode 100644 index 0000000000..ecfdd535bb --- /dev/null +++ b/catch/unit/texture/tex3D.cc @@ -0,0 +1,192 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex3D tex3D + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex3D` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex3D_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 4, 2); + params.num_subdivisions = 2; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + tex3DKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumIters(); ++i) { + const auto plane = i % (params.NumItersX() * params.NumItersY()); + float x = plane % params.NumItersX(); + float y = plane / params.NumItersX(); + float z = i / (params.NumItersX() * params.NumItersY()); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + z = GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); 
+ INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + const auto ref_val = fixture.tex_h.Tex3D(x, y, z, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex3D` and read mode set to `hipReadModeNormalizedFloat`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex3D_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, + unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 2); + params.num_subdivisions = 2; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + tex3DKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumIters(); ++i) { + const auto plane = i % (params.NumItersX() * params.NumItersY()); + float x = plane % params.NumItersX(); + float y = plane / params.NumItersX(); + float z = i / (params.NumItersX() * params.NumItersY()); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + z = GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Normalized coordinates: " << std::boolalpha << 
params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + + auto ref_val = Vec4Map(fixture.tex_h.Tex3D(x, y, z, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} diff --git a/catch/unit/texture/tex3DGrad.cc b/catch/unit/texture/tex3DGrad.cc new file mode 100644 index 0000000000..a3f3d8ddfd --- /dev/null +++ b/catch/unit/texture/tex3DGrad.cc @@ -0,0 +1,191 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex3DGrad tex3DGrad + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex3DGrad` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex3DGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex3DGrad_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 2); + params.num_subdivisions = 2; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + tex3DGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 
float4{}, float4{}); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumIters(); ++i) { + const auto plane = i % (params.NumItersX() * params.NumItersY()); + float x = plane % params.NumItersX(); + float y = plane / params.NumItersX(); + float z = i / (params.NumItersX() * params.NumItersY()); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + z = GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + const auto ref_val = fixture.tex_h.Tex3D(x, y, z, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex3DGrad` and read mode set to `hipReadModeNormalizedFloat`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex3DGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex3DGrad_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 2); + params.num_subdivisions = 2; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + tex3DGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, float4{}, float4{}); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumIters(); ++i) { + const auto plane = i % (params.NumItersX() * params.NumItersY()); + float x = plane % params.NumItersX(); + float y = plane / params.NumItersX(); + float z = i / (params.NumItersX() * params.NumItersY()); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + z = GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Normalized coordinates: " << std::boolalpha << 
params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto ref_val = Vec4Map(fixture.tex_h.Tex3D(x, y, z, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} diff --git a/catch/unit/texture/tex3DLod.cc b/catch/unit/texture/tex3DLod.cc new file mode 100644 index 0000000000..bd5ee5f7a5 --- /dev/null +++ b/catch/unit/texture/tex3DLod.cc @@ -0,0 +1,191 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex3DLod tex3DLod + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex3DLod` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex3DLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex3DLod_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 2); + params.num_subdivisions = 2; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + tex3DLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0.0); + 
HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumIters(); ++i) { + const auto plane = i % (params.NumItersX() * params.NumItersY()); + float x = plane % params.NumItersX(); + float y = plane / params.NumItersX(); + float z = i / (params.NumItersX() * params.NumItersY()); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + z = GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + const auto ref_val = fixture.tex_h.Tex3D(x, y, z, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex3DLod` and read mode set to `hipReadModeNormalizedFloat`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex3DLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex3DLod_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, + unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 2); + params.num_subdivisions = 2; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + tex3DLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0.0); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumIters(); ++i) { + const auto plane = i % (params.NumItersX() * params.NumItersY()); + float x = plane % params.NumItersX(); + float y = plane / params.NumItersX(); + float z = i / (params.NumItersX() * params.NumItersY()); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + z = GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Normalized coordinates: " << std::boolalpha << 
params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto ref_val = Vec4Map(fixture.tex_h.Tex3D(x, y, z, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} From 6eec9aa90d9ef7fed025914f8c3834b4175514d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 14:05:57 +0100 Subject: [PATCH 31/71] EXSWHTEC-320 - Implement tests for cubemap texture device functions #369 Change-Id: I1a247dba4e46ed7e1045dee0467d3fdac1f70cd0 --- catch/hipTestMain/config/config_amd_windows | 7 - catch/unit/texture/CMakeLists.txt | 6 + catch/unit/texture/kernels.hh | 130 ++++++++++++ catch/unit/texture/test_fixture.hh | 7 +- catch/unit/texture/texCubemap.cc | 201 +++++++++++++++++++ catch/unit/texture/texCubemapGrad.cc | 201 +++++++++++++++++++ catch/unit/texture/texCubemapLayered.cc | 209 ++++++++++++++++++++ catch/unit/texture/texCubemapLayeredGrad.cc | 209 ++++++++++++++++++++ catch/unit/texture/texCubemapLayeredLod.cc | 209 ++++++++++++++++++++ catch/unit/texture/texCubemapLod.cc | 201 +++++++++++++++++++ catch/unit/texture/texture_reference.hh | 69 ++++++- 11 files changed, 1439 insertions(+), 10 deletions(-) create mode 100644 catch/unit/texture/texCubemap.cc create mode 100644 catch/unit/texture/texCubemapGrad.cc create mode 100644 catch/unit/texture/texCubemapLayered.cc create mode 100644 
catch/unit/texture/texCubemapLayeredGrad.cc create mode 100644 catch/unit/texture/texCubemapLayeredLod.cc create mode 100644 catch/unit/texture/texCubemapLod.cc diff --git a/catch/hipTestMain/config/config_amd_windows b/catch/hipTestMain/config/config_amd_windows index 912f7ecef0..724aef781e 100644 --- a/catch/hipTestMain/config/config_amd_windows +++ b/catch/hipTestMain/config/config_amd_windows @@ -211,15 +211,8 @@ "Unit_hipHostMalloc_AllocateUseMoreThanAvailGPUMemory", "=== SWDEV-432250:Below tests failed in stress test on 10/11/23 ===", "Unit_hipVectorTypes_test_on_device", -<<<<<<< HEAD -======= "Unit_Layered1DTexture_Check_DeviceBufferToFromLayered1DArray - ushort4", "Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - float4", - "Unit_tex3DLod_Positive_ReadModeElementType", - "Unit_tex3DLod_Positive_ReadModeNormalizedFloat", - "Unit_tex3DGrad_Positive_ReadModeElementType", - "Unit_tex3DGrad_Positive_ReadModeNormalizedFloat", ->>>>>>> 5cdc6efc (Merge branch 'develop' into tex3D_tests) "=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===", "Unit_hiprtc_stdheaders", "NOTE: The following test is disabled due to defect - EXSWHTEC-241", diff --git a/catch/unit/texture/CMakeLists.txt b/catch/unit/texture/CMakeLists.txt index 85f653573c..18881833b9 100644 --- a/catch/unit/texture/CMakeLists.txt +++ b/catch/unit/texture/CMakeLists.txt @@ -63,6 +63,12 @@ set(TEST_SRC tex3D.cc tex3DLod.cc tex3DGrad.cc + texCubemap.cc + texCubemapLod.cc + texCubemapGrad.cc + texCubemapLayered.cc + texCubemapLayeredLod.cc + texCubemapLayeredGrad.cc ) if(WIN32) diff --git a/catch/unit/texture/kernels.hh b/catch/unit/texture/kernels.hh index ec4e1449ef..f9f7a6a41e 100644 --- a/catch/unit/texture/kernels.hh +++ b/catch/unit/texture/kernels.hh @@ -173,6 +173,69 @@ __global__ void tex3DGradKernel(TexelType* const out, size_t N_x, size_t N_y, si out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = tex3DGrad(tex_obj, x, y, z, dx, dy); } 
+template +__global__ void texCubemapKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t depth, size_t num_subdivisions, bool normalized_coords) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; + if (tid_z >= N_z) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); + + out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = texCubemap(tex_obj, x, y, z); +} + +template +__global__ void texCubemapLodKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t depth, size_t num_subdivisions, bool normalized_coords, + float level) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; + if (tid_z >= N_z) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); + + out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = texCubemapLod(tex_obj, x, y, z, level); +} + +template +__global__ void texCubemapGradKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t depth, size_t num_subdivisions, bool normalized_coords, + float4 dx, float4 dy) { + const auto tid_x = blockIdx.x * blockDim.x + 
threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; + if (tid_z >= N_z) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); + + out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = + texCubemapGrad(tex_obj, x, y, z, dx, dy); +} + template __global__ void tex1DLayeredKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, size_t width, size_t num_subdivisions, bool normalized_coords, @@ -198,4 +261,71 @@ __global__ void tex2DLayeredKernel(TexelType* const out, size_t N_x, size_t N_y, float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); out[tid_y * N_x + tid_x] = tex2DLayered(tex_obj, x, y, layer); +} + +template +__global__ void texCubemapLayeredKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t depth, size_t num_subdivisions, + bool normalized_coords, size_t layer) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; + if (tid_z >= N_z) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); + + out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = + texCubemapLayered(tex_obj, x, y, z, layer); +} + +template +__global__ void texCubemapLayeredLodKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, + hipTextureObject_t 
tex_obj, size_t width, size_t height, + size_t depth, size_t num_subdivisions, + bool normalized_coords, size_t layer, float level) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; + if (tid_z >= N_z) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); + + out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = + texCubemapLayeredLod(tex_obj, x, y, z, layer, level); +} + +template +__global__ void texCubemapLayeredGradKernel(TexelType* const out, size_t N_x, size_t N_y, + size_t N_z, hipTextureObject_t tex_obj, size_t width, + size_t height, size_t depth, size_t num_subdivisions, + bool normalized_coords, size_t layer, float4 dx, + float4 dy) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + const auto tid_z = blockIdx.z * blockDim.z + threadIdx.z; + if (tid_z >= N_z) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + float z = GetCoordinate(tid_z, N_z, depth, num_subdivisions, normalized_coords); + + out[tid_z * N_x * N_y + tid_y * N_x + tid_x] = + texCubemapLayeredGrad(tex_obj, x, y, z, layer, dx, dy); } \ No newline at end of file diff --git a/catch/unit/texture/test_fixture.hh b/catch/unit/texture/test_fixture.hh index 0572fd6fae..28ca9ee2df 100644 --- a/catch/unit/texture/test_fixture.hh +++ b/catch/unit/texture/test_fixture.hh @@ -34,6 +34,7 @@ template struct TextureTestParams { size_t layers; size_t 
num_subdivisions; hipTextureDesc tex_desc; + bool cubemap; size_t Size() const { return extent.width * (extent.height ?: 1) * (extent.depth ?: 1) * (layers ?: 1); @@ -53,6 +54,10 @@ template struct TextureTestParams { size_t Depth() const { return extent.depth; } + unsigned int Flags() const { + return (Layered() ? hipArrayLayered : 0u) | (cubemap ? hipArrayCubemap : 0u); + } + hipExtent LayeredExtent() const { return Layered() ? make_hipExtent(Width(), Height(), layers) : extent; } @@ -115,7 +120,7 @@ template struct TextureTestFix : params{p}, host_alloc{LinearAllocs::hipHostMalloc, sizeof(VecType) * params.Size()}, tex_h{host_alloc.ptr(), params.extent, params.layers}, - tex_alloc_d{params.LayeredExtent(), params.Layered() ? hipArrayLayered : 0u}, + tex_alloc_d{params.LayeredExtent(), params.Flags()}, tex{ResDesc(), &params.tex_desc}, out_alloc_d{LinearAllocs::hipMalloc, sizeof(OutType) * params.NumIters()}, out_alloc_h(params.NumIters()) {} diff --git a/catch/unit/texture/texCubemap.cc b/catch/unit/texture/texCubemap.cc new file mode 100644 index 0000000000..ade3775c52 --- /dev/null +++ b/catch/unit/texture/texCubemap.cc @@ -0,0 +1,201 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup texCubemap texCubemap + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `texCubemap` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/texCubemap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_texCubemap_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 6); + params.num_subdivisions = 4; + params.cubemap = true; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + 
texCubemapKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto k = 0u; k < params.NumItersZ(); ++k) { + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("j: " << j); + INFO("k: " << k); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i; + + const auto ref_val = fixture.tex_h.TexCubemap(x, y, z, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `texCubemap` and read mode set to `hipReadModeNormalizedFloat`. 
+ * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/texCubemap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_texCubemap_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 6); + params.num_subdivisions = 4; + params.cubemap = true; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = num_threads_z; + + texCubemapKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto k = 0u; k < params.NumItersZ(); ++k) { + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float z = GetCoordinate(k, params.NumItersZ(), 
params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("j: " << j); + INFO("k: " << k); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i; + + auto ref_val = Vec4Map(fixture.tex_h.TexCubemap(x, y, z, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } + } +} \ No newline at end of file diff --git a/catch/unit/texture/texCubemapGrad.cc b/catch/unit/texture/texCubemapGrad.cc new file mode 100644 index 0000000000..b2024737b3 --- /dev/null +++ b/catch/unit/texture/texCubemapGrad.cc @@ -0,0 +1,201 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup texCubemapGrad texCubemapGrad + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `texCubemapGrad` and read mode set to `hipReadModeElementType`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/texCubemapGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_texCubemapGrad_Positive_ReadModeElementType", "", char, unsigned char, + short, unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(2, 2, 6); + params.num_subdivisions = 4; + params.cubemap = true; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); + const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + dim_grid.z = num_blocks_z; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + dim_block.z = 
num_threads_z; + + texCubemapGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(), + fixture.tex.object(), params.Width(), params.Height(), params.Depth(), + params.num_subdivisions, params.tex_desc.normalizedCoords, float4{}, float4{}); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto k = 0u; k < params.NumItersZ(); ++k) { + for (auto j = 0u; j < params.NumItersY(); ++j) { + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("i: " << i); + INFO("j: " << j); + INFO("k: " << k); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + INFO("z: " << std::fixed << std::setprecision(16) << z); + + auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i; + + const auto ref_val = fixture.tex_h.TexCubemap(x, y, z, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); + } + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `texCubemapGrad` and read mode set to + * 
`hipReadModeNormalizedFloat`. The test is performed with:
+ *    - normalized coordinates
+ *    - non-normalized coordinates
+ *    - Nearest-point sampling
+ *    - Linear filtering
+ *    - All combinations of different addressing modes.
+ * Test source
+ * ------------------------
+ *    - unit/texture/texCubemapGrad.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat", "", char, unsigned char,
+                   short, unsigned short) {
+  CHECK_IMAGE_SUPPORT;
+
+  TextureTestParams params = {};
+  params.extent = make_hipExtent(2, 2, 6);  // width 2, height 2, depth 6 - presumably one 2x2 slice per cube face; TODO confirm
+  params.num_subdivisions = 4;
+  params.cubemap = true;
+  params.GenerateTextureDesc(hipReadModeNormalizedFloat);
+
+  TextureTestFixture fixture{params};
+
+  const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX());
+  const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY());
+  const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ());
+
+  dim3 dim_grid;  // blocks per axis, taken from the GetLaunchConfig results above
+  dim_grid.x = num_blocks_x;
+  dim_grid.y = num_blocks_y;
+  dim_grid.z = num_blocks_z;
+
+  dim3 dim_block;  // threads per axis, taken from the GetLaunchConfig results above
+  dim_block.x = num_threads_x;
+  dim_block.y = num_threads_y;
+  dim_block.z = num_threads_z;
+
+  texCubemapGradKernel><<>>(  // NOTE(review): launch syntax looks truncated here; dim_grid/dim_block are otherwise unused
+      fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(),
+      fixture.tex.object(), params.Width(), params.Height(), params.Depth(),
+      params.num_subdivisions, params.tex_desc.normalizedCoords, float4{}, float4{});  // two value-initialized float4 arguments - presumably the gradients; TODO confirm
+  HIP_CHECK(hipGetLastError());  // report any error left by the launch above
+
+  fixture.LoadOutput();  // presumably fills fixture.out_alloc_h from the device buffer - TODO confirm
+
+  for (auto k = 0u; k < params.NumItersZ(); ++k) {
+    for (auto j = 0u; j < params.NumItersY(); ++j) {
+      for (auto i = 0u; i < params.NumItersX(); ++i) {
+        float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions,
+                                params.tex_desc.normalizedCoords);
+        float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions,
+                                params.tex_desc.normalizedCoords);
+        float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions,
+                                params.tex_desc.normalizedCoords);
+
+        INFO("i: " << i);
+        INFO("j: " << j);
+        INFO("k: " << k);
+        INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords);
+        INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0]));
+        INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1]));
+        INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2]));
+        INFO("x: " << std::fixed << std::setprecision(16) << x);
+        INFO("y: " << std::fixed << std::setprecision(16) << y);
+        INFO("z: " << std::fixed << std::setprecision(16) << z);
+
+        auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i;  // flat offset of (i, j, k), i varying fastest
+
+        auto ref_val = Vec4Map(fixture.tex_h.TexCubemap(x, y, z, params.tex_desc),
+                               NormalizeInteger);  // presumably rescales the integer reference texel to a normalized float - TODO confirm
+        REQUIRE(ref_val.x == fixture.out_alloc_h[index].x);  // exact comparison, no tolerance
+        REQUIRE(ref_val.y == fixture.out_alloc_h[index].y);
+        REQUIRE(ref_val.z == fixture.out_alloc_h[index].z);
+        REQUIRE(ref_val.w == fixture.out_alloc_h[index].w);
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/catch/unit/texture/texCubemapLayered.cc b/catch/unit/texture/texCubemapLayered.cc
new file mode 100644
index 0000000000..d7db8d0847
--- /dev/null
+++ b/catch/unit/texture/texCubemapLayered.cc
@@ -0,0 +1,209 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup texCubemapLayered texCubemapLayered + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `texCubemapLayered` and read mode set to + * `hipReadModeElementType`. The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source
+ * ------------------------
+ *    - unit/texture/texCubemapLayered.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_texCubemapLayered_Positive_ReadModeElementType", "", char, unsigned char,
+                   short, unsigned short, int, unsigned int, float) {
+  CHECK_IMAGE_SUPPORT;
+
+  TextureTestParams params = {};
+  params.extent = make_hipExtent(2, 2, 6);  // width 2, height 2, depth 6 - presumably one 2x2 slice per cube face; TODO confirm
+  params.num_subdivisions = 4;
+  params.layers = 1;
+  params.cubemap = true;
+  params.GenerateTextureDesc();
+
+  TextureTestFixture fixture{params};
+
+  const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX());
+  const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY());
+  const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ());
+
+  dim3 dim_grid;  // blocks per axis, taken from the GetLaunchConfig results above
+  dim_grid.x = num_blocks_x;
+  dim_grid.y = num_blocks_y;
+  dim_grid.z = num_blocks_z;
+
+  dim3 dim_block;  // threads per axis, taken from the GetLaunchConfig results above
+  dim_block.x = num_threads_x;
+  dim_block.y = num_threads_y;
+  dim_block.z = num_threads_z;
+
+  for (auto layer = 0u; layer < params.layers; ++layer) {  // params.layers is 1 above, so only layer 0 runs
+    texCubemapLayeredKernel><<>>(  // NOTE(review): launch syntax looks truncated here; dim_grid/dim_block are otherwise unused
+        fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(),
+        fixture.tex.object(), params.Width(), params.Height(), params.Depth(),
+        params.num_subdivisions, params.tex_desc.normalizedCoords, layer);
+    HIP_CHECK(hipGetLastError());  // report any error left by the launch above
+
+    fixture.LoadOutput();  // presumably fills fixture.out_alloc_h from the device buffer - TODO confirm
+
+    for (auto k = 0u; k < params.NumItersZ(); ++k) {
+      for (auto j = 0u; j < params.NumItersY(); ++j) {
+        for (auto i = 0u; i < params.NumItersX(); ++i) {
+          float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+          float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+          float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+
+          INFO("Layer: " << layer);
+          INFO("i: " << i);
+          INFO("j: " << j);
+          INFO("k: " << k);
+          INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords);
+          INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0]));
+          INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1]));
+          INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2]));
+          INFO("x: " << std::fixed << std::setprecision(16) << x);
+          INFO("y: " << std::fixed << std::setprecision(16) << y);
+          INFO("z: " << std::fixed << std::setprecision(16) << z);
+
+          auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i;  // flat offset of (i, j, k), i varying fastest
+
+          const auto ref_val = fixture.tex_h.TexCubemap(x, y, z, params.tex_desc);  // NOTE(review): `layer` is not passed to the host reference; only equivalent while params.layers == 1
+          REQUIRE(ref_val.x == fixture.out_alloc_h[index].x);  // exact comparison, no tolerance
+          REQUIRE(ref_val.y == fixture.out_alloc_h[index].y);
+          REQUIRE(ref_val.z == fixture.out_alloc_h[index].z);
+          REQUIRE(ref_val.w == fixture.out_alloc_h[index].w);
+        }
+      }
+    }
+  }
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test texture fetching with `texCubemapLayered` and read mode set to
+ * `hipReadModeNormalizedFloat`. The test is performed with:
+ *    - normalized coordinates
+ *    - non-normalized coordinates
+ *    - Nearest-point sampling
+ *    - Linear filtering
+ *    - All combinations of different addressing modes.
+ * Test source
+ * ------------------------
+ *    - unit/texture/texCubemapLayered.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_texCubemapLayered_Positive_ReadModeNormalizedFloat", "", char,
+                   unsigned char, short, unsigned short) {
+  CHECK_IMAGE_SUPPORT;
+
+  TextureTestParams params = {};
+  params.extent = make_hipExtent(2, 2, 6);  // width 2, height 2, depth 6 - presumably one 2x2 slice per cube face; TODO confirm
+  params.num_subdivisions = 4;
+  params.layers = 1;
+  params.cubemap = true;
+  params.GenerateTextureDesc(hipReadModeNormalizedFloat);
+
+  TextureTestFixture fixture{params};
+
+  const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX());
+  const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY());
+  const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ());
+
+  dim3 dim_grid;  // blocks per axis, taken from the GetLaunchConfig results above
+  dim_grid.x = num_blocks_x;
+  dim_grid.y = num_blocks_y;
+  dim_grid.z = num_blocks_z;
+
+  dim3 dim_block;  // threads per axis, taken from the GetLaunchConfig results above
+  dim_block.x = num_threads_x;
+  dim_block.y = num_threads_y;
+  dim_block.z = num_threads_z;
+
+  for (auto layer = 0u; layer < params.layers; ++layer) {  // params.layers is 1 above, so only layer 0 runs
+    texCubemapLayeredKernel><<>>(  // NOTE(review): launch syntax looks truncated here; dim_grid/dim_block are otherwise unused
+        fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(),
+        fixture.tex.object(), params.Width(), params.Height(), params.Depth(),
+        params.num_subdivisions, params.tex_desc.normalizedCoords, layer);
+    HIP_CHECK(hipGetLastError());  // report any error left by the launch above
+
+    fixture.LoadOutput();  // presumably fills fixture.out_alloc_h from the device buffer - TODO confirm
+
+    for (auto k = 0u; k < params.NumItersZ(); ++k) {
+      for (auto j = 0u; j < params.NumItersY(); ++j) {
+        for (auto i = 0u; i < params.NumItersX(); ++i) {
+          float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+          float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+          float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+
+          INFO("Layer: " << layer);
+          INFO("i: " << i);
+          INFO("j: " << j);
+          INFO("k: " << k);
+          INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords);
+          INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0]));
+          INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1]));
+          INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2]));
+          INFO("x: " << std::fixed << std::setprecision(16) << x);
+          INFO("y: " << std::fixed << std::setprecision(16) << y);
+          INFO("z: " << std::fixed << std::setprecision(16) << z);
+
+          auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i;  // flat offset of (i, j, k), i varying fastest
+
+          auto ref_val = Vec4Map(fixture.tex_h.TexCubemap(x, y, z, params.tex_desc),  // NOTE(review): `layer` is not passed to the host reference; only equivalent while params.layers == 1
+                                 NormalizeInteger);  // presumably rescales the integer reference texel to a normalized float - TODO confirm
+          REQUIRE(ref_val.x == fixture.out_alloc_h[index].x);  // exact comparison, no tolerance
+          REQUIRE(ref_val.y == fixture.out_alloc_h[index].y);
+          REQUIRE(ref_val.z == fixture.out_alloc_h[index].z);
+          REQUIRE(ref_val.w == fixture.out_alloc_h[index].w);
+        }
+      }
+    }
+  }
+}  // NOTE(review): the doxygen group opened with "@{" at the top of this file has no closing "@}"
\ No newline at end of file
diff --git a/catch/unit/texture/texCubemapLayeredGrad.cc b/catch/unit/texture/texCubemapLayeredGrad.cc
new file mode 100644
index 0000000000..96dd0415b6
--- /dev/null
+++ b/catch/unit/texture/texCubemapLayeredGrad.cc
@@ -0,0 +1,209 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup texCubemapLayeredGrad texCubemapLayeredGrad + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `texCubemapLayeredGrad` and read mode set to + * `hipReadModeElementType`. The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source
+ * ------------------------
+ *    - unit/texture/texCubemapLayeredGrad.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_texCubemapLayeredGrad_Positive_ReadModeElementType", "", char,
+                   unsigned char, short, unsigned short, int, unsigned int, float) {
+  CHECK_IMAGE_SUPPORT;
+
+  TextureTestParams params = {};
+  params.extent = make_hipExtent(2, 2, 6);  // width 2, height 2, depth 6 - presumably one 2x2 slice per cube face; TODO confirm
+  params.num_subdivisions = 4;
+  params.layers = 1;
+  params.cubemap = true;
+  params.GenerateTextureDesc();
+
+  TextureTestFixture fixture{params};
+
+  const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX());
+  const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY());
+  const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ());
+
+  dim3 dim_grid;  // blocks per axis, taken from the GetLaunchConfig results above
+  dim_grid.x = num_blocks_x;
+  dim_grid.y = num_blocks_y;
+  dim_grid.z = num_blocks_z;
+
+  dim3 dim_block;  // threads per axis, taken from the GetLaunchConfig results above
+  dim_block.x = num_threads_x;
+  dim_block.y = num_threads_y;
+  dim_block.z = num_threads_z;
+
+  for (auto layer = 0u; layer < params.layers; ++layer) {  // params.layers is 1 above, so only layer 0 runs
+    texCubemapLayeredGradKernel><<>>(  // NOTE(review): launch syntax looks truncated here; dim_grid/dim_block are otherwise unused
+        fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(),
+        fixture.tex.object(), params.Width(), params.Height(), params.Depth(),
+        params.num_subdivisions, params.tex_desc.normalizedCoords, layer, float4{}, float4{});  // two value-initialized float4 arguments - presumably the gradients; TODO confirm
+    HIP_CHECK(hipGetLastError());  // report any error left by the launch above
+
+    fixture.LoadOutput();  // presumably fills fixture.out_alloc_h from the device buffer - TODO confirm
+
+    for (auto k = 0u; k < params.NumItersZ(); ++k) {
+      for (auto j = 0u; j < params.NumItersY(); ++j) {
+        for (auto i = 0u; i < params.NumItersX(); ++i) {
+          float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+          float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+          float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+
+          INFO("Layer: " << layer);
+          INFO("i: " << i);
+          INFO("j: " << j);
+          INFO("k: " << k);
+          INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords);
+          INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0]));
+          INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1]));
+          INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2]));
+          INFO("x: " << std::fixed << std::setprecision(16) << x);
+          INFO("y: " << std::fixed << std::setprecision(16) << y);
+          INFO("z: " << std::fixed << std::setprecision(16) << z);
+
+          auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i;  // flat offset of (i, j, k), i varying fastest
+
+          const auto ref_val = fixture.tex_h.TexCubemap(x, y, z, params.tex_desc);  // NOTE(review): `layer` is not passed to the host reference; only equivalent while params.layers == 1
+          REQUIRE(ref_val.x == fixture.out_alloc_h[index].x);  // exact comparison, no tolerance
+          REQUIRE(ref_val.y == fixture.out_alloc_h[index].y);
+          REQUIRE(ref_val.z == fixture.out_alloc_h[index].z);
+          REQUIRE(ref_val.w == fixture.out_alloc_h[index].w);
+        }
+      }
+    }
+  }
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test texture fetching with `texCubemapLayeredGrad` and read mode set to
+ * `hipReadModeNormalizedFloat`. The test is performed with:
+ *    - normalized coordinates
+ *    - non-normalized coordinates
+ *    - Nearest-point sampling
+ *    - Linear filtering
+ *    - All combinations of different addressing modes.
+ * Test source
+ * ------------------------
+ *    - unit/texture/texCubemapLayeredGrad.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat", "", char,
+                   unsigned char, short, unsigned short) {
+  CHECK_IMAGE_SUPPORT;
+
+  TextureTestParams params = {};
+  params.extent = make_hipExtent(2, 2, 6);  // width 2, height 2, depth 6 - presumably one 2x2 slice per cube face; TODO confirm
+  params.num_subdivisions = 4;
+  params.layers = 1;
+  params.cubemap = true;
+  params.GenerateTextureDesc(hipReadModeNormalizedFloat);
+
+  TextureTestFixture fixture{params};
+
+  const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX());
+  const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY());
+  const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ());
+
+  dim3 dim_grid;  // blocks per axis, taken from the GetLaunchConfig results above
+  dim_grid.x = num_blocks_x;
+  dim_grid.y = num_blocks_y;
+  dim_grid.z = num_blocks_z;
+
+  dim3 dim_block;  // threads per axis, taken from the GetLaunchConfig results above
+  dim_block.x = num_threads_x;
+  dim_block.y = num_threads_y;
+  dim_block.z = num_threads_z;
+
+  for (auto layer = 0u; layer < params.layers; ++layer) {  // params.layers is 1 above, so only layer 0 runs
+    texCubemapLayeredGradKernel><<>>(  // NOTE(review): launch syntax looks truncated here; dim_grid/dim_block are otherwise unused
+        fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(),
+        fixture.tex.object(), params.Width(), params.Height(), params.Depth(),
+        params.num_subdivisions, params.tex_desc.normalizedCoords, layer, float4{}, float4{});  // two value-initialized float4 arguments - presumably the gradients; TODO confirm
+    HIP_CHECK(hipGetLastError());  // report any error left by the launch above
+
+    fixture.LoadOutput();  // presumably fills fixture.out_alloc_h from the device buffer - TODO confirm
+
+    for (auto k = 0u; k < params.NumItersZ(); ++k) {
+      for (auto j = 0u; j < params.NumItersY(); ++j) {
+        for (auto i = 0u; i < params.NumItersX(); ++i) {
+          float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+          float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+          float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+
+          INFO("Layer: " << layer);
+          INFO("i: " << i);
+          INFO("j: " << j);
+          INFO("k: " << k);
+          INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords);
+          INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0]));
+          INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1]));
+          INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2]));
+          INFO("x: " << std::fixed << std::setprecision(16) << x);
+          INFO("y: " << std::fixed << std::setprecision(16) << y);
+          INFO("z: " << std::fixed << std::setprecision(16) << z);
+
+          auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i;  // flat offset of (i, j, k), i varying fastest
+
+          auto ref_val = Vec4Map(fixture.tex_h.TexCubemap(x, y, z, params.tex_desc),  // NOTE(review): `layer` is not passed to the host reference; only equivalent while params.layers == 1
+                                 NormalizeInteger);  // presumably rescales the integer reference texel to a normalized float - TODO confirm
+          REQUIRE(ref_val.x == fixture.out_alloc_h[index].x);  // exact comparison, no tolerance
+          REQUIRE(ref_val.y == fixture.out_alloc_h[index].y);
+          REQUIRE(ref_val.z == fixture.out_alloc_h[index].z);
+          REQUIRE(ref_val.w == fixture.out_alloc_h[index].w);
+        }
+      }
+    }
+  }
+}  // NOTE(review): the doxygen group opened with "@{" at the top of this file has no closing "@}"
\ No newline at end of file
diff --git a/catch/unit/texture/texCubemapLayeredLod.cc b/catch/unit/texture/texCubemapLayeredLod.cc
new file mode 100644
index 0000000000..fd9db2c0b7
--- /dev/null
+++ b/catch/unit/texture/texCubemapLayeredLod.cc
@@ -0,0 +1,209 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include  // NOTE(review): the header name appears to be missing on this line
+
+#include "kernels.hh"
+#include "test_fixture.hh"
+
+/**
+ * @addtogroup texCubemapLayeredLod texCubemapLayeredLod
+ * @{
+ * @ingroup TextureTest
+ */
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test texture fetching with `texCubemapLayeredLod` and read mode set to
+ * `hipReadModeElementType`. The test is performed with:
+ *    - normalized coordinates
+ *    - non-normalized coordinates
+ *    - Nearest-point sampling
+ *    - Linear filtering
+ *    - All combinations of different addressing modes.
+ * Test source
+ * ------------------------
+ *    - unit/texture/texCubemapLayeredLod.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_texCubemapLayeredLod_Positive_ReadModeElementType", "", char,
+                   unsigned char, short, unsigned short, int, unsigned int, float) {
+  CHECK_IMAGE_SUPPORT;
+
+  TextureTestParams params = {};
+  params.extent = make_hipExtent(2, 2, 6);  // width 2, height 2, depth 6 - presumably one 2x2 slice per cube face; TODO confirm
+  params.num_subdivisions = 4;
+  params.layers = 1;
+  params.cubemap = true;
+  params.GenerateTextureDesc();
+
+  TextureTestFixture fixture{params};
+
+  const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX());
+  const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY());
+  const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ());
+
+  dim3 dim_grid;  // blocks per axis, taken from the GetLaunchConfig results above
+  dim_grid.x = num_blocks_x;
+  dim_grid.y = num_blocks_y;
+  dim_grid.z = num_blocks_z;
+
+  dim3 dim_block;  // threads per axis, taken from the GetLaunchConfig results above
+  dim_block.x = num_threads_x;
+  dim_block.y = num_threads_y;
+  dim_block.z = num_threads_z;
+
+  for (auto layer = 0u; layer < params.layers; ++layer) {  // params.layers is 1 above, so only layer 0 runs
+    texCubemapLayeredLodKernel><<>>(  // NOTE(review): launch syntax looks truncated here; dim_grid/dim_block are otherwise unused
+        fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(),
+        fixture.tex.object(), params.Width(), params.Height(), params.Depth(),
+        params.num_subdivisions, params.tex_desc.normalizedCoords, layer, 0.0);  // trailing 0.0 is presumably the level of detail - TODO confirm
+    HIP_CHECK(hipGetLastError());  // report any error left by the launch above
+
+    fixture.LoadOutput();  // presumably fills fixture.out_alloc_h from the device buffer - TODO confirm
+
+    for (auto k = 0u; k < params.NumItersZ(); ++k) {
+      for (auto j = 0u; j < params.NumItersY(); ++j) {
+        for (auto i = 0u; i < params.NumItersX(); ++i) {
+          float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+          float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+          float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+
+          INFO("Layer: " << layer);
+          INFO("i: " << i);
+          INFO("j: " << j);
+          INFO("k: " << k);
+          INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords);
+          INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0]));
+          INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1]));
+          INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2]));
+          INFO("x: " << std::fixed << std::setprecision(16) << x);
+          INFO("y: " << std::fixed << std::setprecision(16) << y);
+          INFO("z: " << std::fixed << std::setprecision(16) << z);
+
+          auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i;  // flat offset of (i, j, k), i varying fastest
+
+          const auto ref_val = fixture.tex_h.TexCubemap(x, y, z, params.tex_desc);  // NOTE(review): `layer` is not passed to the host reference; only equivalent while params.layers == 1
+          REQUIRE(ref_val.x == fixture.out_alloc_h[index].x);  // exact comparison, no tolerance
+          REQUIRE(ref_val.y == fixture.out_alloc_h[index].y);
+          REQUIRE(ref_val.z == fixture.out_alloc_h[index].z);
+          REQUIRE(ref_val.w == fixture.out_alloc_h[index].w);
+        }
+      }
+    }
+  }
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test texture fetching with `texCubemapLayeredLod` and read mode set to
+ * `hipReadModeNormalizedFloat`. The test is performed with:
+ *    - normalized coordinates
+ *    - non-normalized coordinates
+ *    - Nearest-point sampling
+ *    - Linear filtering
+ *    - All combinations of different addressing modes.
+ * Test source
+ * ------------------------
+ *    - unit/texture/texCubemapLayeredLod.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat", "", char,
+                   unsigned char, short, unsigned short) {
+  CHECK_IMAGE_SUPPORT;
+
+  TextureTestParams params = {};
+  params.extent = make_hipExtent(2, 2, 6);  // width 2, height 2, depth 6 - presumably one 2x2 slice per cube face; TODO confirm
+  params.num_subdivisions = 4;
+  params.layers = 1;
+  params.cubemap = true;
+  params.GenerateTextureDesc(hipReadModeNormalizedFloat);
+
+  TextureTestFixture fixture{params};
+
+  const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX());
+  const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY());
+  const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ());
+
+  dim3 dim_grid;  // blocks per axis, taken from the GetLaunchConfig results above
+  dim_grid.x = num_blocks_x;
+  dim_grid.y = num_blocks_y;
+  dim_grid.z = num_blocks_z;
+
+  dim3 dim_block;  // threads per axis, taken from the GetLaunchConfig results above
+  dim_block.x = num_threads_x;
+  dim_block.y = num_threads_y;
+  dim_block.z = num_threads_z;
+
+  for (auto layer = 0u; layer < params.layers; ++layer) {  // params.layers is 1 above, so only layer 0 runs
+    texCubemapLayeredLodKernel><<>>(  // NOTE(review): launch syntax looks truncated here; dim_grid/dim_block are otherwise unused
+        fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(),
+        fixture.tex.object(), params.Width(), params.Height(), params.Depth(),
+        params.num_subdivisions, params.tex_desc.normalizedCoords, layer, 0.0);  // trailing 0.0 is presumably the level of detail - TODO confirm
+    HIP_CHECK(hipGetLastError());  // report any error left by the launch above
+
+    fixture.LoadOutput();  // presumably fills fixture.out_alloc_h from the device buffer - TODO confirm
+
+    for (auto k = 0u; k < params.NumItersZ(); ++k) {
+      for (auto j = 0u; j < params.NumItersY(); ++j) {
+        for (auto i = 0u; i < params.NumItersX(); ++i) {
+          float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+          float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+          float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions,
+                                  params.tex_desc.normalizedCoords);
+
+          INFO("Layer: " << layer);
+          INFO("i: " << i);
+          INFO("j: " << j);
+          INFO("k: " << k);
+          INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords);
+          INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0]));
+          INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1]));
+          INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2]));
+          INFO("x: " << std::fixed << std::setprecision(16) << x);
+          INFO("y: " << std::fixed << std::setprecision(16) << y);
+          INFO("z: " << std::fixed << std::setprecision(16) << z);
+
+          auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i;  // flat offset of (i, j, k), i varying fastest
+
+          auto ref_val = Vec4Map(fixture.tex_h.TexCubemap(x, y, z, params.tex_desc),  // NOTE(review): `layer` is not passed to the host reference; only equivalent while params.layers == 1
+                                 NormalizeInteger);  // presumably rescales the integer reference texel to a normalized float - TODO confirm
+          REQUIRE(ref_val.x == fixture.out_alloc_h[index].x);  // exact comparison, no tolerance
+          REQUIRE(ref_val.y == fixture.out_alloc_h[index].y);
+          REQUIRE(ref_val.z == fixture.out_alloc_h[index].z);
+          REQUIRE(ref_val.w == fixture.out_alloc_h[index].w);
+        }
+      }
+    }
+  }
+}  // NOTE(review): the doxygen group opened with "@{" at the top of this file has no closing "@}"
\ No newline at end of file
diff --git a/catch/unit/texture/texCubemapLod.cc b/catch/unit/texture/texCubemapLod.cc
new file mode 100644
index 0000000000..0d33048197
--- /dev/null
+++ b/catch/unit/texture/texCubemapLod.cc
@@ -0,0 +1,201 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup texCubemapLod texCubemapLod + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `texCubemapLod` and read mode set to `hipReadModeElementType`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source
+ * ------------------------
+ *    - unit/texture/texCubemapLod.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_texCubemapLod_Positive_ReadModeElementType", "", char, unsigned char,
+                   short, unsigned short, int, unsigned int, float) {
+  CHECK_IMAGE_SUPPORT;
+
+  TextureTestParams params = {};
+  params.extent = make_hipExtent(2, 2, 6);  // width 2, height 2, depth 6 - presumably one 2x2 slice per cube face; TODO confirm
+  params.num_subdivisions = 4;
+  params.cubemap = true;
+  params.GenerateTextureDesc();
+
+  TextureTestFixture fixture{params};
+
+  const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX());
+  const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY());
+  const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ());
+
+  dim3 dim_grid;  // blocks per axis, taken from the GetLaunchConfig results above
+  dim_grid.x = num_blocks_x;
+  dim_grid.y = num_blocks_y;
+  dim_grid.z = num_blocks_z;
+
+  dim3 dim_block;  // threads per axis, taken from the GetLaunchConfig results above
+  dim_block.x = num_threads_x;
+  dim_block.y = num_threads_y;
+  dim_block.z = num_threads_z;
+
+  texCubemapLodKernel><<>>(  // NOTE(review): launch syntax looks truncated here; dim_grid/dim_block are otherwise unused
+      fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(),
+      fixture.tex.object(), params.Width(), params.Height(), params.Depth(),
+      params.num_subdivisions, params.tex_desc.normalizedCoords, 0.0);  // trailing 0.0 is presumably the level of detail - TODO confirm
+  HIP_CHECK(hipGetLastError());  // report any error left by the launch above
+
+  fixture.LoadOutput();  // presumably fills fixture.out_alloc_h from the device buffer - TODO confirm
+
+  for (auto k = 0u; k < params.NumItersZ(); ++k) {
+    for (auto j = 0u; j < params.NumItersY(); ++j) {
+      for (auto i = 0u; i < params.NumItersX(); ++i) {
+        float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions,
+                                params.tex_desc.normalizedCoords);
+        float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions,
+                                params.tex_desc.normalizedCoords);
+        float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions,
+                                params.tex_desc.normalizedCoords);
+
+        INFO("i: " << i);
+        INFO("j: " << j);
+        INFO("k: " << k);
+        INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords);
+        INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0]));
+        INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1]));
+        INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2]));
+        INFO("x: " << std::fixed << std::setprecision(16) << x);
+        INFO("y: " << std::fixed << std::setprecision(16) << y);
+        INFO("z: " << std::fixed << std::setprecision(16) << z);
+
+        auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i;  // flat offset of (i, j, k), i varying fastest
+
+        const auto ref_val = fixture.tex_h.TexCubemap(x, y, z, params.tex_desc);  // host-side reference value for the same (x, y, z)
+        REQUIRE(ref_val.x == fixture.out_alloc_h[index].x);  // exact comparison, no tolerance
+        REQUIRE(ref_val.y == fixture.out_alloc_h[index].y);
+        REQUIRE(ref_val.z == fixture.out_alloc_h[index].z);
+        REQUIRE(ref_val.w == fixture.out_alloc_h[index].w);
+      }
+    }
+  }
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test texture fetching with `texCubemapLod` and read mode set to
+ * `hipReadModeNormalizedFloat`. The test is performed with:
+ *    - normalized coordinates
+ *    - non-normalized coordinates
+ *    - Nearest-point sampling
+ *    - Linear filtering
+ *    - All combinations of different addressing modes.
+ * Test source
+ * ------------------------
+ *    - unit/texture/texCubemapLod.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.2
+ */
+TEMPLATE_TEST_CASE("Unit_texCubemapLod_Positive_ReadModeNormalizedFloat", "", char, unsigned char,
+                   short, unsigned short) {
+  CHECK_IMAGE_SUPPORT;
+
+  TextureTestParams params = {};
+  params.extent = make_hipExtent(2, 2, 6);  // width 2, height 2, depth 6 - presumably one 2x2 slice per cube face; TODO confirm
+  params.num_subdivisions = 4;
+  params.cubemap = true;
+  params.GenerateTextureDesc(hipReadModeNormalizedFloat);
+
+  TextureTestFixture fixture{params};
+
+  const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX());
+  const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY());
+  const auto [num_threads_z, num_blocks_z] = GetLaunchConfig(10, params.NumItersZ());
+
+  dim3 dim_grid;  // blocks per axis, taken from the GetLaunchConfig results above
+  dim_grid.x = num_blocks_x;
+  dim_grid.y = num_blocks_y;
+  dim_grid.z = num_blocks_z;
+
+  dim3 dim_block;  // threads per axis, taken from the GetLaunchConfig results above
+  dim_block.x = num_threads_x;
+  dim_block.y = num_threads_y;
+  dim_block.z = num_threads_z;
+
+  texCubemapLodKernel><<>>(  // NOTE(review): launch syntax looks truncated here; dim_grid/dim_block are otherwise unused
+      fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), params.NumItersZ(),
+      fixture.tex.object(), params.Width(), params.Height(), params.Depth(),
+      params.num_subdivisions, params.tex_desc.normalizedCoords, 0.0);  // trailing 0.0 is presumably the level of detail - TODO confirm
+  HIP_CHECK(hipGetLastError());  // report any error left by the launch above
+
+  fixture.LoadOutput();  // presumably fills fixture.out_alloc_h from the device buffer - TODO confirm
+
+  for (auto k = 0u; k < params.NumItersZ(); ++k) {
+    for (auto j = 0u; j < params.NumItersY(); ++j) {
+      for (auto i = 0u; i < params.NumItersX(); ++i) {
+        float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions,
+                                params.tex_desc.normalizedCoords);
+        float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions,
+                                params.tex_desc.normalizedCoords);
+        float z = GetCoordinate(k, params.NumItersZ(), params.Depth(), params.num_subdivisions,
+                                params.tex_desc.normalizedCoords);
+
+        INFO("i: " << i);
+        INFO("j: " << j);
+        INFO("k: " << k);
+        INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords);
+        INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0]));
+        INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1]));
+        INFO("Address mode Z: " << AddressModeToString(params.tex_desc.addressMode[2]));
+        INFO("x: " << std::fixed << std::setprecision(16) << x);
+        INFO("y: " << std::fixed << std::setprecision(16) << y);
+        INFO("z: " << std::fixed << std::setprecision(16) << z);
+
+        auto index = k * params.NumItersX() * params.NumItersY() + j * params.NumItersX() + i;  // flat offset of (i, j, k), i varying fastest
+
+        auto ref_val = Vec4Map(fixture.tex_h.TexCubemap(x, y, z, params.tex_desc),
+                               NormalizeInteger);  // presumably rescales the integer reference texel to a normalized float - TODO confirm
+        REQUIRE(ref_val.x == fixture.out_alloc_h[index].x);  // exact comparison, no tolerance
+        REQUIRE(ref_val.y == fixture.out_alloc_h[index].y);
+        REQUIRE(ref_val.z == fixture.out_alloc_h[index].z);
+        REQUIRE(ref_val.w == fixture.out_alloc_h[index].w);
+      }
+    }
+  }
+}  // NOTE(review): the doxygen group opened with "@{" at the top of this file has no closing "@}"
\ No newline at end of file
diff --git a/catch/unit/texture/texture_reference.hh b/catch/unit/texture/texture_reference.hh
index 21e08abd0e..45f7dd8efa 100644
--- a/catch/unit/texture/texture_reference.hh
+++ b/catch/unit/texture/texture_reference.hh
@@ -52,6 +52,64 @@ template class TextureReference {
     }
   }
 
+  TexelType TexCubemap(float x, float y, float z, const hipTextureDesc& tex_desc) const {  // host-side reference: pick a cube face from direction (x, y, z), then sample it
+    x = tex_desc.normalizedCoords ? x * extent_.width : x;  // normalized inputs are first scaled by the texture extent
+    y = tex_desc.normalizedCoords ? y * extent_.height : y;
+    z = tex_desc.normalizedCoords ? z * extent_.depth : z;
+
+    int face;  // set below: 0/1 for +x/-x, 2/3 for +y/-y, 4/5 for +z/-z
+    float m, s, t;  // m: magnitude of the dominant component; s, t: in-face coordinates before division by m
+
+    if (std::abs(x) > std::abs(y) && std::abs(x) > std::abs(z)) {  // |x| strictly largest
+      if (x >= 0) {
+        face = 0;
+        m = x;
+        s = -z;
+        t = -y;
+      } else {
+        face = 1;
+        m = -x;
+        s = z;
+        t = -y;
+      }
+    } else if (std::abs(y) >= std::abs(x) && std::abs(y) > std::abs(z)) {  // |y| wins a tie with |x| but must strictly exceed |z|
+      if (y >= 0) {
+        face = 2;
+        m = y;
+        s = x;
+        t = z;
+      } else {
+        face = 3;
+        m = -y;
+        s = x;
+        t = -z;
+      }
+    } else {  // every remaining case, including any tie with |z|
+      if (z >= 0) {
+        face = 4;
+        m = z;
+        s = x;
+        t = -y;
+      } else {
+        face = 5;
+        m = -z;
+        s = -x;
+        t = -y;
+      }
+    }
+
+    float coord1 = (s / m + 1) / 2;  // maps s/m from [-1, 1] to [0, 1]; NOTE(review): m is 0 when x == y == z == 0, so this divides by zero
+    float coord2 = (t / m + 1) / 2;
+
+    if (tex_desc.filterMode == hipFilterModePoint) {
+      return Sample(roundf(coord1), roundf(coord2), face, tex_desc.addressMode);  // NOTE(review): roundf of a [0, 1] value gives only 0 or 1 - presumably relies on 2-texel-wide faces; confirm
+    } else if (tex_desc.filterMode == hipFilterModeLinear) {
+      return LinearFiltering(coord1, coord2, face, tex_desc.addressMode);
+    } else {
+      throw std::invalid_argument("Invalid hipFilterMode value");  // any filter mode other than point or linear
+    }
+  }
+
   TexelType Tex1DLayered(float x, int layer, const hipTextureDesc& tex_desc) const {
     x = tex_desc.normalizedCoords ?
x * extent_.width : x; if (tex_desc.filterMode == hipFilterModePoint) { @@ -236,10 +294,17 @@ template class TextureReference { return coord; } + template float FloatToNBitFractional(float x) const { + constexpr size_t mult = 1 << N; + const auto x_trunc = std::trunc(x); + const auto x_frac = std::round((x - x_trunc) * mult) / mult; + return x_trunc + x_frac; + } + std::tuple GetLinearFilteringParams(float coord) const { - const auto coordB = coord - 0.5f; + const auto coordB = FloatToNBitFractional<8>(coord - 0.5f); const auto index = floorf(coordB); - const FixedPoint<8> coeff = coordB - index; + const auto coeff = coordB - index; return {index, coeff}; } From d6dd4fd05b9135644bfa6c7904d9c7d6050a962b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 16:51:22 +0100 Subject: [PATCH 32/71] EXSWHTEC-297 - Introduce common utilities for memory ordering tests for builtin atomic operations #389 Change-Id: Iae1db918eab6a722c85ff00183c973dd8dd54e9b --- catch/unit/atomics/memory_order_common.hh | 436 ++++++++++++++++++++++ 1 file changed, 436 insertions(+) create mode 100644 catch/unit/atomics/memory_order_common.hh diff --git a/catch/unit/atomics/memory_order_common.hh b/catch/unit/atomics/memory_order_common.hh new file mode 100644 index 0000000000..5e37575c07 --- /dev/null +++ b/catch/unit/atomics/memory_order_common.hh @@ -0,0 +1,436 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include +#include + +enum class BuiltinAtomicOperation { + kLoadStore = 0, + kExchange, + kCompareExchangeStrong, + kCompareExchangeWeak, + kAdd, + kAnd, + kOr, + kXor, + kMin, + kMax +}; + +template +__host__ __device__ void SetFlag(int* const flag) { +#ifdef __HIP_DEVICE_COMPILE__ + if constexpr (operation == BuiltinAtomicOperation::kLoadStore) { + static_assert(memory_order != __ATOMIC_ACQ_REL); + __hip_atomic_store(flag, 1, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kExchange) { + __hip_atomic_exchange(flag, 1, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kCompareExchangeStrong) { + int compare = 0; + __hip_atomic_compare_exchange_strong(flag, &compare, 1, memory_order, __ATOMIC_RELAXED, + memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kCompareExchangeWeak) { + int compare = 0; + while (!__hip_atomic_compare_exchange_weak(flag, &compare, 1, memory_order, __ATOMIC_RELAXED, + memory_scope)) + compare = 0; + } else if constexpr (operation == BuiltinAtomicOperation::kAdd) { + __hip_atomic_fetch_add(flag, 1, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kAnd) { + __hip_atomic_fetch_and(flag, 0x0, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kOr) { + __hip_atomic_fetch_or(flag, 0x1, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kXor) { + __hip_atomic_fetch_xor(flag, 0x1, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kMin) { + __hip_atomic_fetch_min(flag, -1, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kMax) { + __hip_atomic_fetch_max(flag, 1, memory_order, memory_scope); + } +#else + if constexpr (operation == BuiltinAtomicOperation::kAnd) { + __atomic_store_n(flag, 0, __ATOMIC_RELEASE); + } else { + 
__atomic_store_n(flag, 1, __ATOMIC_RELEASE); + } +#endif +} + +template +__host__ __device__ int FetchFlag(int* const flag) { +#ifdef __HIP_DEVICE_COMPILE__ + if constexpr (operation == BuiltinAtomicOperation::kLoadStore) { + static_assert(memory_order != __ATOMIC_ACQ_REL); + return __hip_atomic_load(flag, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kExchange) { + return __hip_atomic_exchange(flag, 0, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kCompareExchangeStrong) { + int compare = 1; + __hip_atomic_compare_exchange_strong( + flag, &compare, 1, memory_order, + memory_order == __ATOMIC_ACQ_REL ? __ATOMIC_ACQUIRE : memory_order, memory_scope); + return compare; + } else if constexpr (operation == BuiltinAtomicOperation::kCompareExchangeWeak) { + int compare = 1; + __hip_atomic_compare_exchange_weak( + flag, &compare, 1, memory_order, + memory_order == __ATOMIC_ACQ_REL ? __ATOMIC_ACQUIRE : memory_order, memory_scope); + return compare; + } else if constexpr (operation == BuiltinAtomicOperation::kAdd) { + return __hip_atomic_fetch_add(flag, 0, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kAnd) { + return !__hip_atomic_fetch_and(flag, 0x1, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kOr) { + return __hip_atomic_fetch_or(flag, 0x0, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kXor) { + return __hip_atomic_fetch_xor(flag, 0x0, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kMin) { + return __hip_atomic_fetch_min(flag, 0, memory_order, memory_scope); + } else if constexpr (operation == BuiltinAtomicOperation::kMax) { + return __hip_atomic_fetch_max(flag, 0, memory_order, memory_scope); + } +#else + if constexpr (operation == BuiltinAtomicOperation::kAnd) { + return !__atomic_load_n(flag, __ATOMIC_ACQUIRE); + 
} else { + return __atomic_load_n(flag, __ATOMIC_ACQUIRE); + } +#endif +} + +namespace AcquireRelease { + +constexpr auto kTestValue = 42; + +template +__host__ __device__ void Producer(int* const flag, int* const data) { + constexpr int actual_memory_order = + memory_order == __ATOMIC_ACQUIRE ? __ATOMIC_RELEASE : memory_order; + + data[0] = kTestValue; + + SetFlag(flag); +} + +template +__host__ __device__ void Consumer(int* const flag, int* const data, int* const ret) { + constexpr int actual_memory_order = + memory_order == __ATOMIC_RELEASE ? __ATOMIC_ACQUIRE : memory_order; + + while (!FetchFlag(flag)) + ; + + ret[0] = data[0]; +} + +template +__global__ void TestKernel(int* const flag, int* data, int* const ret) { + __shared__ int shared_mem; + + if (data == nullptr) data = &shared_mem; + + if (blockIdx.x == 0 && threadIdx.x == 0) { + if constexpr (operation == BuiltinAtomicOperation::kAnd) + *flag = 1; + else + *flag = 0; + } + __syncthreads(); + + bool producer = false, consumer = false; + + if constexpr (memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) { + producer = blockIdx.x == 0 && threadIdx.x == 0; + consumer = blockIdx.x == 0 && threadIdx.x == 1; + } else if constexpr (memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP) { + producer = blockIdx.x == 0 && threadIdx.x == 0; + consumer = blockIdx.x == 0 && threadIdx.x == warpSize; + } else if constexpr (memory_scope == __HIP_MEMORY_SCOPE_AGENT) { + producer = blockIdx.x == 0 && threadIdx.x == 0; + consumer = blockIdx.x == 1 && threadIdx.x == 0; + } + + if (producer) { + Producer(flag, data); + return; + } + + if (consumer) { + Consumer(flag, data, ret); + return; + } +} + +template +__global__ void ProducerKernel(int* const flag, int* const data) { + if (!(blockIdx.x == 0 && threadIdx.x == 0)) { + return; + } + + Producer(flag, data); +} + +template +__global__ void ConsumerKernel(int* const flag, int* const data, int* const ret) { + if (!(blockIdx.x == 0 && threadIdx.x == 0)) { + return; + } + + Consumer(flag, 
data, ret); +} + +template void Test() { + int blocks = 1, threads = 1; + if (memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) { + blocks = 1; + threads = 2; + } else if (memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP) { + blocks = 1; + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + threads = warp_size * 2; + } else if (memory_scope == __HIP_MEMORY_SCOPE_AGENT) { + blocks = 2; + threads = 1; + } + + LinearAllocGuard flag(LinearAllocs::hipMalloc, sizeof(int)); + LinearAllocGuard ret(LinearAllocs::hipMallocManaged, sizeof(int)); + + SECTION("Global memory") { + const auto alloc_type = GENERATE(LinearAllocs::hipMalloc, LinearAllocs::hipMallocManaged); + LinearAllocGuard data(alloc_type, sizeof(int)); + TestKernel + <<>>(flag.ptr(), data.ptr(), ret.ptr()); + } + + if (memory_scope != __HIP_MEMORY_SCOPE_AGENT && memory_scope != __HIP_MEMORY_SCOPE_SYSTEM) { + SECTION("Shared memory") { + TestKernel + <<>>(flag.ptr(), nullptr, ret.ptr()); + } + } + + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(ret.ptr()[0] == kTestValue); +} + +template void SystemTest() { + std::thread host_thread; + + LinearAllocGuard flag(LinearAllocs::hipMallocManaged, sizeof(int)); + LinearAllocGuard ret(LinearAllocs::hipMallocManaged, sizeof(int)); + + SECTION("Global memory") { + const auto alloc_type = GENERATE(LinearAllocs::hipHostMalloc, LinearAllocs::hipMallocManaged); + LinearAllocGuard data(alloc_type, sizeof(int)); + + SECTION("Host producer - Device consumer") { + ConsumerKernel + <<<1, 1>>>(flag.ptr(), data.ptr(), ret.ptr()); + host_thread = std::thread([&] { + Producer(flag.ptr(), data.ptr()); + }); + } + + SECTION("Device producer - Host consumer") { + host_thread = std::thread([&] { + Consumer(flag.ptr(), data.ptr(), + ret.ptr()); + }); + ProducerKernel + <<<1, 1>>>(flag.ptr(), data.ptr()); + } + } + + HIP_CHECK(hipDeviceSynchronize()); + host_thread.join(); + + REQUIRE(ret.ptr()[0] == kTestValue); +} + +} /* namespace AcquireRelease */ 
+ +namespace SequentialConsistency { + +template +__host__ __device__ void Producer(int* const flag) { + __atomic_store_n(flag, 1, __ATOMIC_SEQ_CST); +} + +template +__host__ __device__ void Consumer(int* const flag1, int* const flag2, int* const counter) { + while (!FetchFlag(flag1)) + ; + if (FetchFlag(flag2)) { +#ifdef __HIP_DEVICE_COMPILE__ + __hip_atomic_fetch_add(counter, 1, __ATOMIC_SEQ_CST, memory_scope); +#else + __atomic_fetch_add(counter, 1, __ATOMIC_SEQ_CST); +#endif + } +} + +template +__global__ void TestKernel(int* flag1, int* flag2, int* const counter) { + __shared__ int shared_mem[2]; + + if (flag1 == nullptr) flag1 = &shared_mem[0]; + if (flag2 == nullptr) flag2 = &shared_mem[1]; + + if (blockIdx.x == 0 && threadIdx.x == 0) { + if constexpr (operation == BuiltinAtomicOperation::kAnd) { + *flag1 = 1; + *flag2 = 1; + } else { + *flag1 = 0; + *flag2 = 0; + } + } + __syncthreads(); + + bool producer1 = false, producer2 = false, consumer1 = false, consumer2 = false; + + if constexpr (memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) { + producer1 = blockIdx.x == 0 && threadIdx.x == 0; + consumer1 = blockIdx.x == 0 && threadIdx.x == 1; + producer2 = blockIdx.x == 0 && threadIdx.x == 2; + consumer2 = blockIdx.x == 0 && threadIdx.x == 3; + } else if constexpr (memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP) { + producer1 = blockIdx.x == 0 && threadIdx.x == 0; + consumer1 = blockIdx.x == 0 && threadIdx.x == warpSize; + producer2 = blockIdx.x == 0 && threadIdx.x == warpSize * 2; + consumer2 = blockIdx.x == 0 && threadIdx.x == warpSize * 3; + } else if constexpr (memory_scope == __HIP_MEMORY_SCOPE_AGENT) { + producer1 = blockIdx.x == 0 && threadIdx.x == 0; + consumer1 = blockIdx.x == 1 && threadIdx.x == 0; + producer2 = blockIdx.x == 2 && threadIdx.x == 0; + consumer2 = blockIdx.x == 3 && threadIdx.x == 0; + } + + if (producer1) { + Producer(flag1); + return; + } + + if (consumer1) { + Consumer(flag1, flag2, counter); + return; + } + + if (producer2) { + 
Producer(flag2); + return; + } + + if (consumer2) { + Consumer(flag2, flag1, counter); + return; + } +} + +template +__global__ void ProducerKernel(int* const flag) { + if (!(blockIdx.x == 0 && threadIdx.x == 0)) { + return; + } + + Producer(flag); +} + +template +__global__ void ConsumerKernel(int* const flag1, int* const flag2, int* const counter) { + if (!(blockIdx.x == 0 && threadIdx.x == 0)) { + return; + } + + Consumer(flag1, flag2, counter); +} + +template void Test() { + int blocks = 1, threads = 1; + if (memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) { + blocks = 1; + threads = 4; + } else if (memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP) { + blocks = 1; + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + threads = warp_size * 4; + } else if (memory_scope == __HIP_MEMORY_SCOPE_AGENT) { + blocks = 4; + threads = 1; + } + + LinearAllocGuard counter(LinearAllocs::hipMallocManaged, sizeof(int)); + + SECTION("Global memory") { + const auto alloc_type = GENERATE(LinearAllocs::hipMalloc); + LinearAllocGuard flag1(alloc_type, sizeof(int)); + LinearAllocGuard flag2(alloc_type, sizeof(int)); + TestKernel + <<>>(flag1.ptr(), flag2.ptr(), counter.ptr()); + } + + if (memory_scope != __HIP_MEMORY_SCOPE_AGENT && memory_scope != __HIP_MEMORY_SCOPE_SYSTEM) { + SECTION("Shared memory") { + TestKernel<<>>(nullptr, nullptr, counter.ptr()); + } + } + + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(counter.ptr()[0] != 0); +} + +template void SystemTest() { + std::thread host_producer, host_consumer; + + LinearAllocGuard counter(LinearAllocs::hipMallocManaged, sizeof(int)); + + SECTION("Global memory") { + const auto alloc_type = GENERATE(LinearAllocs::hipMallocManaged); + LinearAllocGuard flag1(alloc_type, sizeof(int)); + LinearAllocGuard flag2(alloc_type, sizeof(int)); + + ConsumerKernel + <<<1, 1>>>(flag1.ptr(), flag2.ptr(), counter.ptr()); + host_consumer = std::thread([&] { + Consumer(flag2.ptr(), flag1.ptr(), 
counter.ptr()); + }); + + ProducerKernel<<<1, 1>>>(flag1.ptr()); + host_producer = + std::thread([&] { Producer(flag2.ptr()); }); + } + + HIP_CHECK(hipDeviceSynchronize()); + host_producer.join(); + host_consumer.join(); + + REQUIRE(counter.ptr()[0] != 0); +} + +} // namespace SequentialConsistency \ No newline at end of file From ea53809db177914ec14df44acc55fce70bdf3348 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 16:52:35 +0100 Subject: [PATCH 33/71] EXSWHTEC-297 - Implement memory ordering tests for builtin atomic operations #390 Change-Id: I9ed4ff6ec1e7b8aecc2e1c3ba5883c73901ba2e2 --- catch/unit/atomics/CMakeLists.txt | 2 + catch/unit/atomics/acquire_release.cc | 551 +++++++++++++++++++ catch/unit/atomics/sequential_consistency.cc | 165 ++++++ 3 files changed, 718 insertions(+) create mode 100644 catch/unit/atomics/acquire_release.cc create mode 100644 catch/unit/atomics/sequential_consistency.cc diff --git a/catch/unit/atomics/CMakeLists.txt b/catch/unit/atomics/CMakeLists.txt index 1c40746dff..559165977f 100644 --- a/catch/unit/atomics/CMakeLists.txt +++ b/catch/unit/atomics/CMakeLists.txt @@ -36,6 +36,8 @@ set(TEST_SRC __hip_atomic_fetch_min.cc __hip_atomic_fetch_max.cc atomic_builtins.cc + acquire_release.cc + sequential_consistency.cc atomicExch.cc atomicExch_system.cc __hip_atomic_fetch_and.cc diff --git a/catch/unit/atomics/acquire_release.cc b/catch/unit/atomics/acquire_release.cc new file mode 100644 index 0000000000..7e0996f566 --- /dev/null +++ b/catch/unit/atomics/acquire_release.cc @@ -0,0 +1,551 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +#include "memory_order_common.hh" + +TEST_CASE("Unit___hip_atomic_load_store_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + +TEST_CASE("Unit___hip_atomic_exchange_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + +TEST_CASE("Unit___hip_atomic_compare_exchange_strong_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + 
AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + +TEST_CASE("Unit___hip_atomic_compare_exchange_weak_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + +TEST_CASE("Unit___hip_atomic_fetch_add_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + 
+TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + +TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + +TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + 
SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + +TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} + +TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Acquire_Release") { + SECTION("ACQUIRE/RELEASE") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("ACQ_REL") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } + SECTION("SEQ_CST") { + SECTION("WAVEFRONT") { + AcquireRelease::Test(); + } + SECTION("WORKGROUP") { + AcquireRelease::Test(); + } + 
SECTION("AGENT") { + AcquireRelease::Test(); + } + SECTION("SYSTEM") { + AcquireRelease::SystemTest(); + } + } +} \ No newline at end of file diff --git a/catch/unit/atomics/sequential_consistency.cc b/catch/unit/atomics/sequential_consistency.cc new file mode 100644 index 0000000000..c37b26487a --- /dev/null +++ b/catch/unit/atomics/sequential_consistency.cc @@ -0,0 +1,165 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +#include "memory_order_common.hh" + +TEST_CASE("Unit___hip_atomic_load_store_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { SequentialConsistency::SystemTest(); } +} + +TEST_CASE("Unit___hip_atomic_exchange_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { SequentialConsistency::SystemTest(); } +} + +TEST_CASE("Unit___hip_atomic_compare_exchange_strong_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { + SequentialConsistency::SystemTest(); + } +} + +TEST_CASE("Unit___hip_atomic_compare_exchange_weak_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { + SequentialConsistency::SystemTest(); + } +} + +TEST_CASE("Unit___hip_atomic_fetch_add_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { SequentialConsistency::SystemTest(); } +} + +TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { 
SequentialConsistency::SystemTest(); } +} + +TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { SequentialConsistency::SystemTest(); } +} + +TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { SequentialConsistency::SystemTest(); } +} + +TEST_CASE("Unit___hip_atomic_fetch_min_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { SequentialConsistency::SystemTest(); } +} + +TEST_CASE("Unit___hip_atomic_fetch_max_Positive_Sequential_Consistency") { + SECTION("WAVEFRONT") { + SequentialConsistency::Test(); + } + SECTION("WORKGROUP") { + SequentialConsistency::Test(); + } + SECTION("AGENT") { + SequentialConsistency::Test(); + } + SECTION("SYSTEM") { SequentialConsistency::SystemTest(); } +} \ No newline at end of file From 488e620fdb812eee49e8717a38f72c813e916c11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 16:49:27 +0100 Subject: [PATCH 34/71] EXSWHTEC-296 - Implement negative compile tests for builtin atomic operations #391 Change-Id: I9f77a777e23a5dcc304bb5d7a996ca140c6e7814 --- catch/unit/atomics/CMakeLists.txt | 4 + catch/unit/atomics/atomic_builtin_kernels.cc | 458 +++++++++++++++++++ 2 files changed, 462 insertions(+) create mode 100644 catch/unit/atomics/atomic_builtin_kernels.cc diff --git a/catch/unit/atomics/CMakeLists.txt 
b/catch/unit/atomics/CMakeLists.txt index 559165977f..bba917bc2f 100644 --- a/catch/unit/atomics/CMakeLists.txt +++ b/catch/unit/atomics/CMakeLists.txt @@ -91,6 +91,10 @@ add_test(NAME Unit_atomicMax_Negative_Parameters COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} atomicMax_negative_kernels.cc ${EXPECTED_ERRORS}) +add_test(NAME Unit_AtomicBuiltins_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + atomic_builtin_kernels.cc 60 27) # Should be 35 warnings, see EXSWHTEC-309 # SWDEV-435667: Below 2 tests failed in stress test on 01/12/23 #add_test(NAME Unit_atomicExch_Negative_Parameters diff --git a/catch/unit/atomics/atomic_builtin_kernels.cc b/catch/unit/atomics/atomic_builtin_kernels.cc new file mode 100644 index 0000000000..27cd3eb95e --- /dev/null +++ b/catch/unit/atomics/atomic_builtin_kernels.cc @@ -0,0 +1,458 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +constexpr int kMemOrder = __ATOMIC_RELAXED; +constexpr int kMemScope = __HIP_MEMORY_SCOPE_SYSTEM; + +// Trivially-copyable class. +class DummyTC { + public: + __device__ DummyTC() {} + __device__ ~DummyTC() = default; + __device__ DummyTC(const DummyTC&) = default; + __device__ DummyTC& operator=(const DummyTC&) = default; + __device__ DummyTC(DummyTC&&) = default; + __device__ DummyTC& operator=(DummyTC&&) = default; +}; + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +__global__ void StoreCompileKernel(int* x) { + // Valid combinations + __hip_atomic_store(x, 1, __ATOMIC_RELAXED, kMemScope); + __hip_atomic_store(x, 1, __ATOMIC_RELEASE, kMemScope); + __hip_atomic_store(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + // Pointer to a non-const type + __hip_atomic_store(reinterpret_cast(x), 1, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + __hip_atomic_store(*x, 1, kMemOrder, kMemScope); + // Consume not allowed by C++11 for store + __hip_atomic_store(x, 1, __ATOMIC_CONSUME, kMemScope); + // Acquire not allowed by C++11 for store + __hip_atomic_store(x, 1, __ATOMIC_ACQUIRE, kMemScope); + // Acquire-Release not allowed by C++11 for store + __hip_atomic_store(x, 1, __ATOMIC_ACQ_REL, kMemScope); + // Memory order is out of bounds + __hip_atomic_store(x, 1, -1, kMemScope); + __hip_atomic_store(x, 1, 10, kMemScope); + // Memory scope is out of bounds + __hip_atomic_store(x, 1, kMemOrder, -1); + __hip_atomic_store(x, 1, kMemOrder, 10); + + // Storing an object that is not trivially-copyable + Dummy dummy_a{}; + Dummy dummy_b{}; + __hip_atomic_store(&dummy_a, dummy_b, kMemOrder, kMemScope); + + // Storing an object that is
trivially-copyable + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + __hip_atomic_store(&dummytc_a, dummytc_b, kMemOrder, kMemScope); +} + +__global__ void LoadCompileKernel(int* x, int* y) { + // Valid combinations + *y = __hip_atomic_load(x, __ATOMIC_RELAXED, kMemScope); + *y = __hip_atomic_load(x, __ATOMIC_CONSUME, kMemScope); + *y = __hip_atomic_load(x, __ATOMIC_ACQUIRE, kMemScope); + *y = __hip_atomic_load(x, __ATOMIC_SEQ_CST, kMemScope); + + // Value instead of pointer to the atomic builtin for 1st parameter + *y = __hip_atomic_load(*x, kMemOrder, kMemScope); + // Release not allowed by C++11 for load + *y = __hip_atomic_load(x, __ATOMIC_RELEASE, kMemScope); + // Acquire-Release not allowed by C++11 for load + *y = __hip_atomic_load(x, __ATOMIC_ACQ_REL, kMemScope); + // Memory order is out of bounds + *y = __hip_atomic_load(x, -1, kMemScope); + *y = __hip_atomic_load(x, 10, kMemScope); + // Memory scope is out of bounds + *y = __hip_atomic_load(x, kMemOrder, -1); + *y = __hip_atomic_load(x, kMemOrder, 10); + + // Loading an object that is not trivially-copyable + Dummy dummy_a{}; + Dummy dummy_b{}; + dummy_a = __hip_atomic_load(&dummy_b, kMemOrder, kMemScope); + + // Loading an object that is trivially-copyable + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + dummytc_a = __hip_atomic_load(&dummytc_b, kMemOrder, kMemScope); +} + +__global__ void CompareWeakCompileKernel(int* x, int* expected) { + bool res{false}; + // Valid combinations + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_CONSUME, + 
kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL, + kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST, + kMemScope); + + // Release not allowed on fail by C++11 + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, __ATOMIC_RELEASE, kMemScope); + // Acquire-Release not allowed on fail by C++11 + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, __ATOMIC_ACQ_REL, kMemScope); + // Fail stronger than success + res = __hip_atomic_compare_exchange_weak(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, + kMemScope); + // Pointer to a non-const type + res = __hip_atomic_compare_exchange_weak(reinterpret_cast(x), expected, 1, kMemOrder, + kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + res = __hip_atomic_compare_exchange_weak(*x, 
expected, 1, kMemOrder, kMemOrder, kMemScope); + // Memory order on success is out of bounds + res = __hip_atomic_compare_exchange_weak(x, expected, 1, -1, kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, 10, kMemOrder, kMemScope); + // Memory order on failure is out of bounds + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, -1, kMemScope); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, 10, kMemScope); + // Memory scope is out of bounds + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, kMemOrder, -1); + res = __hip_atomic_compare_exchange_weak(x, expected, 1, kMemOrder, kMemOrder, 10); + + // User-defined class is not trivially-copyable and therefore cannot be atomically copied + Dummy dummy_a{}; + Dummy dummy_b{}; + Dummy dummy_c{}; + res = __hip_atomic_compare_exchange_weak(&dummy_a, &dummy_b, dummy_c, kMemOrder, kMemOrder, + kMemScope); + // User-defined class is trivially-copyable and can be atomically copied + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + DummyTC dummytc_c{}; + res = __hip_atomic_compare_exchange_weak(&dummytc_a, &dummytc_b, dummytc_c, kMemOrder, kMemOrder, + kMemScope); +} + +__global__ void CompareStrongCompileKernel(int* x, int* expected) { + bool res{false}; + // Valid combinations + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_CONSUME, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, + kMemScope); + res = 
__hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_CONSUME, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL, + kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST, + kMemScope); + + // Release not allowed on fail by C++11 + res = + __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, __ATOMIC_RELEASE, kMemScope); + // Acquire-Release not allowed on fail by C++11 + res = + __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, __ATOMIC_ACQ_REL, kMemScope); + // Fail stronger than success + res = __hip_atomic_compare_exchange_strong(x, expected, 1, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, + kMemScope); + // Pointer to a non-const type + res = __hip_atomic_compare_exchange_strong(reinterpret_cast(x), expected, 1, + kMemOrder, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin for 1st parameter + res = __hip_atomic_compare_exchange_strong(*x, expected, 1, kMemOrder, kMemOrder, kMemScope); + // Memory order on 
success is out of bounds + res = __hip_atomic_compare_exchange_strong(x, expected, 1, -1, kMemOrder, kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, 10, kMemOrder, kMemScope); + // Memory order on failure is out of bounds + res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, -1, kMemScope); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, 10, kMemScope); + // Memory scope is out of bounds + res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, kMemOrder, -1); + res = __hip_atomic_compare_exchange_strong(x, expected, 1, kMemOrder, kMemOrder, 10); + + // User-defined class is not trivially-copyable and therefore cannot be atomically copied + Dummy dummy_a{}; + Dummy dummy_b{}; + Dummy dummy_c{}; + res = __hip_atomic_compare_exchange_strong(&dummy_a, &dummy_b, dummy_c, kMemOrder, kMemOrder, + kMemScope); + // User-defined class is trivially-copyable and can be atomically copied + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + DummyTC dummytc_c{}; + res = __hip_atomic_compare_exchange_strong(&dummytc_a, &dummytc_b, dummytc_c, kMemOrder, + kMemOrder, kMemScope); +} + +__global__ void ExchangeCompileKernel(int* x) { + int old{}; + // Valid combinations + old = __hip_atomic_exchange(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_exchange(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + // Pointer to a non-const type + old = __hip_atomic_exchange(reinterpret_cast(x), 1, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + old = __hip_atomic_exchange(*x, 1, kMemOrder, kMemScope); + // Memory order out of bounds + old = __hip_atomic_exchange(x, 1, -1, kMemScope); + old = __hip_atomic_exchange(x, 1, 10, 
kMemScope); + // Memory scope out of bounds + old = __hip_atomic_exchange(x, 1, kMemOrder, -1); + old = __hip_atomic_exchange(x, 1, kMemOrder, 10); + + // User-defined class is not trivially-copyable and therefore cannot be atomically copied + Dummy dummy_a{}; + Dummy dummy_b{}; + dummy_b = __hip_atomic_exchange(&dummy_a, dummy_b, kMemOrder, kMemScope); + + // User-defined class is trivially-copyable and can be atomically copied + DummyTC dummytc_a{}; + DummyTC dummytc_b{}; + dummytc_b = __hip_atomic_exchange(&dummytc_a, dummytc_b, kMemOrder, kMemScope); +} + +__global__ void FetchAddCompileKernel(int* x) { + int old{}; + // Valid combinations + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_add(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + // Pointer to a non-const type + old = __hip_atomic_fetch_add(reinterpret_cast(x), 1, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + old = __hip_atomic_fetch_add(*x, 1, kMemOrder, kMemScope); + // Memory order out of bounds + old = __hip_atomic_fetch_add(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_add(x, 1, 10, kMemScope); + // Memory scope out of bounds + old = __hip_atomic_fetch_add(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_add(x, 1, kMemOrder, 10); + + Dummy dummy{}; + old = __hip_atomic_fetch_add(&dummy, 1, kMemOrder, kMemScope); +} + +__global__ void FetchAndCompileKernel(int* x) { + int old{}; + // Valid combinations + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_RELEASE, kMemScope); 
+ old = __hip_atomic_fetch_and(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_and(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + // Pointer to a non-const type + old = __hip_atomic_fetch_and(reinterpret_cast(x), 1, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + old = __hip_atomic_fetch_and(*x, 1, kMemOrder, kMemScope); + // Memory order out of bounds + old = __hip_atomic_fetch_and(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_and(x, 1, 10, kMemScope); + // Memory scope out of bounds + old = __hip_atomic_fetch_and(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_and(x, 1, kMemOrder, 10); + + // Value must be an integer + Dummy dummy{}; + old = __hip_atomic_fetch_and(&dummy, 1, kMemOrder, kMemScope); + float float_var{1.5f}; + old = __hip_atomic_fetch_and(&float_var, 1, kMemOrder, kMemScope); + double double_var{1.5}; + old = __hip_atomic_fetch_and(&double_var, 1, kMemOrder, kMemScope); +} + +__global__ void FetchOrCompileKernel(int* x) { + int old{}; + // Valid combinations + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_or(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + // Pointer to a non-const type + old = __hip_atomic_fetch_or(reinterpret_cast(x), 1, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + old = __hip_atomic_fetch_or(*x, 1, kMemOrder, kMemScope); + // Memory order out of bounds + old = __hip_atomic_fetch_or(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_or(x, 1, 10, kMemScope); + // Memory scope out of bounds + old = __hip_atomic_fetch_or(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_or(x, 1, kMemOrder, 10); + + // Value must be an integer + Dummy dummy{}; + old = 
__hip_atomic_fetch_or(&dummy, 1, kMemOrder, kMemScope); + float float_var{1.5f}; + old = __hip_atomic_fetch_or(&float_var, 1, kMemOrder, kMemScope); + double double_var{1.5}; + old = __hip_atomic_fetch_or(&double_var, 1, kMemOrder, kMemScope); +} + +__global__ void FetchXorCompileKernel(int* x) { + int old{}; + // Valid combinations + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + // Pointer to a non-const type + old = __hip_atomic_fetch_xor(reinterpret_cast(x), 1, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + old = __hip_atomic_fetch_xor(*x, 1, kMemOrder, kMemScope); + // Memory order out of bounds + old = __hip_atomic_fetch_xor(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_xor(x, 1, 10, kMemScope); + // Memory scope out of bounds + old = __hip_atomic_fetch_xor(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_xor(x, 1, kMemOrder, 10); + + // Value must be an integer + Dummy dummy{}; + old = __hip_atomic_fetch_xor(&dummy, 1, kMemOrder, kMemScope); + float float_var{1.5f}; + old = __hip_atomic_fetch_xor(&float_var, 1, kMemOrder, kMemScope); + double double_var{1.5}; + old = __hip_atomic_fetch_xor(&double_var, 1, kMemOrder, kMemScope); +} + +__global__ void FetchMaxCompileKernel(int* x) { + int old{}; + // Valid combinations + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_max(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = 
__hip_atomic_fetch_max(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + // Pointer to a non-const type + old = __hip_atomic_fetch_max(reinterpret_cast(x), 1, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + old = __hip_atomic_fetch_max(*x, 1, kMemOrder, kMemScope); + // Memory order out of bounds + old = __hip_atomic_fetch_max(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_max(x, 1, 10, kMemScope); + // Memory scope out of bounds + old = __hip_atomic_fetch_max(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_max(x, 1, kMemOrder, 10); + + // Value must be integer or floating point type + Dummy dummy{}; + old = __hip_atomic_fetch_max(&dummy, 1, kMemOrder, kMemScope); +} + +__global__ void FetchMinCompileKernel(int* x) { + int old{}; + // Valid combinations + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_RELAXED, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_CONSUME, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_ACQUIRE, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_RELEASE, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_ACQ_REL, kMemScope); + old = __hip_atomic_fetch_min(x, 1, __ATOMIC_SEQ_CST, kMemScope); + + // Pointer to a non-const type + old = __hip_atomic_fetch_min(reinterpret_cast(x), 1, kMemOrder, kMemScope); + // Value instead of pointer to the atomic builtin + old = __hip_atomic_fetch_min(*x, 1, kMemOrder, kMemScope); + // Memory order out of bounds + old = __hip_atomic_fetch_min(x, 1, -1, kMemScope); + old = __hip_atomic_fetch_min(x, 1, 10, kMemScope); + // Memory scope out of bounds + old = __hip_atomic_fetch_min(x, 1, kMemOrder, -1); + old = __hip_atomic_fetch_min(x, 1, kMemOrder, 10); + + // Value must be integer or floating point type + Dummy dummy{}; + old = __hip_atomic_fetch_min(&dummy, 1, kMemOrder, kMemScope); +} From 8ee015d1ef00869536b2c14b7f312912753273f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> 
Date: Thu, 28 Dec 2023 15:51:36 +0100 Subject: [PATCH 35/71] EXSWHTEC-262 - Introduce common utilities for atomic arithmetic operations #392 Change-Id: I016e571d15b6b6c3a1b91a4bb4b5410e338efe49 --- catch/unit/atomics/arithmetic_common.hh | 466 ++++++++++++++++++++++++ 1 file changed, 466 insertions(+) create mode 100644 catch/unit/atomics/arithmetic_common.hh diff --git a/catch/unit/atomics/arithmetic_common.hh b/catch/unit/atomics/arithmetic_common.hh new file mode 100644 index 0000000000..0be866390e --- /dev/null +++ b/catch/unit/atomics/arithmetic_common.hh @@ -0,0 +1,466 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include +#include +#include +#include + +namespace cg = cooperative_groups; + +// Atomic operations to which the tests in this file apply +enum class AtomicOperation { + kAdd = 0, + kAddSystem, + kSub, + kSubSystem, + kInc, + kDec, + kUnsafeAdd, + kSafeAdd +}; + +// Constants that are passed as operands to the atomic operations +constexpr auto kIntegerTestValue = 7; +constexpr auto kFloatingPointTestValue = 3.125; +constexpr auto kIncDecWraparoundValue = 1023; + +// Retrieves test value constant based on the atomic operation and test type: +// - kIncDecWraparoundValue for increment and decrement operations +// - kFloatingPointTestValue for floating point test type +// - kIntegerTestValue for integer test type +template +__host__ __device__ TestType GetTestValue() { + if constexpr (operation == AtomicOperation::kInc || operation == AtomicOperation::kDec) { + return kIncDecWraparoundValue; + } + + return std::is_floating_point_v ? kFloatingPointTestValue : kIntegerTestValue; +} + +// Performs an atomic operation on parameter `mem` based on the `operation` enumerator.
+template +__device__ TestType PerformAtomicOperation(TestType* const mem) { + const auto val = GetTestValue(); + + if constexpr (operation == AtomicOperation::kAdd) { + return atomicAdd(mem, val); + } else if constexpr (operation == AtomicOperation::kAddSystem) { + return atomicAdd_system(mem, val); + } else if constexpr (operation == AtomicOperation::kSub) { + return atomicSub(mem, val); + } else if constexpr (operation == AtomicOperation::kSubSystem) { + return atomicSub_system(mem, val); + } else if constexpr (operation == AtomicOperation::kInc) { + return atomicInc(mem, val); + } else if constexpr (operation == AtomicOperation::kDec) { + return atomicDec(mem, val); + } else if constexpr (operation == AtomicOperation::kUnsafeAdd) { + return unsafeAtomicAdd(mem, val); + } else if constexpr (operation == AtomicOperation::kSafeAdd) { + return safeAtomicAdd(mem, val); + } +} + +// This kernel executes the atomic operation specified by the enumerator `operation`. Results of +// the atomic operations are stored in `old_vals`. Each thread executes the atomic operation on the +// same memory location `global_mem`. +// If `use_shared_mem` is true, `global_mem` is copied to shared memory first, the atomic +// operations are executed on shared memory, and the result is copied back to `global_mem`. +template +__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) { + __shared__ TestType shared_mem; + + const auto tid = cg::this_grid().thread_rank(); + + TestType* const mem = use_shared_mem ? 
&shared_mem : global_mem; + + if constexpr (use_shared_mem) { + if (tid == 0) mem[0] = global_mem[0]; + __syncthreads(); + } + + old_vals[tid] = PerformAtomicOperation(mem); + + if constexpr (use_shared_mem) { + __syncthreads(); + if (tid == 0) global_mem[0] = mem[0]; + } +} + +// Indexes array `ptr`, with the size in bytes of each element specified by `pitch` +template +__host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned int pitch, + const unsigned int idx) { + const auto byte_ptr = reinterpret_cast(ptr); + return reinterpret_cast(byte_ptr + idx * pitch); +} + +// This kernel executes the atomic operation specified by the enumerator `operation`. Results of the +// atomic operations are stored in `old_vals`. `global_mem` is an array with `width` number of +// elements. Each thread performs the atomic operation on the element that corresponds to its thread +// id (tid % width). +// The elements of `global_mem` can be larger than sizeof(TestType) with the actual size in bytes +// specified by `pitch`. This is done so we can test scenarios where threads target memory locations +// that are scattered over different cache lines. +// If `use_shared_mem` is true, `global_mem` is copied to shared memory first, the atomic operations +// are executed on shared memory, and the result is copied back to `global_mem`. +// +// For example, given that sizeof(TestType) is 1, `width` is 3, and `pitch` is 4: +// +// 0 1 2 3 4 5 6 7 8 9 10 11 +// global_mem -> | x | | | | x | | | | x | | | | +// | pitch | pitch | pitch | +// +// In this scenario, the atomic operations will target the elements denoted with `x` (addresses 0, +// 4, 8). +template +__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals, + const unsigned int width, const unsigned pitch) { + extern __shared__ uint8_t shared_mem[]; + + const auto tid = cg::this_grid().thread_rank(); + + TestType* const mem = use_shared_mem ? 
reinterpret_cast(shared_mem) : global_mem; + + if constexpr (use_shared_mem) { + if (tid < width) { + const auto target = PitchedOffset(mem, pitch, tid); + *target = *PitchedOffset(global_mem, pitch, tid); + }; + __syncthreads(); + } + + old_vals[tid] = + PerformAtomicOperation(PitchedOffset(mem, pitch, tid % width)); + + if constexpr (use_shared_mem) { + __syncthreads(); + if (tid < width) { + const auto target = PitchedOffset(global_mem, pitch, tid); + *target = *PitchedOffset(mem, pitch, tid); + }; + } +} + +// Used to configure test run +struct TestParams { + auto ThreadCount() const { + return blocks.x * blocks.y * blocks.z * threads.x * threads.y * threads.z; + } + + auto HostIterationsPerThread() const { + return std::max(num_devices * kernel_count * ThreadCount() / 20, width); + } + + dim3 blocks; // number of blocks per kernel launch + dim3 threads; // number of threads per kernel launch + unsigned int num_devices = 1u; // number of devices used + unsigned int kernel_count = 1u; // number of kernels launched per device + unsigned int width = 1u; // number of memory locations targeted + unsigned int pitch = 0u; // defines spacing between memory locations + unsigned int host_thread_count = 0u; // number of host threads launched + LinearAllocs alloc_type; // type of allocation used +}; + +// Reference implementation used to verify results +template +std::tuple, std::vector> TestKernelHostRef(const TestParams& p) { + const auto val = GetTestValue(); + + const auto total_thread_count = p.num_devices * p.kernel_count * p.ThreadCount() + + p.host_thread_count * p.HostIterationsPerThread(); + + std::vector res_vals(p.width); + std::vector old_vals; + old_vals.reserve(total_thread_count); + + auto perform_op = [&](unsigned id) { + auto& res = res_vals[id % p.width]; + old_vals.push_back(res); + + if constexpr (operation == AtomicOperation::kAdd || operation == AtomicOperation::kAddSystem || + operation == AtomicOperation::kUnsafeAdd || + operation == 
AtomicOperation::kSafeAdd) { + res = res + val; + } else if constexpr (operation == AtomicOperation::kSub || + operation == AtomicOperation::kSubSystem) { + res = res - val; + } else if constexpr (operation == AtomicOperation::kInc) { + res = (res >= val) ? 0 : res + 1; + } else if constexpr (operation == AtomicOperation::kDec) { + res = ((res == 0) || (res > val)) ? val : res - 1; + } + }; + + for (auto i = 0u; i < p.num_devices; ++i) { + for (auto j = 0u; j < p.kernel_count; ++j) { + for (auto tid = 0u; tid < p.ThreadCount(); ++tid) { + perform_op(tid); + } + } + } + + for (auto i = 0u; i < p.host_thread_count; ++i) { + for (auto j = 0u; j < p.HostIterationsPerThread(); ++j) { + perform_op(j); + } + } + + return {res_vals, old_vals}; +} + +// Compares the results of the test kernel stored in `res_vals` with results generated by the +// reference implementation +template +void Verify(const TestParams& p, std::vector& res_vals, std::vector& old_vals) { + auto [expected_res_vals, expected_old_vals] = TestKernelHostRef(p); + + for (auto i = 0u; i < res_vals.size(); ++i) { + INFO("Results index: " << i); + REQUIRE(expected_res_vals[i] == res_vals[i]); + } + + std::sort(begin(old_vals), end(old_vals)); + std::sort(begin(expected_old_vals), end(expected_old_vals)); + for (auto i = 0u; i < old_vals.size(); ++i) { + INFO("Old values index: " << i); + REQUIRE(expected_old_vals[i] == old_vals[i]); + } +} + +// Launches the test kernel +template +void LaunchKernel(const TestParams& p, hipStream_t stream, TestType* const mem_ptr, + TestType* const old_vals) { + const auto shared_mem_size = use_shared_mem ? p.width * p.pitch : 0u; + if (p.width == 1 && p.pitch == sizeof(TestType)) + TestKernel + <<>>(mem_ptr, old_vals); + else + TestKernel + <<>>(mem_ptr, old_vals, p.width, p.pitch); +} + +// Performs a host atomic operation on parameter `mem` based on the `operation` enumerator. 
+template +void HostAtomicOperation(const unsigned int iterations, TestType* mem, TestType* const old_vals, + const unsigned int width, const unsigned pitch, TestType base_val) { + const auto val = GetTestValue(); + + for (auto i = 0u; i < iterations; ++i) { + if constexpr (operation == AtomicOperation::kAddSystem) { + old_vals[i] = __atomic_fetch_add(PitchedOffset(mem, pitch, i % width), val, __ATOMIC_RELAXED); + } else if constexpr (operation == AtomicOperation::kSubSystem) { + old_vals[i] = __atomic_fetch_sub(PitchedOffset(mem, pitch, i % width), val, __ATOMIC_RELAXED); + } + } +} + +// Launches host threads based on TestParams::host_thread_count that compete with the test kernel +// for the same resources +template +void PerformHostAtomicOperation(const TestParams& p, TestType* mem, TestType* const old_vals) { + if (p.host_thread_count == 0) { + return; + } + + const auto host_base_val = p.num_devices * p.kernel_count * p.ThreadCount(); + + std::vector threads; + for (auto i = 0u; i < p.host_thread_count; ++i) { + const auto iterations = p.HostIterationsPerThread(); + const auto thread_base_val = host_base_val + i * iterations; + threads.push_back(std::thread(HostAtomicOperation, iterations, mem, + old_vals + thread_base_val, p.width, p.pitch, thread_base_val)); + } + + for (auto& th : threads) { + th.join(); + } +} + +// This is the main body of the test: +// 1. Allocate memory based on TestParams::alloc_type +// 2. Launch kernels based on TestParams::num_devices and TestParams::kernel_count +// 3. Launch host threads based on TestParams::host_thread_count +// 4. Verify the results +template +void TestCore(const TestParams& p) { + const unsigned int flags = + p.alloc_type == LinearAllocs::mallocAndRegister ? 
hipHostRegisterMapped : 0u; + + const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType); + std::vector> old_vals_devs; + std::vector streams; + for (auto i = 0; i < p.num_devices; ++i) { + HIP_CHECK(hipSetDevice(i)); + old_vals_devs.emplace_back(LinearAllocs::hipMalloc, old_vals_alloc_size); + for (auto j = 0; j < p.kernel_count; ++j) { + streams.emplace_back(Streams::created); + } + } + + const auto mem_alloc_size = p.width * p.pitch; + LinearAllocGuard mem_dev(p.alloc_type, mem_alloc_size, flags); + + std::vector old_vals(p.num_devices * p.kernel_count * p.ThreadCount() + + p.host_thread_count * p.HostIterationsPerThread()); + std::vector res_vals(p.width); + + TestType* const mem_ptr = + p.alloc_type == LinearAllocs::hipMalloc ? mem_dev.ptr() : mem_dev.host_ptr(); + + HIP_CHECK(hipMemset(mem_ptr, 0, mem_alloc_size)); + + for (auto i = 0u; i < p.num_devices; ++i) { + for (auto j = 0u; j < p.kernel_count; ++j) { + const auto& stream = streams[i * p.kernel_count + j].stream(); + const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount(); + LaunchKernel(p, stream, mem_dev.ptr(), old_vals); + } + } + + PerformHostAtomicOperation(p, mem_dev.host_ptr(), old_vals.data()); + + for (auto i = 0u; i < p.num_devices; ++i) { + const auto device_offset = i * p.kernel_count * p.ThreadCount(); + HIP_CHECK(hipMemcpy(old_vals.data() + device_offset, old_vals_devs[i].ptr(), + old_vals_alloc_size, hipMemcpyDeviceToHost)); + } + HIP_CHECK(hipMemcpy2D(res_vals.data(), sizeof(TestType), mem_ptr, p.pitch, sizeof(TestType), + p.width, hipMemcpyDeviceToHost)); + + Verify(p, res_vals, old_vals); +} + +inline dim3 GenerateThreadDimensions() { return GENERATE(dim3(16), dim3(1024)); } + +inline dim3 GenerateBlockDimensions() { + int sm_count = 0; + HIP_CHECK(hipDeviceGetAttribute(&sm_count, hipDeviceAttributeMultiprocessorCount, 0)); + return GENERATE_COPY(dim3(sm_count), dim3(sm_count + sm_count / 2)); +} + +// Configures and creates the TestCore for a 
single device, and a single kernel launch +template +void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) { + TestParams params; + params.num_devices = 1; + params.kernel_count = 1; + params.threads = GenerateThreadDimensions(); + params.width = width; + params.pitch = pitch; + + SECTION("Global memory") { + params.blocks = GenerateBlockDimensions(); + using LA = LinearAllocs; + for (const auto alloc_type : + {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } + } + + SECTION("Shared memory") { + params.blocks = dim3(1); + params.alloc_type = LinearAllocs::hipMalloc; + TestCore(params); + } +} + +// Configures and creates the TestCore for a single device, and multiple kernel launches +template +void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsigned int width, + const unsigned int pitch) { + int concurrent_kernels = 0; + HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, 0)); + if (!concurrent_kernels) { + HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution"); + return; + } + + TestParams params; + params.num_devices = 1; + params.kernel_count = kernel_count; + params.blocks = GenerateBlockDimensions(); + params.threads = GenerateThreadDimensions(); + params.width = width; + params.pitch = pitch; + + using LA = LinearAllocs; + for (const auto alloc_type : + {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } +} + +// Configures and creates the TestCore for a multiple devices (and host), and multiple kernel +// launches +template +void MultipleDeviceMultipleKernelAndHostTest(const unsigned int num_devices, + const unsigned int 
kernel_count, + const unsigned int width, const unsigned int pitch, + const unsigned int host_thread_count = 0u) { + if (num_devices > 1) { + if (HipTest::getDeviceCount() < num_devices) { + std::string msg = std::to_string(num_devices) + " devices are required"; + HipTest::HIP_SKIP_TEST(msg.c_str()); + return; + } + } + + if (kernel_count > 1) { + for (auto i = 0u; i < num_devices; ++i) { + int concurrent_kernels = 0; + HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, i)); + if (!concurrent_kernels) { + HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution"); + return; + } + } + } + + TestParams params; + params.num_devices = num_devices; + params.kernel_count = kernel_count; + params.blocks = GenerateBlockDimensions(); + params.threads = GenerateThreadDimensions(); + params.width = width; + params.pitch = pitch; + params.host_thread_count = host_thread_count; + + using LA = LinearAllocs; + for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } +} \ No newline at end of file From e70cadd514f34df13f9d6786c54b11917be7b02e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 16:41:42 +0100 Subject: [PATCH 36/71] EXSWHTEC-262 - Implement tests for atomic add operations #393 Change-Id: I11ff4658ef2076b25e0bdb91bbcd4f436b0fbd27 --- catch/unit/atomics/CMakeLists.txt | 9 + catch/unit/atomics/atomicAdd.cc | 167 +++++++++++ .../atomics/atomicAdd_negative_kernels.cc | 219 ++++++++++++++ .../atomics/atomicAdd_negative_kernels_rtc.hh | 273 ++++++++++++++++++ catch/unit/atomics/atomicAdd_system.cc | 177 ++++++++++++ catch/unit/atomics/safeAtomicAdd.cc | 123 ++++++++ catch/unit/atomics/unsafeAtomicAdd.cc | 124 ++++++++ 7 files changed, 1092 insertions(+) 
create mode 100644 catch/unit/atomics/atomicAdd.cc create mode 100644 catch/unit/atomics/atomicAdd_negative_kernels.cc create mode 100644 catch/unit/atomics/atomicAdd_negative_kernels_rtc.hh create mode 100644 catch/unit/atomics/atomicAdd_system.cc create mode 100644 catch/unit/atomics/safeAtomicAdd.cc create mode 100644 catch/unit/atomics/unsafeAtomicAdd.cc diff --git a/catch/unit/atomics/CMakeLists.txt b/catch/unit/atomics/CMakeLists.txt index bba917bc2f..f7ab04b3b6 100644 --- a/catch/unit/atomics/CMakeLists.txt +++ b/catch/unit/atomics/CMakeLists.txt @@ -38,6 +38,10 @@ set(TEST_SRC atomic_builtins.cc acquire_release.cc sequential_consistency.cc + atomicAdd.cc + atomicAdd_system.cc + unsafeAtomicAdd.cc + safeAtomicAdd.cc atomicExch.cc atomicExch_system.cc __hip_atomic_fetch_and.cc @@ -47,6 +51,7 @@ set(TEST_SRC ) if(HIP_PLATFORM MATCHES "nvidia") + set_source_files_properties(atomicAdd_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicExch_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicAnd_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicOr_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") @@ -95,6 +100,10 @@ add_test(NAME Unit_AtomicBuiltins_Negative_Parameters COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} atomic_builtins_kernels.cc 60 27) # Should be 35 warnings, see EXSWHTEC-309 +add_test(NAME Unit_atomicAdd_Negative_Parameters + COMMAND python3 
${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + atomicAdd_negative_kernels.cc 48) # SWDEV-435667: Below 2 tests failed in stress test on 01/12/23 #add_test(NAME Unit_atomicExch_Negative_Parameters diff --git a/catch/unit/atomics/atomicAdd.cc b/catch/unit/atomics/atomicAdd.cc new file mode 100644 index 0000000000..76eef23ac8 --- /dev/null +++ b/catch/unit/atomics/atomicAdd.cc @@ -0,0 +1,167 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" +#include "atomicAdd_negative_kernels_rtc.hh" + +#include + +/** + * @addtogroup atomicAdd atomicAdd + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * addition on a target memory location. 
Each thread will add the same value to the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory is validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicAdd + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - Several grid and block dimension combinations (only one block is used for shared memory). + * Test source + * ------------------------ + * - unit/atomics/atomicAdd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAdd_Positive", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on a single device wherein all threads will perform + * an atomic addition on a target memory location. 
Each thread will add the same value to the memory + * location, storing the return value into a separate output array slot corresponding to it. Once + * complete, the output array and target memory is validated to contain all the expected values. + * Several memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicAdd + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicAdd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAdd_Positive_Multi_Kernel", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceMultipleKernelTest(2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for all overloads of + * atomicAdd. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicAdd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicAdd_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = GENERATE(kAtomicAdd_int, kAtomicAdd_uint, kAtomicAdd_ulong, + kAtomicAdd_ulonglong, kAtomicAdd_float, kAtomicAdd_double); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicAdd_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{8}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/catch/unit/atomics/atomicAdd_negative_kernels.cc b/catch/unit/atomics/atomicAdd_negative_kernels.cc new file mode 100644 index 0000000000..e0e8112cdf --- /dev/null +++ b/catch/unit/atomics/atomicAdd_negative_kernels.cc @@ -0,0 +1,219 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* int atomicAdd(int* address, int val) */ +__global__ void atomicAdd_int_v1(int* address, int* result) { *result = atomicAdd(&address, 1234); } + +__global__ void atomicAdd_int_v2(int* address, int* result) { + *result = atomicAdd(address, address); +} + +__global__ void atomicAdd_int_v3(int* address, int* result) { *result = atomicAdd(1234, 1234); } + +__global__ void atomicAdd_int_v4(Dummy* address, int* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_int_v5(char* address, int* result) { *result = atomicAdd(address, 1234); } + +__global__ void atomicAdd_int_v6(short* address, int* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_int_v7(long* address, int* result) { *result = atomicAdd(address, 1234); } + +__global__ void atomicAdd_int_v8(long long* address, int* result) { + *result = atomicAdd(address, 1234); +} + +/* unsigned int atomicAdd(unsigned int* address, unsigned int val) */ +__global__ void atomicAdd_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicAdd(&address, 1234); +} + +__global__ void atomicAdd_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicAdd(address, address); +} + +__global__ void atomicAdd_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicAdd(1234, 1234); +} + +__global__ void atomicAdd_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_uint_v5(char* address, unsigned int* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_uint_v6(short* address, unsigned int* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_uint_v7(long* address, unsigned int* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_uint_v8(long long* address, unsigned int* result) { + *result = 
atomicAdd(address, 1234); +} + +/* atomicAdd(unsigned long* address, unsigned long val) */ +__global__ void atomicAdd_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicAdd(&address, 1234); +} + +__global__ void atomicAdd_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicAdd(address, address); +} + +__global__ void atomicAdd_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicAdd(1234, 1234); +} + +__global__ void atomicAdd_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_ulong_v5(char* address, unsigned long* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_ulong_v6(short* address, unsigned long* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_ulong_v7(long* address, unsigned long* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_ulong_v8(long long* address, unsigned long* result) { + *result = atomicAdd(address, 1234); +} + +/* atomicAdd(unsigned long long* address, unsigned long long val) */ +__global__ void atomicAdd_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicAdd(&address, 1234); +} + +__global__ void atomicAdd_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicAdd(address, address); +} + +__global__ void atomicAdd_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicAdd(1234, 1234); +} + +__global__ void atomicAdd_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void 
atomicAdd_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); +} + +__global__ void atomicAdd_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); +} + +/* atomicAdd(float* address, float val) */ +__global__ void atomicAdd_float_v1(float* address, float* result) { + *result = atomicAdd(&address, 1234.f); +} + +__global__ void atomicAdd_float_v2(float* address, float* result) { + *result = atomicAdd(address, address); +} + +__global__ void atomicAdd_float_v3(float* address, float* result) { + *result = atomicAdd(1234.f, 1234.f); +} + +__global__ void atomicAdd_float_v4(Dummy* address, float* result) { + *result = atomicAdd(address, 1234.f); +} + +__global__ void atomicAdd_float_v5(char* address, float* result) { + *result = atomicAdd(address, 1234.f); +} + +__global__ void atomicAdd_float_v6(short* address, float* result) { + *result = atomicAdd(address, 1234.f); +} + +__global__ void atomicAdd_float_v7(long* address, float* result) { + *result = atomicAdd(address, 1234.f); +} + +__global__ void atomicAdd_float_v8(long long* address, float* result) { + *result = atomicAdd(address, 1234); +} + +/* atomicAdd(double* address, double val) */ +__global__ void atomicAdd_double_v1(double* address, double* result) { + *result = atomicAdd(&address, 1234.0); +} + +__global__ void atomicAdd_double_v2(double* address, double* result) { + *result = atomicAdd(address, address); +} + +__global__ void atomicAdd_double_v3(double* address, double* result) { + *result = atomicAdd(1234.0, 1234.0); +} + +__global__ void atomicAdd_double_v4(Dummy* address, double* result) { + *result = atomicAdd(address, 1234.0); +} + +__global__ void atomicAdd_double_v5(char* address, double* result) { + *result = atomicAdd(address, 1234.0); +} + +__global__ void atomicAdd_double_v6(short* address, double* result) { + *result = atomicAdd(address, 1234.0); +} + +__global__ void atomicAdd_double_v7(long* address, 
double* result) { + *result = atomicAdd(address, 1234.0); +} + +__global__ void atomicAdd_double_v8(long long* address, double* result) { + *result = atomicAdd(address, 1234.0); +} diff --git a/catch/unit/atomics/atomicAdd_negative_kernels_rtc.hh b/catch/unit/atomics/atomicAdd_negative_kernels_rtc.hh new file mode 100644 index 0000000000..c5141d03bc --- /dev/null +++ b/catch/unit/atomics/atomicAdd_negative_kernels_rtc.hh @@ -0,0 +1,273 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicAdd_int{ + R"( + __global__ void atomicAdd_int_v1(int* address, int* result) { + *result = atomicAdd(&address, 1234); + } + + __global__ void atomicAdd_int_v2(int* address, int* result) { + *result = atomicAdd(address, address); + } + + __global__ void atomicAdd_int_v3(int* address, int* result) { + *result = atomicAdd(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAdd_int_v4(Dummy* address, int* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_int_v5(char* address, int* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_int_v6(short* address, int* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_int_v7(long* address, int* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_int_v8(long long* address, int* result) { + *result = atomicAdd(address, 1234); + } + )"}; + +static constexpr auto kAtomicAdd_uint{ + R"( + __global__ void atomicAdd_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicAdd(&address, 1234); + } + + __global__ void atomicAdd_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicAdd(address, address); + } + + __global__ void atomicAdd_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicAdd(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAdd_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_uint_v5(char* address, unsigned int* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_uint_v6(short* address, unsigned int* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_uint_v7(long* address, unsigned int* result) { + *result = atomicAdd(address, 1234); 
+ } + + __global__ void atomicAdd_uint_v8(long long* address, unsigned int* result) { + *result = atomicAdd(address, 1234); + } + )"}; + +static constexpr auto kAtomicAdd_ulong{ + R"( + __global__ void atomicAdd_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicAdd(&address, 1234); + } + + __global__ void atomicAdd_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicAdd(address, address); + } + + __global__ void atomicAdd_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicAdd(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAdd_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_ulong_v5(char* address, unsigned long* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_ulong_v6(short* address, unsigned long* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_ulong_v7(long* address, unsigned long* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_ulong_v8(long long* address, unsigned long* result) { + *result = atomicAdd(address, 1234); + } + )"}; + +static constexpr auto kAtomicAdd_ulonglong{ + R"( + __global__ void atomicAdd_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicAdd(&address, 1234); + } + + __global__ void atomicAdd_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicAdd(address, address); + } + + __global__ void atomicAdd_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicAdd(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAdd_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void 
atomicAdd_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); + } + + __global__ void atomicAdd_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicAdd(address, 1234); + } + )"}; + +static constexpr auto kAtomicAdd_float{ + R"( + __global__ void atomicAdd_float_v1(float* address, float* result) { + *result = atomicAdd(&address, 1234.f); + } + + __global__ void atomicAdd_float_v2(float* address, float* result) { + *result = atomicAdd(address, address); + } + + __global__ void atomicAdd_float_v3(float* address, float* result) { + *result = atomicAdd(1234.f, 1234.f); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAdd_float_v4(Dummy* address, float* result) { + *result = atomicAdd(address, 1234.f); + } + + __global__ void atomicAdd_float_v5(char* address, float* result) { + *result = atomicAdd(address, 1234.f); + } + + __global__ void atomicAdd_float_v6(short* address, float* result) { + *result = atomicAdd(address, 1234.f); + } + + __global__ void atomicAdd_float_v7(long* address, float* result) { + *result = atomicAdd(address, 1234.f); + } + + __global__ void atomicAdd_float_v8(long long* address, float* result) { + *result = atomicAdd(address, 1234); + } + )"}; + +static constexpr auto kAtomicAdd_double{ + R"( + __global__ void atomicAdd_double_v1(double* address, double* result) { + *result = atomicAdd(&address, 1234.0); + } + + __global__ void atomicAdd_double_v2(double* address, double* result) { + *result = atomicAdd(address, address); + } + + __global__ void atomicAdd_double_v3(double* address, double* result) { + *result = atomicAdd(1234.0, 1234.0); + } + + class Dummy 
{ + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicAdd_double_v4(Dummy* address, double* result) { + *result = atomicAdd(address, 1234.0); + } + + __global__ void atomicAdd_double_v5(char* address, double* result) { + *result = atomicAdd(address, 1234.0); + } + + __global__ void atomicAdd_double_v6(short* address, double* result) { + *result = atomicAdd(address, 1234.0); + } + + __global__ void atomicAdd_double_v7(long* address, double* result) { + *result = atomicAdd(address, 1234.0); + } + + __global__ void atomicAdd_double_v8(long long* address, double* result) { + *result = atomicAdd(address, 1234.0); + } + )"}; diff --git a/catch/unit/atomics/atomicAdd_system.cc b/catch/unit/atomics/atomicAdd_system.cc new file mode 100644 index 0000000000..c51ce0ad1f --- /dev/null +++ b/catch/unit/atomics/atomicAdd_system.cc @@ -0,0 +1,177 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" + +#include + +/** + * @addtogroup atomicAdd_system atomicAdd_system + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on two devices wherein all threads will perform + * an atomic addition on a target memory location. Each thread will add the same value to the memory + * location, storing the return value into a separate output array slot corresponding to it. Once + * complete, the output array and target memory is validated to contain all the expected values. + * Several memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicAdd_system + * - hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicAdd_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Peer_GPUs", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel on a single device wherein all threads will perform + * an atomic addition on a target memory location. Each thread will add the same value to the memory + * location, storing the return value into a separate output array slot corresponding to it. While + * the kernel is running, the host performs atomic additions, in 4 threads, on the same memory + * location(s). Once complete, the output array and target memory is validated to contain all the + * expected values. Several memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. 
+ * + * - The test is run for: + * - All overloads of atomicAdd_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicAdd_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_GPU", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, 1, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, warp_size, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, warp_size, cache_line_size, 4); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times on two devices wherein all threads will perform + * an atomic addition on a target memory location. Each thread will add the same value to the memory + * location, storing the return value into a separate output array slot corresponding to it. While + * the kernel is running, the host performs atomic additions, in 4 threads, on the same memory + * location(s). Once complete, the output array and target memory is validated to contain all the + * expected values. 
Several memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicAdd_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicAdd_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicAdd_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, 1, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, cache_line_size, 4); + } + } +} diff --git a/catch/unit/atomics/safeAtomicAdd.cc b/catch/unit/atomics/safeAtomicAdd.cc new file mode 100644 index 0000000000..cfc760a7ce --- /dev/null +++ b/catch/unit/atomics/safeAtomicAdd.cc @@ -0,0 +1,123 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" + +#include + +/** + * @addtogroup safeAtomicAdd safeAtomicAdd + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * addition on a target memory location. Each thread will add the same value to the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory is validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. 
+ * + * - The test is run for: + * - All overloads of safeAtomicAdd + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - Several grid and block dimension combinations (only one block is used for shared memory). + * Test source + * ------------------------ + * - unit/atomics/safeAtomicAdd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicAdd_Positive", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on a single device wherein all threads will + * perform an atomic addition on a target memory location. Each thread will add the same value to + * the memory location, storing the return value into a separate output array slot corresponding + * to it. Once complete, the output array and target memory is validated to contain all the + * expected values. Several memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. 
+ * + * - The test is run for: + * - All overloads of safeAtomicAdd + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicAdd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicAdd_Positive_Multi_Kernel", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceMultipleKernelTest(2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + cache_line_size); + } + } +} diff --git a/catch/unit/atomics/unsafeAtomicAdd.cc b/catch/unit/atomics/unsafeAtomicAdd.cc new file mode 100644 index 0000000000..8c717c7bf5 --- /dev/null +++ b/catch/unit/atomics/unsafeAtomicAdd.cc @@ -0,0 +1,124 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" + +#include + +/** + * @addtogroup unsafeAtomicAdd unsafeAtomicAdd + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * addition on a target memory location. Each thread will add the same value to the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory is validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of unsafeAtomicAdd + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - Several grid and block dimension combinations (only one block is used for shared memory). 
+ * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicAdd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_Positive", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on a single device wherein all threads will + * perform an atomic addition on a target memory location. Each thread will add the same value to + * the memory location, storing the return value into a separate output array slot corresponding + * to it. Once complete, the output array and target memory is validated to contain all the + * expected values. Several memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of unsafeAtomicAdd + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. 
+ * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicAdd.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicAdd_Positive_Multi_Kernel", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceMultipleKernelTest(2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + cache_line_size); + } + } +} From cc957255f3a81f547f0c2e189cf3ae75e966dba7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 16:35:05 +0100 Subject: [PATCH 37/71] EXSWHTEC-262 - Implement tests for atomic sub operations #394 Change-Id: Ib817f1245695b9eeaaa11d7a2ff9b2eb4ca4f72d --- catch/unit/atomics/CMakeLists.txt | 7 + catch/unit/atomics/atomicSub.cc | 167 +++++++++++ .../atomics/atomicSub_negative_kernels.cc | 219 ++++++++++++++ .../atomics/atomicSub_negative_kernels_rtc.cc | 273 ++++++++++++++++++ catch/unit/atomics/atomicSub_system.cc | 177 ++++++++++++ 5 files changed, 843 insertions(+) create mode 100644 catch/unit/atomics/atomicSub.cc create mode 100644 catch/unit/atomics/atomicSub_negative_kernels.cc create mode 100644 catch/unit/atomics/atomicSub_negative_kernels_rtc.cc create mode 100644 catch/unit/atomics/atomicSub_system.cc diff --git a/catch/unit/atomics/CMakeLists.txt b/catch/unit/atomics/CMakeLists.txt index f7ab04b3b6..d58bca3bca 100644 --- a/catch/unit/atomics/CMakeLists.txt +++ b/catch/unit/atomics/CMakeLists.txt @@ -42,6 +42,8 @@ set(TEST_SRC 
atomicAdd_system.cc unsafeAtomicAdd.cc safeAtomicAdd.cc + atomicSub.cc + atomicSub_system.cc atomicExch.cc atomicExch_system.cc __hip_atomic_fetch_and.cc @@ -52,6 +54,7 @@ set(TEST_SRC if(HIP_PLATFORM MATCHES "nvidia") set_source_files_properties(atomicAdd_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") + set_source_files_properties(atomicSub_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicExch_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicAnd_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicOr_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") @@ -104,6 +107,10 @@ add_test(NAME Unit_atomicAdd_Negative_Parameters COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} atomicAdd_negative_kernels.cc 48) +add_test(NAME Unit_atomicSub_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + atomicSub_negative_kernels.cc 48) # SWDEV-435667: Below 2 tests failed in stress test on 01/12/23 #add_test(NAME Unit_atomicExch_Negative_Parameters diff --git a/catch/unit/atomics/atomicSub.cc b/catch/unit/atomics/atomicSub.cc new file mode 100644 index 0000000000..75d1678c46 --- /dev/null +++ b/catch/unit/atomics/atomicSub.cc @@ -0,0 +1,167 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" +#include "atomicSub_negative_kernels_rtc.hh" + +#include + +/** + * @addtogroup atomicSub atomicSub + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * subtraction on a target memory location. Each thread will subtract the same value from the memory + * location, storing the return value into a separate output array slot corresponding to it. Once + * complete, the output array and target memory is validated to contain all the expected values. + * Several memory access patterns are tested: + * -# All threads subtract from a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. 
+ * + * - The test is run for: + * - All overloads of atomicSub + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - Several grid and block dimension combinations (only one block is used for shared memory). + * Test source + * ------------------------ + * - unit/atomics/atomicSub.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicSub_Positive", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on a single device wherein all threads will perform + * an atomic subtraction on a target memory location. Each thread will subtract the same value from + * the memory location, storing the return value into a separate output array slot corresponding to + * it. Once complete, the output array and target memory is validated to contain all the expected + * values. Several memory access patterns are tested: + * -# All threads subtract from a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. 
+ * + * - The test is run for: + * - All overloads of atomicSub + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicSub.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicSub_Positive_Multi_Kernel", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceMultipleKernelTest(2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Runtime-compiles (via hipRTC) kernels that pass combinations of arguments of invalid types + * for all overloads of atomicSub.
+ * Test source + * ------------------------ + * - unit/atomics/atomicSub.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicSub_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = GENERATE(kAtomicSub_int, kAtomicSub_uint, kAtomicSub_ulong, + kAtomicSub_ulonglong, kAtomicSub_float, kAtomicSub_double); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicSub_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{8}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} \ No newline at end of file diff --git a/catch/unit/atomics/atomicSub_negative_kernels.cc b/catch/unit/atomics/atomicSub_negative_kernels.cc new file mode 100644 index 0000000000..c13b243db1 --- /dev/null +++ b/catch/unit/atomics/atomicSub_negative_kernels.cc @@ -0,0 +1,219 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* int atomicSub(int* address, int val) */ +__global__ void atomicSub_int_v1(int* address, int* result) { *result = atomicSub(&address, 1234); } + +__global__ void atomicSub_int_v2(int* address, int* result) { + *result = atomicSub(address, address); +} + +__global__ void atomicSub_int_v3(int* address, int* result) { *result = atomicSub(1234, 1234); } + +__global__ void atomicSub_int_v4(Dummy* address, int* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_int_v5(char* address, int* result) { *result = atomicSub(address, 1234); } + +__global__ void atomicSub_int_v6(short* address, int* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_int_v7(long* address, int* result) { *result = atomicSub(address, 1234); } + +__global__ void atomicSub_int_v8(long long* address, int* result) { + *result = atomicSub(address, 1234); +} + +/* unsigned int atomicSub(unsigned int* address, unsigned int val) */ +__global__ void atomicSub_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicSub(&address, 1234); +} + +__global__ void atomicSub_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicSub(address, address); +} + +__global__ void atomicSub_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicSub(1234, 1234); +} + +__global__ void atomicSub_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_uint_v5(char* address, unsigned int* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_uint_v6(short* address, unsigned int* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_uint_v7(long* address, unsigned int* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_uint_v8(long long* address, unsigned int* result) { + *result = 
atomicSub(address, 1234); +} + +/* atomicSub(unsigned long* address, unsigned long val) */ +__global__ void atomicSub_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicSub(&address, 1234); +} + +__global__ void atomicSub_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicSub(address, address); +} + +__global__ void atomicSub_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicSub(1234, 1234); +} + +__global__ void atomicSub_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_ulong_v5(char* address, unsigned long* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_ulong_v6(short* address, unsigned long* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_ulong_v7(long* address, unsigned long* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_ulong_v8(long long* address, unsigned long* result) { + *result = atomicSub(address, 1234); +} + +/* atomicSub(unsigned long long* address, unsigned long long val) */ +__global__ void atomicSub_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicSub(&address, 1234); +} + +__global__ void atomicSub_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicSub(address, address); +} + +__global__ void atomicSub_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicSub(1234, 1234); +} + +__global__ void atomicSub_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicSub(address, 1234); +} + +__global__ void 
atomicSub_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicSub(address, 1234); +} + +__global__ void atomicSub_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicSub(address, 1234); +} + +/* atomicSub(float* address, float val) */ +__global__ void atomicSub_float_v1(float* address, float* result) { + *result = atomicSub(&address, 1234.f); +} + +__global__ void atomicSub_float_v2(float* address, float* result) { + *result = atomicSub(address, address); +} + +__global__ void atomicSub_float_v3(float* address, float* result) { + *result = atomicSub(1234.f, 1234.f); +} + +__global__ void atomicSub_float_v4(Dummy* address, float* result) { + *result = atomicSub(address, 1234.f); +} + +__global__ void atomicSub_float_v5(char* address, float* result) { + *result = atomicSub(address, 1234.f); +} + +__global__ void atomicSub_float_v6(short* address, float* result) { + *result = atomicSub(address, 1234.f); +} + +__global__ void atomicSub_float_v7(long* address, float* result) { + *result = atomicSub(address, 1234.f); +} + +__global__ void atomicSub_float_v8(long long* address, float* result) { + *result = atomicSub(address, 1234); +} + +/* atomicSub(double* address, double val) */ +__global__ void atomicSub_double_v1(double* address, double* result) { + *result = atomicSub(&address, 1234.0); +} + +__global__ void atomicSub_double_v2(double* address, double* result) { + *result = atomicSub(address, address); +} + +__global__ void atomicSub_double_v3(double* address, double* result) { + *result = atomicSub(1234.0, 1234.0); +} + +__global__ void atomicSub_double_v4(Dummy* address, double* result) { + *result = atomicSub(address, 1234.0); +} + +__global__ void atomicSub_double_v5(char* address, double* result) { + *result = atomicSub(address, 1234.0); +} + +__global__ void atomicSub_double_v6(short* address, double* result) { + *result = atomicSub(address, 1234.0); +} + +__global__ void atomicSub_double_v7(long* address, 
double* result) { + *result = atomicSub(address, 1234.0); +} + +__global__ void atomicSub_double_v8(long long* address, double* result) { + *result = atomicSub(address, 1234.0); +} diff --git a/catch/unit/atomics/atomicSub_negative_kernels_rtc.cc b/catch/unit/atomics/atomicSub_negative_kernels_rtc.cc new file mode 100644 index 0000000000..543dba3026 --- /dev/null +++ b/catch/unit/atomics/atomicSub_negative_kernels_rtc.cc @@ -0,0 +1,273 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicSub_int{ + R"( + __global__ void atomicSub_int_v1(int* address, int* result) { + *result = atomicSub(&address, 1234); + } + + __global__ void atomicSub_int_v2(int* address, int* result) { + *result = atomicSub(address, address); + } + + __global__ void atomicSub_int_v3(int* address, int* result) { + *result = atomicSub(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicSub_int_v4(Dummy* address, int* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_int_v5(char* address, int* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_int_v6(short* address, int* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_int_v7(long* address, int* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_int_v8(long long* address, int* result) { + *result = atomicSub(address, 1234); + } + )"}; + +static constexpr auto kAtomicSub_uint{ + R"( + __global__ void atomicSub_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicSub(&address, 1234); + } + + __global__ void atomicSub_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicSub(address, address); + } + + __global__ void atomicSub_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicSub(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicSub_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_uint_v5(char* address, unsigned int* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_uint_v6(short* address, unsigned int* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_uint_v7(long* address, unsigned int* result) { + *result = atomicSub(address, 1234); 
+ } + + __global__ void atomicSub_uint_v8(long long* address, unsigned int* result) { + *result = atomicSub(address, 1234); + } + )"}; + +static constexpr auto kAtomicSub_ulong{ + R"( + __global__ void atomicSub_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicSub(&address, 1234); + } + + __global__ void atomicSub_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicSub(address, address); + } + + __global__ void atomicSub_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicSub(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicSub_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_ulong_v5(char* address, unsigned long* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_ulong_v6(short* address, unsigned long* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_ulong_v7(long* address, unsigned long* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_ulong_v8(long long* address, unsigned long* result) { + *result = atomicSub(address, 1234); + } + )"}; + +static constexpr auto kAtomicSub_ulonglong{ + R"( + __global__ void atomicSub_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicSub(&address, 1234); + } + + __global__ void atomicSub_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicSub(address, address); + } + + __global__ void atomicSub_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicSub(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicSub_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicSub(address, 1234); + } + + __global__ void 
atomicSub_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicSub(address, 1234); + } + + __global__ void atomicSub_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicSub(address, 1234); + } + )"}; + +static constexpr auto kAtomicSub_float{ + R"( + __global__ void atomicSub_float_v1(float* address, float* result) { + *result = atomicSub(&address, 1234.f); + } + + __global__ void atomicSub_float_v2(float* address, float* result) { + *result = atomicSub(address, address); + } + + __global__ void atomicSub_float_v3(float* address, float* result) { + *result = atomicSub(1234.f, 1234.f); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicSub_float_v4(Dummy* address, float* result) { + *result = atomicSub(address, 1234.f); + } + + __global__ void atomicSub_float_v5(char* address, float* result) { + *result = atomicSub(address, 1234.f); + } + + __global__ void atomicSub_float_v6(short* address, float* result) { + *result = atomicSub(address, 1234.f); + } + + __global__ void atomicSub_float_v7(long* address, float* result) { + *result = atomicSub(address, 1234.f); + } + + __global__ void atomicSub_float_v8(long long* address, float* result) { + *result = atomicSub(address, 1234); + } + )"}; + +static constexpr auto kAtomicSub_double{ + R"( + __global__ void atomicSub_double_v1(double* address, double* result) { + *result = atomicSub(&address, 1234.0); + } + + __global__ void atomicSub_double_v2(double* address, double* result) { + *result = atomicSub(address, address); + } + + __global__ void atomicSub_double_v3(double* address, double* result) { + *result = atomicSub(1234.0, 1234.0); + } + + class Dummy 
{ + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicSub_double_v4(Dummy* address, double* result) { + *result = atomicSub(address, 1234.0); + } + + __global__ void atomicSub_double_v5(char* address, double* result) { + *result = atomicSub(address, 1234.0); + } + + __global__ void atomicSub_double_v6(short* address, double* result) { + *result = atomicSub(address, 1234.0); + } + + __global__ void atomicSub_double_v7(long* address, double* result) { + *result = atomicSub(address, 1234.0); + } + + __global__ void atomicSub_double_v8(long long* address, double* result) { + *result = atomicSub(address, 1234.0); + } + )"}; diff --git a/catch/unit/atomics/atomicSub_system.cc b/catch/unit/atomics/atomicSub_system.cc new file mode 100644 index 0000000000..0abccf754f --- /dev/null +++ b/catch/unit/atomics/atomicSub_system.cc @@ -0,0 +1,177 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" + +#include + +/** + * @addtogroup atomicSub_system atomicSub_system + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on two devices wherein all threads will perform + * an atomic subtraction on a target memory location. Each thread will subtract the same value + * from the memory location, storing the return value into a separate output array slot + * corresponding to it. Once complete, the output array and target memory is validated to contain + * all the expected values. Several memory access patterns are tested: + * -# All threads subtract from a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicSub_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations.
+ * Test source + * ------------------------ + * - unit/atomics/atomicSub_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Peer_GPUs", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel on a single device wherein all threads will perform + * an atomic subtraction on a target memory location. Each thread will subtract the same value from + * the memory location, storing the return value into a separate output array slot corresponding to + * it. While the kernel is running, the host performs atomic subtractions, in 4 threads, on the same + * memory location(s). Once complete, the output array and target memory is validated to contain + * all the expected values. Several memory access patterns are tested: + * -# All threads subtract from a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes.
+ * + * - The test is run for: + * - All overloads of atomicSub_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicSub_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_GPU", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, 1, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, warp_size, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, warp_size, cache_line_size, 4); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times on two devices wherein all threads will perform + * an atomic subtraction on a target memory location. Each thread will subtract the same value from + * the memory location, storing the return value into a separate output array slot corresponding to + * it. While the kernel is running, the host performs atomic subtractions, in 4 threads, on the same + * memory location(s). Once complete, the output array and target memory is validated to contain + * all the expected values.
Several memory access patterns are tested: + * -# All threads subtract from a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicSub_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicSub_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicSub_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, 1, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, cache_line_size, 4); + } + } +} From 8f244479cfc4e024cb061c75cae630ad4d7c76b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 16:32:42 +0100 Subject: [PATCH 38/71] EXWHTEC-262 - Implement tests for increment/decrement atomic operations #395 Change-Id: I4d3ff4beb9385abdebf59e3eac8783a5c625fdac --- catch/unit/atomics/CMakeLists.txt | 11 ++ catch/unit/atomics/atomicDec.cc | 164 ++++++++++++++++++ .../atomics/atomicDec_negative_kernels.cc | 62 +++++++ 
.../atomics/atomicDec_negative_kernels_rtc.hh | 68 ++++++++ catch/unit/atomics/atomicInc.cc | 164 ++++++++++++++++++ .../atomics/atomicInc_negative_kernels.cc | 62 +++++++ .../atomics/atomicInc_negative_kernels_rtc.hh | 68 ++++++++ 7 files changed, 599 insertions(+) create mode 100644 catch/unit/atomics/atomicDec.cc create mode 100644 catch/unit/atomics/atomicDec_negative_kernels.cc create mode 100644 catch/unit/atomics/atomicDec_negative_kernels_rtc.hh create mode 100644 catch/unit/atomics/atomicInc.cc create mode 100644 catch/unit/atomics/atomicInc_negative_kernels.cc create mode 100644 catch/unit/atomics/atomicInc_negative_kernels_rtc.hh diff --git a/catch/unit/atomics/CMakeLists.txt b/catch/unit/atomics/CMakeLists.txt index d58bca3bca..fecef54c85 100644 --- a/catch/unit/atomics/CMakeLists.txt +++ b/catch/unit/atomics/CMakeLists.txt @@ -44,6 +44,8 @@ set(TEST_SRC safeAtomicAdd.cc atomicSub.cc atomicSub_system.cc + atomicInc.cc + atomicDec.cc atomicExch.cc atomicExch_system.cc __hip_atomic_fetch_and.cc @@ -111,6 +113,15 @@ add_test(NAME Unit_atomicSub_Negative_Parameters COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} atomicSub_negative_kernels.cc 48) +add_test(NAME Unit_atomicInc_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + atomicInc_negative_kernels.cc 8) + +add_test(NAME Unit_atomicDec_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + atomicDec_negative_kernels.cc 8) # SWDEV-435667: Below 2 tests failed in stress test on 01/12/23 #add_test(NAME Unit_atomicExch_Negative_Parameters diff --git a/catch/unit/atomics/atomicDec.cc b/catch/unit/atomics/atomicDec.cc new file mode 100644 index 0000000000..e088ebe2b6 --- /dev/null +++ b/catch/unit/atomics/atomicDec.cc @@ -0,0 
+1,164 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" +#include "atomicDec_negative_kernels_rtc.hh" + +#include + +/** + * @addtogroup atomicDec atomicDec + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * decrement on a target memory location. Each thread will decrement the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory is validated to contain all the expected values. 
Several + * memory access patterns are tested: + * -# All threads decrement a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicDec + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - Several grid and block dimension combinations (only one block is used for shared memory). + * Test source + * ------------------------ + * - unit/atomics/atomicDec.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicDec_Positive", "", unsigned int) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on a single device wherein all threads will perform + * an atomic decrement on a target memory location. Each thread will decrement the memory + * location, storing the return value into a separate output array slot corresponding to it. Once + * complete, the output array and target memory is validated to contain all the expected values. 
+ * Several memory access patterns are tested: + * -# All threads decrement a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicDec + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicDec.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicDec_Positive_Multi_Kernel", "", unsigned int) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceMultipleKernelTest(2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for all overloads of + * atomicDec. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicDec.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicDec_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = GENERATE(kAtomicDec_uint); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicDec_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{8}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} \ No newline at end of file diff --git a/catch/unit/atomics/atomicDec_negative_kernels.cc b/catch/unit/atomics/atomicDec_negative_kernels.cc new file mode 100644 index 0000000000..4177ec0e70 --- /dev/null +++ b/catch/unit/atomics/atomicDec_negative_kernels.cc @@ -0,0 +1,62 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* unsigned int atomicDec(unsigned int* address, unsigned int val) */ +__global__ void atomicDec_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicDec(&address, 1234); +} + +__global__ void atomicDec_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicDec(address, address); +} + +__global__ void atomicDec_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicDec(1234, 1234); +} + +__global__ void atomicDec_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicDec(address, 1234); +} + +__global__ void atomicDec_uint_v5(char* address, unsigned int* result) { + *result = atomicDec(address, 1234); +} + +__global__ void atomicDec_uint_v6(short* address, unsigned int* result) { + *result = atomicDec(address, 1234); +} + +__global__ void atomicDec_uint_v7(long* address, unsigned int* result) { + *result = atomicDec(address, 1234); +} + +__global__ void atomicDec_uint_v8(long long* address, unsigned int* result) { + *result = atomicDec(address, 1234); +} \ No newline at end of file diff --git a/catch/unit/atomics/atomicDec_negative_kernels_rtc.hh b/catch/unit/atomics/atomicDec_negative_kernels_rtc.hh new file mode 100644 index 0000000000..88ab33d01a --- /dev/null +++ b/catch/unit/atomics/atomicDec_negative_kernels_rtc.hh @@ -0,0 +1,68 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicDec_uint{ + R"( + __global__ void atomicDec_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicDec(&address, 1234); + } + + __global__ void atomicDec_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicDec(address, address); + } + + __global__ void atomicDec_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicDec(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicDec_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicDec(address, 1234); + } + + __global__ void atomicDec_uint_v5(char* address, unsigned int* result) { + *result = atomicDec(address, 1234); + } + + __global__ void atomicDec_uint_v6(short* address, unsigned int* result) { + *result = atomicDec(address, 1234); + } + + __global__ void atomicDec_uint_v7(long* address, unsigned int* result) { + *result = atomicDec(address, 1234); + } + + __global__ void atomicDec_uint_v8(long long* address, unsigned int* result) { + *result = atomicDec(address, 1234); + } + )"}; \ No newline at end of file diff --git a/catch/unit/atomics/atomicInc.cc b/catch/unit/atomics/atomicInc.cc new file mode 100644 index 0000000000..4c7f79a04f --- /dev/null +++ b/catch/unit/atomics/atomicInc.cc @@ -0,0 +1,164 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" +#include "atomicInc_negative_kernels_rtc.hh" + +#include + +/** + * @addtogroup atomicInc atomicInc + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * increment on a target memory location. Each thread will increment the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory is validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads increment a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicInc + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - Several grid and block dimension combinations (only one block is used for shared memory). 
+ * Test source + * ------------------------ + * - unit/atomics/atomicInc.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicInc_Positive", "", unsigned int) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); /* warp size of device 0 */ + const auto cache_line_size = 128u; /* bytes; hard-coded stand-in for the "L1 cache line size" named in the description */ + + for (auto current = 0; current < cmd_options.iterations; ++current) { /* every layout is repeated cmd_options.iterations times */ + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on a single device wherein all threads will + * perform an atomic increment on a target memory location. Each thread will increment the memory + * location, storing the return value into a separate output array slot corresponding to it. Once + * complete, the output array and target memory are validated to contain all the expected values. + * Several memory access patterns are tested: + * -# All threads increment a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicInc + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicInc.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicInc_Positive_Multi_Kernel", "", unsigned int) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceMultipleKernelTest(2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for all overloads of + * atomicInc. + * Test source + * ------------------------ + * - unit/atomics/atomicInc.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicInc_Negative_Parameters_RTC") { + // Marker that prefixes each compiler diagnostic, and how many of them the log must contain + const std::string error_message{"error:"}; + const int expected_error_count{8}; + + hiprtcProgram program{}; + const auto program_source = GENERATE(kAtomicInc_uint); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicInc_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Retrieve the compile log before the program is destroyed further down + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + + // Walk the log from one marker to the next, counting every hit + int error_count{0}; + for (size_t n_pos = log.find(error_message, 0); n_pos != std::string::npos; + n_pos = log.find(error_message, n_pos + 1)) { + ++error_count; + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + 
HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} \ No newline at end of file diff --git a/catch/unit/atomics/atomicInc_negative_kernels.cc b/catch/unit/atomics/atomicInc_negative_kernels.cc new file mode 100644 index 0000000000..8c0f9e7fb6 --- /dev/null +++ b/catch/unit/atomics/atomicInc_negative_kernels.cc @@ -0,0 +1,62 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { /* unrelated class type, used as an invalid pointee in atomicInc_uint_v4 */ + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* unsigned int atomicInc(unsigned int* address, unsigned int val) */ +__global__ void atomicInc_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicInc(&address, 1234); /* invalid: unsigned int** passed as address */ +} + +__global__ void atomicInc_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicInc(address, address); /* invalid: pointer passed as val */ +} + +__global__ void atomicInc_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicInc(1234, 1234); /* invalid: integer literal passed as address */ +} + +__global__ void atomicInc_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicInc(address, 1234); /* invalid: Dummy* passed as address */ +} + +__global__ void atomicInc_uint_v5(char* address, unsigned int* result) { + *result = atomicInc(address, 1234); /* invalid: char* passed as address */ +} + +__global__ void atomicInc_uint_v6(short* address, unsigned int* result) { + *result = atomicInc(address, 1234); /* invalid: short* passed as address */ +} + +__global__ void atomicInc_uint_v7(long* address, unsigned int* result) { + *result = atomicInc(address, 1234); /* invalid: long* passed as address */ +} + +__global__ void atomicInc_uint_v8(long long* address, unsigned int* result) { + *result = atomicInc(address, 1234); /* invalid: long long* passed as address */ +} \ No newline at end of file diff --git a/catch/unit/atomics/atomicInc_negative_kernels_rtc.hh b/catch/unit/atomics/atomicInc_negative_kernels_rtc.hh new file mode 100644 index 0000000000..c4ef1e91c7 --- /dev/null +++ b/catch/unit/atomics/atomicInc_negative_kernels_rtc.hh @@ -0,0 +1,68 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicInc_uint{ + R"( + __global__ void atomicInc_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicInc(&address, 1234); + } + + __global__ void atomicInc_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicInc(address, address); + } + + __global__ void atomicInc_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicInc(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicInc_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicInc(address, 1234); + } + + __global__ void atomicInc_uint_v5(char* address, unsigned int* result) { + *result = atomicInc(address, 1234); + } + + __global__ void atomicInc_uint_v6(short* address, unsigned int* result) { + *result = atomicInc(address, 1234); + } + + __global__ void atomicInc_uint_v7(long* address, unsigned int* result) { + *result = atomicInc(address, 1234); + } + + __global__ void atomicInc_uint_v8(long long* address, unsigned int* result) { + *result = atomicInc(address, 1234); + } + )"}; \ No newline at end of file From 7659470dbc88f6b08a029cdcfd304edb868fe718 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 15:55:53 +0100 Subject: [PATCH 39/71] EXSWHTEC-275 - Implement tests for atomic CAS operations #408 Change-Id: I77c6995f1f85dedce3b3afb907abb03a4a1f2b83 --- catch/unit/atomics/CMakeLists.txt | 8 + catch/unit/atomics/arithmetic_common.hh | 39 ++- catch/unit/atomics/atomicCAS.cc | 172 +++++++++++ .../atomics/atomicCAS_negative_kernels.cc | 62 ++++ .../atomics/atomicCAS_negative_kernels_rtc.hh | 273 ++++++++++++++++++ catch/unit/atomics/atomicCAS_system.cc | 185 ++++++++++++ 6 files changed, 736 insertions(+), 3 deletions(-) create mode 100644 catch/unit/atomics/atomicCAS.cc create mode 100644 catch/unit/atomics/atomicCAS_negative_kernels.cc create 
mode 100644 catch/unit/atomics/atomicCAS_negative_kernels_rtc.hh create mode 100644 catch/unit/atomics/atomicCAS_system.cc diff --git a/catch/unit/atomics/CMakeLists.txt b/catch/unit/atomics/CMakeLists.txt index fecef54c85..1ec472bffc 100644 --- a/catch/unit/atomics/CMakeLists.txt +++ b/catch/unit/atomics/CMakeLists.txt @@ -46,6 +46,8 @@ set(TEST_SRC atomicSub_system.cc atomicInc.cc atomicDec.cc + atomicCAS.cc + atomicCAS_system.cc atomicExch.cc atomicExch_system.cc __hip_atomic_fetch_and.cc @@ -57,6 +59,7 @@ set(TEST_SRC if(HIP_PLATFORM MATCHES "nvidia") set_source_files_properties(atomicAdd_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicSub_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") + set_source_files_properties(atomicCAS_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicExch_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicAnd_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicOr_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") @@ -123,6 +126,11 @@ add_test(NAME Unit_atomicDec_Negative_Parameters ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} atomicDec_negative_kernels.cc 8) +add_test(NAME Unit_atomicCAS_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} 
${HIP_PATH} + atomicCAS_negative_kernels.cc 48) + # SWDEV-435667: Below 2 tests failed in stress test on 01/12/23 #add_test(NAME Unit_atomicExch_Negative_Parameters # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py diff --git a/catch/unit/atomics/arithmetic_common.hh b/catch/unit/atomics/arithmetic_common.hh index 0be866390e..cc701a06a0 100644 --- a/catch/unit/atomics/arithmetic_common.hh +++ b/catch/unit/atomics/arithmetic_common.hh @@ -38,7 +38,9 @@ enum class AtomicOperation { kInc, kDec, kUnsafeAdd, - kSafeAdd + kSafeAdd, + kCASAdd, + kCASAddSystem }; // Constants that are passed as operands to the atomic operations @@ -59,6 +61,31 @@ __host__ __device__ TestType GetTestValue() { return std::is_floating_point_v ? kFloatingPointTestValue : kIntegerTestValue; } +// Implements an atomic addition via atomicCAS +template __device__ TestType CASAtomicAdd(TestType* address, TestType val) { + TestType old = *address, assumed; + + do { + assumed = old; + old = atomicCAS(address, assumed, val + assumed); + } while (assumed != old); + + return old; +} + +// Implements an atomic addition via atomicCAS_system +template +__device__ TestType CASAtomicAddSystem(TestType* address, TestType val) { + TestType old = *address, assumed; + + do { + assumed = old; + old = atomicCAS_system(address, assumed, val + assumed); + } while (assumed != old); + + return old; +} + // Performs an atomic operation on parameter `mem` based on the `operation` enumerator. 
template __device__ TestType PerformAtomicOperation(TestType* const mem) { @@ -80,6 +107,10 @@ __device__ TestType PerformAtomicOperation(TestType* const mem) { return unsafeAtomicAdd(mem, val); } else if constexpr (operation == AtomicOperation::kSafeAdd) { return safeAtomicAdd(mem, val); + } else if constexpr (operation == AtomicOperation::kCASAdd) { + return CASAtomicAdd(mem, val); + } else if constexpr (operation == AtomicOperation::kCASAddSystem) { + return CASAtomicAddSystem(mem, val); } } @@ -202,7 +233,8 @@ std::tuple, std::vector> TestKernelHostRef(const if constexpr (operation == AtomicOperation::kAdd || operation == AtomicOperation::kAddSystem || operation == AtomicOperation::kUnsafeAdd || - operation == AtomicOperation::kSafeAdd) { + operation == AtomicOperation::kSafeAdd || operation == AtomicOperation::kCASAdd || + operation == AtomicOperation::kCASAddSystem) { res = res + val; } else if constexpr (operation == AtomicOperation::kSub || operation == AtomicOperation::kSubSystem) { @@ -270,7 +302,8 @@ void HostAtomicOperation(const unsigned int iterations, TestType* mem, TestType* const auto val = GetTestValue(); for (auto i = 0u; i < iterations; ++i) { - if constexpr (operation == AtomicOperation::kAddSystem) { + if constexpr (operation == AtomicOperation::kAddSystem || + operation == AtomicOperation::kCASAddSystem) { old_vals[i] = __atomic_fetch_add(PitchedOffset(mem, pitch, i % width), val, __ATOMIC_RELAXED); } else if constexpr (operation == AtomicOperation::kSubSystem) { old_vals[i] = __atomic_fetch_sub(PitchedOffset(mem, pitch, i % width), val, __ATOMIC_RELAXED); diff --git a/catch/unit/atomics/atomicCAS.cc b/catch/unit/atomics/atomicCAS.cc new file mode 100644 index 0000000000..3be684306d --- /dev/null +++ b/catch/unit/atomics/atomicCAS.cc @@ -0,0 +1,172 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" +#include "atomicCAS_negative_kernels_rtc.hh" + +#include + +/** + * @addtogroup atomicCAS atomicCAS + * @{ + * @ingroup AtomicsTest + */ + +#if HT_NVIDIA /* value test, matching the "#if HT_AMD" checks in this suite; NOTE(review): #ifdef would also hold for a flag defined as 0 - confirm HT_NVIDIA is a 0/1 macro */ +#define TYPES +#else +#define TYPES , float, double /* extra types appended to the TEMPLATE_TEST_CASE type lists on non-NVIDIA builds */ +#endif + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * addition, implemented using an atomic CAS operation, on a target memory location. Each thread + * will add the same value to the memory location, storing the return value into a separate output + * array slot corresponding to it. Once complete, the output array and target memory are validated to + * contain all the expected values.
Several memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - int, unsigned int, unsigned long long and the types appended by TYPES + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - Several grid and block dimension combinations (only one block is used for shared memory). + * Test source + * ------------------------ + * - unit/atomics/atomicCAS.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicCAS_Positive", "", int, unsigned int, unsigned long long TYPES) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); /* warp size of device 0 */ + const auto cache_line_size = 128u; /* bytes; hard-coded stand-in for the "L1 cache line size" named in the description */ + + for (auto current = 0; current < cmd_options.iterations; ++current) { /* every layout is repeated cmd_options.iterations times */ + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on a single device wherein all threads will perform + * an atomic addition, implemented using an atomic CAS operation, on a target memory location. Each + * thread will add the same value to the memory location, storing the return value into a separate + * output array slot corresponding to it. Once complete, the output array and target memory are + * validated to contain all the expected values.
Several memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - int, unsigned int, unsigned long long and the types appended by TYPES + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicCAS.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicCAS_Positive_Multi_Kernel", "", int, unsigned int, + unsigned long long TYPES) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); /* warp size of device 0 */ + const auto cache_line_size = 128u; /* bytes; hard-coded stand-in for the "L1 cache line size" named in the description */ + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceMultipleKernelTest(2, 1, sizeof(TestType)); /* NOTE(review): the leading 2 is presumably the number of concurrent kernels ("two times" in the description) - confirm */ + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Runtime-compiles (RTC) kernels that pass combinations of arguments of invalid types to + * atomicCAS.
+ * Test source + * ------------------------ + * - unit/atomics/atomicCAS.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicCAS_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = GENERATE(kAtomicCAS_int, kAtomicCAS_uint, kAtomicCAS_ulong, + kAtomicCAS_ulonglong, kAtomicCAS_float, kAtomicCAS_double); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicCAS_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{8}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/catch/unit/atomics/atomicCAS_negative_kernels.cc b/catch/unit/atomics/atomicCAS_negative_kernels.cc new file mode 100644 index 0000000000..b0390bb3fa --- /dev/null +++ b/catch/unit/atomics/atomicCAS_negative_kernels.cc @@ -0,0 +1,62 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define ATOMIC_CAS_NEGATIVE_KERNEL(type_name) \ + __global__ void atomicCAS_v1(type_name* address, type_name* result) { \ + *result = atomicCAS(&address, 12, 13); \ + } \ + __global__ void atomicCAS_v2(type_name* address, type_name* result) { \ + *result = atomicCAS(address, address, 13); \ + } \ + __global__ void atomicCAS_v3(type_name* address, type_name* result) { \ + *result = atomicCAS(address, 12, address); \ + } \ + __global__ void atomicCAS_v4(Dummy* address, type_name* result) { \ + *result = atomicCAS(address, 12, 13); \ + } \ + __global__ void atomicCAS_v5(char* address, type_name* result) { \ + *result = atomicCAS(address, 12, 13); \ + } \ + __global__ void atomicCAS_v6(short* address, type_name* result) { \ + *result = atomicCAS(address, 12, 13); \ + } \ + __global__ void atomicCAS_v7(long* address, type_name* result) { \ + *result = atomicCAS(address, 12, 13); \ + } \ + __global__ void atomicCAS_v8(long long* address, type_name* result) { \ + *result = atomicCAS(address, 12, 13); \ + } + +ATOMIC_CAS_NEGATIVE_KERNEL(int) +ATOMIC_CAS_NEGATIVE_KERNEL(unsigned int) +ATOMIC_CAS_NEGATIVE_KERNEL(unsigned long) +ATOMIC_CAS_NEGATIVE_KERNEL(unsigned long long) +ATOMIC_CAS_NEGATIVE_KERNEL(float) +ATOMIC_CAS_NEGATIVE_KERNEL(double) diff --git a/catch/unit/atomics/atomicCAS_negative_kernels_rtc.hh b/catch/unit/atomics/atomicCAS_negative_kernels_rtc.hh new file mode 100644 index 0000000000..952c4892fb --- /dev/null +++ b/catch/unit/atomics/atomicCAS_negative_kernels_rtc.hh @@ -0,0 +1,273 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. 
+*/ + +static constexpr auto kAtomicCAS_int{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_int_v1(int* address, int* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_int_v2(int* address, int* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_int_v3(int* address, int* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_int_v4(Dummy* address, int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_int_v5(char* address, int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_int_v6(short* address, int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_int_v7(long* address, int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_int_v8(long long* address, int* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; + +static constexpr auto kAtomicCAS_uint{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_uint_v5(char* address, unsigned int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_uint_v6(short* address, unsigned int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_uint_v7(long* address, unsigned int* 
result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_uint_v8(long long* address, unsigned int* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; + +static constexpr auto kAtomicCAS_ulong{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulong_v5(char* address, unsigned long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulong_v6(short* address, unsigned long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulong_v7(long* address, unsigned long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulong_v8(long long* address, unsigned long* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; + +static constexpr auto kAtomicCAS_ulonglong{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_ulonglong_v4(Dummy* address, 
unsigned long long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; + +static constexpr auto kAtomicCAS_float{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_float_v1(float* address, float* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_float_v2(float* address, float* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_float_v3(float* address, float* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_float_v4(Dummy* address, float* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_float_v5(char* address, float* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_float_v6(short* address, float* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_float_v7(long* address, float* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_float_v8(long long* address, float* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; + +static constexpr auto kAtomicCAS_double{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_double_v1(double* address, double* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_double_v2(double* address, 
double* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_double_v3(double* address, double* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_double_v4(Dummy* address, double* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_double_v5(char* address, double* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_double_v6(short* address, double* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_double_v7(long* address, double* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_double_v8(long long* address, double* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; diff --git a/catch/unit/atomics/atomicCAS_system.cc b/catch/unit/atomics/atomicCAS_system.cc new file mode 100644 index 0000000000..8f2dd8306b --- /dev/null +++ b/catch/unit/atomics/atomicCAS_system.cc @@ -0,0 +1,185 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" + +#include + +/** + * @addtogroup atomicCAS_system atomicCAS_system + * @{ + * @ingroup AtomicsTest + */ + +#if HT_NVIDIA  // test the macro's value: a macro defined as 0 still satisfies #ifdef +#define TYPES +#else +#define TYPES , float, double +#endif + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on two devices wherein all threads will perform + * an atomic addition, implemented using an atomic CAS operation, on a target memory location. Each + * thread will add the same value to the memory location, storing the return value into a separate + * output array slot corresponding to it. Once complete, the output array and target memory is + * validated to contain all the expected values. Several memory access patterns are tested: + * -# All threads exchange to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicCAS_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. 
+ * Test source + * ------------------------ + * - unit/atomics/atomicCAS_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Peer_GPUs", "", int, unsigned int, + unsigned long long TYPES) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel on a single device wherein all threads will perform + * an atomic addition, implemented using an atomic CAS operation, on a target memory location. + * Each thread will add the same value to the memory location, storing the return value into a + * separate output array slot corresponding to it. While the kernel is running, the host + * performs atomic additions, in 4 threads, on the same memory location(s). Once complete, the + * output array and target memory is validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads exchange to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. 
+ * + * - The test is run for: + * - All overloads of atomicCAS_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicCAS_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_GPU", "", int, unsigned int, + unsigned long long TYPES) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, 1, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, warp_size, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, warp_size, cache_line_size, 4); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times on two devices wherein all threads will perform + * an atomic addition, implemented using an atomic CAS operation, on a target memory location. + * Each thread will add the same value to the memory location, storing the return value into a + * separate output array slot corresponding to it. While the kernel is running, the host + * performs atomic additions, in 4 threads, on the same memory location(s). Once complete, the + * output array and target memory is validated to contain all the expected values. 
Several + * memory access patterns are tested: + * -# All threads exchange to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicCAS_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicCAS_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int, + unsigned long long TYPES) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, 1, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, cache_line_size, 4); + } + } +} From 91cff794b812cb5f87bc1a286383ccb57c0ec56c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 15:09:55 +0100 Subject: [PATCH 40/71] EXSWHTEC-333 - Extend tests for warp shlf and shfl_xor functions to support half-precision types #420 Change-Id: I1da47a0a4b8d15b0d2d569eb4769aa40207aade2 --- catch/unit/warp/warp_common.hh | 10 ++++++++++ catch/unit/warp/warp_shfl.cc | 10 +++------- catch/unit/warp/warp_shfl_common.hh | 10 ++++++++++ 
catch/unit/warp/warp_shfl_xor.cc | 2 +- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/catch/unit/warp/warp_common.hh b/catch/unit/warp/warp_common.hh index d09e96837e..875af88e54 100644 --- a/catch/unit/warp/warp_common.hh +++ b/catch/unit/warp/warp_common.hh @@ -21,6 +21,16 @@ THE SOFTWARE. #include #include +#include + +static bool operator==(__half x, __half y) { + // __heq doesn't have a __host__ version + return static_cast<__half_raw>(x).data == static_cast<__half_raw>(y).data; +} +static bool operator!=(__half x, __half y) { return !(x == y); } + +static bool operator==(__half2 x, __half2 y) { return __hbeq2(x, y); } +static bool operator!=(__half2 x, __half2 y) { return !(x == y); } static __device__ bool deactivate_thread(const uint64_t* const active_masks) { const auto warp = diff --git a/catch/unit/warp/warp_shfl.cc b/catch/unit/warp/warp_shfl.cc index babb814fe4..73913ef672 100644 --- a/catch/unit/warp/warp_shfl.cc +++ b/catch/unit/warp/warp_shfl.cc @@ -100,7 +100,7 @@ template class WarpShfl : public WarpShflTest, T> { * - Device supports warp shuffle */ TEMPLATE_TEST_CASE("Unit_Warp_Shfl_Positive_Basic", "", int, unsigned int, long, unsigned long, - long long, unsigned long long, float, double) { + long long, unsigned long long, float, double, __half, __half2) { int device; hipDeviceProp_t device_properties; HIP_CHECK(hipGetDevice(&device)); @@ -111,11 +111,7 @@ TEMPLATE_TEST_CASE("Unit_Warp_Shfl_Positive_Basic", "", int, unsigned int, long, return; } - SECTION("Shfl with specified active mask and input values") { - WarpShfl().run(false); - } + SECTION("Shfl with specified active mask and input values") { WarpShfl().run(false); } - SECTION("Shfl with random active mask and input values") { - WarpShfl().run(true); - } + SECTION("Shfl with random active mask and input values") { WarpShfl().run(true); } } diff --git a/catch/unit/warp/warp_shfl_common.hh b/catch/unit/warp/warp_shfl_common.hh index 97b2677f31..44097c8f0a 100644 --- 
a/catch/unit/warp/warp_shfl_common.hh +++ b/catch/unit/warp/warp_shfl_common.hh @@ -82,6 +82,16 @@ template class WarpShflTest { return static_cast( GenerateRandomReal(std::numeric_limits().min(), std::numeric_limits().max())); }); + } else if constexpr (std::is_same_v<__half, T>) { + std::generate_n(input, grid_.thread_count_, [] { + return __float2half(GenerateRandomReal(std::numeric_limits().min(), + std::numeric_limits().max())); + }); + } else if constexpr (std::is_same_v<__half2, T>) { + std::generate_n(input, grid_.thread_count_, [] { + return __float2half2_rn(GenerateRandomReal(std::numeric_limits().min(), + std::numeric_limits().max())); + }); } else { std::generate_n(input, grid_.thread_count_, [] { return static_cast(GenerateRandomInteger(std::numeric_limits().min(), diff --git a/catch/unit/warp/warp_shfl_xor.cc b/catch/unit/warp/warp_shfl_xor.cc index 3edbca1b3a..267bc91119 100644 --- a/catch/unit/warp/warp_shfl_xor.cc +++ b/catch/unit/warp/warp_shfl_xor.cc @@ -97,7 +97,7 @@ template class WarpShflXOR : public WarpShflTest, T> * - Device supports warp shuffle */ TEMPLATE_TEST_CASE("Unit_Warp_Shfl_XOR_Positive_Basic", "", int, unsigned int, long, unsigned long, - long long, unsigned long long, float, double) { + long long, unsigned long long, float, double, __half, __half2) { int device; hipDeviceProp_t device_properties; HIP_CHECK(hipGetDevice(&device)); From 50031b5c444f3728de1b70ecc3556cdaedc22024 Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Thu, 1 Feb 2024 17:33:39 +0530 Subject: [PATCH 41/71] EXSWHTEC-338 - Implement tests for half2 type casting intrinsics #422 Change-Id: I5492fa7d54573d45bfdb9320e74ccc6ca7640d2d --- catch/include/cmd_options.hh | 4 - catch/include/hip_test_defgroups.hh | 7 + catch/include/resource_guards.hh | 3 + catch/unit/math/CMakeLists.txt | 6 + catch/unit/math/Float16.hh | 56 +++ catch/unit/math/binary_common.hh | 13 +- catch/unit/math/casting_common.hh | 66 ++- catch/unit/math/casting_half2_common.hh | 97 ++++ 
catch/unit/math/casting_half2_funcs.cc | 419 ++++++++++++++++++ .../math/casting_half2_negative_kernels.cc | 57 +++ catch/unit/math/half_precision_common.hh | 103 +++++ catch/unit/math/math_common.hh | 23 +- catch/unit/math/ternary_common.hh | 29 +- catch/unit/math/unary_common.hh | 49 ++ catch/unit/math/validators.hh | 16 +- 15 files changed, 909 insertions(+), 39 deletions(-) create mode 100644 catch/unit/math/Float16.hh create mode 100644 catch/unit/math/casting_half2_common.hh create mode 100644 catch/unit/math/casting_half2_funcs.cc create mode 100644 catch/unit/math/casting_half2_negative_kernels.cc create mode 100644 catch/unit/math/half_precision_common.hh diff --git a/catch/include/cmd_options.hh b/catch/include/cmd_options.hh index 71f21006e0..666f34ea82 100644 --- a/catch/include/cmd_options.hh +++ b/catch/include/cmd_options.hh @@ -37,7 +37,3 @@ struct CmdOptions { }; extern CmdOptions cmd_options; -<<<<<<< HEAD -======= - ->>>>>>> c08a2a5d (Merge branch 'develop' into casting_int_tests) diff --git a/catch/include/hip_test_defgroups.hh b/catch/include/hip_test_defgroups.hh index 58ecdae020..0a56d94239 100644 --- a/catch/include/hip_test_defgroups.hh +++ b/catch/include/hip_test_defgroups.hh @@ -311,6 +311,13 @@ TEST_CASE("Unit_atomicDec_Negative_Parameters") {} * @} */ + /** + * @defgroup MathTest Math Device Functions + * @{ + * This section describes tests for device math functions of HIP runtime API. 
+ * @} + */ + /** * @defgroup PrintfTest Printf API Management * @{ diff --git a/catch/include/resource_guards.hh b/catch/include/resource_guards.hh index 262b7b4437..20c1a20ee5 100644 --- a/catch/include/resource_guards.hh +++ b/catch/include/resource_guards.hh @@ -112,6 +112,9 @@ template class LinearAllocGuard { T* host_ptr_ = nullptr; void dealloc() { + if (ptr_ == nullptr) { + return; + } // No Catch macros, don't want to possibly throw in the destructor if (ptr_ != nullptr) { switch (allocation_type_) { diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt index 973e875abe..3cb30e1f0c 100644 --- a/catch/unit/math/CMakeLists.txt +++ b/catch/unit/math/CMakeLists.txt @@ -32,6 +32,7 @@ set(TEST_SRC casting_double_funcs.cc casting_float_funcs.cc casting_int_funcs.cc + casting_half2_funcs.cc ) if(HIP_PLATFORM MATCHES "nvidia") @@ -116,3 +117,8 @@ add_test(NAME Unit_Device_casting_int_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} casting_int_negative_kernels.cc 92) + +add_test(NAME Unit_Device_casting_half2_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + casting_half2_negative_kernels.cc 53) diff --git a/catch/unit/math/Float16.hh b/catch/unit/math/Float16.hh new file mode 100644 index 0000000000..79d2064cd2 --- /dev/null +++ b/catch/unit/math/Float16.hh @@ -0,0 +1,56 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include + +#define FLOAT16_MAX 65504.0f + +class Float16 { + public: + __host__ __device__ Float16() = default; + __host__ __device__ Float16(__half x) : x_{x} {} + __host__ __device__ Float16(__half2 x) : x_{__low2half(x)} {} + __host__ __device__ Float16(float x) : x_{__float2half(x)} {} + + __host__ __device__ bool operator==(Float16 other) const { + return static_cast<__half_raw>(x_).data == static_cast<__half_raw>(other.x_).data; + } + __host__ __device__ bool operator!=(Float16 other) const { return !(*this == other); } + + __host__ __device__ operator __half() const { return x_; } + __host__ __device__ operator __half2() const { return __half2half2(x_); } + __host__ __device__ operator float() const { return __half2float(x_); } + + private: + __half x_; +}; + +namespace { + +inline std::ostream& operator<<(std::ostream& o, Float16 x) { + o << static_cast(x); + return o; +} + +} // namespace \ No newline at end of file diff --git a/catch/unit/math/binary_common.hh b/catch/unit/math/binary_common.hh index 2ddaee474f..395fb28c79 100644 --- a/catch/unit/math/binary_common.hh +++ b/catch/unit/math/binary_common.hh @@ -73,8 +73,13 @@ void BinaryFloatingPointBruteForceTest(kernel_sig kernel, thread_pool.Post([=, &x1s, &x2s] { const auto generator = [=] { static thread_local std::mt19937 rng(std::random_device{}()); - std::uniform_real_distribution> unif_dist(a, b); - return static_cast(unif_dist(rng)); + if constexpr (std::is_same_v) { + std::uniform_real_distribution> unif_dist(-FLOAT16_MAX, FLOAT16_MAX); + return static_cast(unif_dist(rng)); + } else { + std::uniform_real_distribution> unif_dist(a, b); + return static_cast(unif_dist(rng)); + } }; std::generate(x1s.ptr() + base_idx, x1s.ptr() + base_idx + sub_batch_size, generator); std::generate(x2s.ptr() + base_idx, x2s.ptr() + base_idx + sub_batch_size, generator); @@ -94,7 +99,8 @@ void BinaryFloatingPointSpecialValuesTest(kernel_sig kernel, ref_sig ref_func, const ValidatorBuilder& 
validator_builder) { const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); - const auto values = std::get>(kSpecialValRegistry); + using SpecialValsType = std::conditional_t, float, TArg>; + const auto values = std::get>(kSpecialValRegistry); const auto size = values.size * values.size; LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, size * sizeof(TArg)}; @@ -122,7 +128,6 @@ void BinaryFloatingPointTest(kernel_sig kernel, ref_sig= static_cast(std::numeric_limits::max())) \ @@ -71,13 +82,66 @@ namespace cg = cooperative_groups; return result; \ } - template T1 type2_as_type1_ref(T2 arg) { T1 tmp; memcpy(&tmp, &arg, sizeof(tmp)); return tmp; } +template +void CastUnaryHalfPrecisionBruteForceTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + uint64_t stop = std::numeric_limits::max() + 1ul; + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(Float16) + sizeof(T)), stop); + LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(Float16)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + + for (uint64_t v = 0u; v < stop;) { + batch_size = std::min(max_batch_size, stop - v); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i < num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + + thread_pool.Post([=, &values] { + auto t = v; + uint16_t val; + for (auto j = 0u; j < sub_batch_size; ++j) { + val = static_cast(t++); + values.ptr()[base_idx + j] = *reinterpret_cast(&val); + if (std::isnan(values.ptr()[base_idx + j]) || std::isinf(values.ptr()[base_idx + j])) { + values.ptr()[base_idx + j] = 0; + } + } + }); + + v += sub_batch_size; + base_idx += 
sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, values.ptr()); + } +} + +template +void CastUnaryHalfPrecisionTest(kernel_sig kernel, ref_sig ref, + const ValidatorBuilder& validator_builder) { + SECTION("Brute force") { CastUnaryHalfPrecisionBruteForceTest(kernel, ref, validator_builder); } +} + + template void CastDoublePrecisionSpecialValuesTest(kernel_sig kernel, ref_sig ref_func, const ValidatorBuilder& validator_builder) { diff --git a/catch/unit/math/casting_half2_common.hh b/catch/unit/math/casting_half2_common.hh new file mode 100644 index 0000000000..085ae46ccc --- /dev/null +++ b/catch/unit/math/casting_half2_common.hh @@ -0,0 +1,97 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include "math_common.hh" +#include "validators.hh" + +namespace cg = cooperative_groups; + +#define CAST_HALF2_KERNEL_DEF(func_name, T) \ + __global__ void func_name##_kernel(T* const ys, const size_t num_xs, Float16* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(__half2{xs[i], -xs[i]}); \ + } \ + } + +#define CAST_BINARY_HALF2_KERNEL_DEF(func_name, T) \ + __global__ void func_name##_kernel(T* const ys, const size_t num_xs, Float16* const x1s, \ + Float16* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(__half2{x1s[i], -x1s[i]}, __half2{x2s[i], -x2s[i]}); \ + } \ + } + +template class Float2Validator : public MatcherBase { + public: + Float2Validator(const float2& target, const VB& vb) + : first_matcher_{vb(target.x)}, second_matcher_{vb(target.y)} {} + + bool match(const float2& val) const override { + return first_matcher_->match(val.x) && second_matcher_->match(val.y); + } + + std::string describe() const override { + return "<" + first_matcher_->describe() + ", " + second_matcher_->describe() + ">"; + } + + private: + decltype(std::declval()(float())) first_matcher_; + decltype(std::declval()(float())) second_matcher_; +}; + +template +auto Float2ValidatorBuilderFactory(const ValidatorBuilder& vb) { + return [=](const float2& t, auto&&...) 
{ + return std::make_unique>(t, vb); + }; +} + +template class Half2Validator : public MatcherBase<__half2> { + public: + Half2Validator(const __half2& target, const VB& vb) + : first_matcher_{vb(target.data.x)}, second_matcher_{vb(target.data.y)} {} + + bool match(const __half2& val) const override { + return first_matcher_->match(val.data.x) && second_matcher_->match(val.data.y); + } + + std::string describe() const override { + return "<" + first_matcher_->describe() + ", " + second_matcher_->describe() + ">"; + } + + private: + decltype(std::declval()(Float16())) first_matcher_; + decltype(std::declval()(Float16())) second_matcher_; +}; + +template auto Half2ValidatorBuilderFactory(const ValidatorBuilder& vb) { + return [=](const __half2& t, auto&&...) { + return std::make_unique>(t, vb); + }; +} diff --git a/catch/unit/math/casting_half2_funcs.cc b/catch/unit/math/casting_half2_funcs.cc new file mode 100644 index 0000000000..38562f38eb --- /dev/null +++ b/catch/unit/math/casting_half2_funcs.cc @@ -0,0 +1,419 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "half_precision_common.hh" +#include "casting_common.hh" +#include "casting_half2_common.hh" + +/** + * @addtogroup HalfPrecisionCastingHalf2 HalfPrecisionCastingHalf2 + * @{ + * @ingroup MathTest + */ + +/********** half -> half2 **********/ + +CAST_KERNEL_DEF(__half2half2, __half2, Float16) + +static __half2 __half2half2_ref(Float16 x) { return __half2{x, x}; } + +/** + * Test Description + * ------------------------ + * - Test that checks `__half2half2` for all possible inputs. The results are compared against + * reference function which returns __half2 value created from one __half value. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___half2half2_Accuracy_Positive") { + UnaryHalfPrecisionTest(__half2half2_kernel, __half2half2_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +CAST_BINARY_KERNEL_DEF(make_half2, __half2, Float16) + +static __half2 make_half2_ref(Float16 x, Float16 y) { return __half2{x, y}; } + +/** + * Test Description + * ------------------------ + * - Test that checks `make_half2` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function which + * returns __half2 value created from two __half values. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device_make_half2_Accuracy_Positive") { + BinaryFloatingPointTest(make_half2_kernel, make_half2_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +CAST_BINARY_KERNEL_DEF(__halves2half2, __half2, Float16) + +static __half2 __halves2half2_ref(Float16 x, Float16 y) { return __half2{x, y}; } + +/** + * Test Description + * ------------------------ + * - Test that checks `__halves2half2` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function which + * returns __half2 value created from two __half values. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___halves2half2_Accuracy_Positive") { + BinaryFloatingPointTest(__halves2half2_kernel, __halves2half2_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +/********** half2 -> half **********/ + + +CAST_HALF2_KERNEL_DEF(__low2half, Float16) + +static Float16 __low2half_ref(Float16 x) { return x; } + +/** + * Test Description + * ------------------------ + * - Test that checks `__low2half` for all possible inputs. The results are compared against + * reference function which returns __half value created from lower __half2 element. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___low2half_Accuracy_Positive") { + UnaryHalfPrecisionTest(__low2half_kernel, __low2half_ref, EqValidatorBuilderFactory()); +} + +CAST_HALF2_KERNEL_DEF(__high2half, Float16) + +static Float16 __high2half_ref(Float16 x) { return -x; } + +/** + * Test Description + * ------------------------ + * - Test that checks `__high2half` for all possible inputs. The results are compared against + * reference function which returns __half value created from higher __half2 element. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___high2half_Accuracy_Positive") { + UnaryHalfPrecisionTest(__high2half_kernel, __high2half_ref, EqValidatorBuilderFactory()); +} + +/********** half2 -> half2 **********/ + +CAST_HALF2_KERNEL_DEF(__low2half2, __half2) + +static __half2 __low2half2_ref(Float16 x) { return __half2{x, x}; } + +/** + * Test Description + * ------------------------ + * - Test that checks `__low2half2` for all possible inputs. The results are compared against + * reference function which returns __half2 value created from two lower __half2 elements. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___low2half2_Accuracy_Positive") { + UnaryHalfPrecisionTest(__low2half2_kernel, __low2half2_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +CAST_HALF2_KERNEL_DEF(__high2half2, __half2) + +static __half2 __high2half2_ref(Float16 x) { return __half2{-x, -x}; } + +/** + * Test Description + * ------------------------ + * - Test that checks `__high2half2` for all possible inputs. 
The results are compared against + * reference function which returns __half2 value created from two higher __half2 elements. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___high2half2_Accuracy_Positive") { + UnaryHalfPrecisionTest(__high2half2_kernel, __high2half2_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +CAST_HALF2_KERNEL_DEF(__lowhigh2highlow, __half2) + +static __half2 __lowhigh2highlow_ref(Float16 x) { return __half2{-x, x}; } + +/** + * Test Description + * ------------------------ + * - Test that checks `__lowhigh2highlow` for all possible inputs. The results are compared + * against reference function which returns __half2 value created from higher and lower __half2 + * elements. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___lowhigh2highlow_Accuracy_Positive") { + UnaryHalfPrecisionTest(__lowhigh2highlow_kernel, __lowhigh2highlow_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +CAST_BINARY_HALF2_KERNEL_DEF(__lows2half2, __half2) + +static __half2 __lows2half2_ref(Float16 x, Float16 y) { return __half2{x, y}; } + +/** + * Test Description + * ------------------------ + * - Test that checks `__lows2half2` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function which + * returns __half2 value created from lower elements of two __half2 values.
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___lows2half2_Accuracy_Positive") { + BinaryFloatingPointTest(__lows2half2_kernel, __lows2half2_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +CAST_BINARY_HALF2_KERNEL_DEF(__highs2half2, __half2) + +static __half2 __highs2half2_ref(Float16 x, Float16 y) { return __half2{-x, -y}; } + +/** + * Test Description + * ------------------------ + * - Test that checks `__highs2half2` against a table of difficult values, followed by a large + * number of randomly generated values. The results are compared against reference function which + * returns __half2 value created from higher elements of two __half2 values. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___highs2half2_Accuracy_Positive") { + BinaryFloatingPointTest(__highs2half2_kernel, __highs2half2_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +/********** float -> half2 **********/ + +CAST_KERNEL_DEF(__float2half2_rn, __half2, float) + +static __half2 __float2half2_rn_ref(float x) { + return __half2{static_cast(x), static_cast(x)}; +} + +/** + * Test Description + * ------------------------ + * - Test that checks `__float2half2_rn` for all possible inputs. The results are compared + * against reference function which returns __half2 value with one casted float value in both + * elements.
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float2half2_rn_Accuracy_Positive") { + UnarySinglePrecisionTest(__float2half2_rn_kernel, __float2half2_rn_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +CAST_BINARY_KERNEL_DEF(__floats2half2_rn, __half2, float) + +static __half2 __floats2half2_rn_ref(float x, float y) { + return __half2{static_cast(x), static_cast(y)}; +} + +/** + * Test Description + * ------------------------ + * - Test that checks `__floats2half2_rn` against a table of difficult values, followed by a + * large number of randomly generated values. The results are compared against reference function + * which returns __half2 value created from two casted float values. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___floats2half2_rn_Accuracy_Positive") { + BinaryFloatingPointTest(__floats2half2_rn_kernel, __floats2half2_rn_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +/********** float2 -> half2 **********/ + +__global__ void __float22half2_rn_kernel(__half2* const ys, const size_t num_xs, float* const xs) { + const auto tid = cg::this_grid().thread_rank(); + const auto stride = cg::this_grid().size(); + + for (auto i = tid; i < num_xs; i += stride) { + ys[i] = __float22half2_rn(make_float2(xs[i], -xs[i])); + } +} + +static __half2 __float22half2_rn_ref(float x) { + return __half2{static_cast(x), static_cast(-x)}; +} + +/** + * Test Description + * ------------------------ + * - Test that checks `__float22half2_rn` for all possible inputs. The results are compared + * against reference function which returns __half2 value created by casting both float2 + * elements.
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float22half2_rn_Accuracy_Positive") { + UnarySinglePrecisionTest(__float22half2_rn_kernel, __float22half2_rn_ref, + Half2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} + +/********** half2 -> float **********/ + +CAST_HALF2_KERNEL_DEF(__low2float, float) + +static float __low2float_ref(Float16 x) { return static_cast(x); } + +/** + * Test Description + * ------------------------ + * - Test that checks `__low2float` for all possible inputs. The results are compared + * against reference function which returns float value created from the lower of the __half2 + * elements. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___low2float_Accuracy_Positive") { + UnaryHalfPrecisionTest(__low2float_kernel, __low2float_ref, EqValidatorBuilderFactory()); +} + +CAST_HALF2_KERNEL_DEF(__high2float, float) + +static float __high2float_ref(Float16 x) { return static_cast(-x); } + +/** + * Test Description + * ------------------------ + * - Test that checks `__high2float` for all possible inputs. The results are compared + * against reference function which returns float value created from the higher of the __half2 + * elements.
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___high2float_Accuracy_Positive") { + UnaryHalfPrecisionTest(__high2float_kernel, __high2float_ref, EqValidatorBuilderFactory()); +} + +/********** half2 -> float2 **********/ + +CAST_HALF2_KERNEL_DEF(__half22float2, float2) + +static float2 __half22float2_ref(Float16 x) { + return make_float2(static_cast(x), static_cast(-x)); +} + +/** + * Test Description + * ------------------------ + * - Test that checks `__half22float2` for all possible inputs. The results are compared against + * reference function which returns float2 value created from casted elements of one __half2 value. + * + * Test source + * ------------------------ + * - unit/math/casting_half2_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___half22float2_Accuracy_Positive") { + UnaryHalfPrecisionTest(__half22float2_kernel, __half22float2_ref, + Float2ValidatorBuilderFactory(EqValidatorBuilderFactory())); +} diff --git a/catch/unit/math/casting_half2_negative_kernels.cc b/catch/unit/math/casting_half2_negative_kernels.cc new file mode 100644 index 0000000000..d1552e45ba --- /dev/null +++ b/catch/unit/math/casting_half2_negative_kernels.cc @@ -0,0 +1,57 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +/* Empty class used below as a mismatched argument/result type. NOTE(review): presumably every kernel in this file is meant to be rejected by the compiler - confirm against the test that builds it. */ +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +/* Unary kernels calling func_name (result type T1, argument type T2) with a T2* argument (v1), a Dummy argument (v2) or a Dummy result (v3). */ +#define NEGATIVE_UNARY_KERNELS_SHELL(func_name, T1, T2) \ + __global__ void func_name##_kernel_v1(T1* result, T2* x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v2(T1* result, Dummy x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v3(Dummy* result, T2 x) { *result = func_name(x); } + +/* Binary kernels calling func_name(x, y) with a T2* (v1, v2) or a Dummy (v3, v4) in place of one of the two T2 arguments. */ +#define NEGATIVE_BINARY_KERNELS_SHELL(func_name, T1, T2) \ + __global__ void func_name##_kernel_v1(T2* x, T2 y) { T1 result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(T2 x, T2* y) { T1 result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, T2 y) { T1 result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(T2 x, Dummy y) { T1 result = func_name(x, y); } + +NEGATIVE_UNARY_KERNELS_SHELL(__half2half2, __half2, __half) +NEGATIVE_UNARY_KERNELS_SHELL(__low2half, __half, __half2) +NEGATIVE_UNARY_KERNELS_SHELL(__high2half, __half, __half2) +NEGATIVE_UNARY_KERNELS_SHELL(__low2half2, __half2, __half2) +NEGATIVE_UNARY_KERNELS_SHELL(__high2half2, __half2, __half2) +NEGATIVE_UNARY_KERNELS_SHELL(__lowhigh2highlow, __half2, __half2) +NEGATIVE_UNARY_KERNELS_SHELL(__float2half2_rn, __half2, float) +NEGATIVE_UNARY_KERNELS_SHELL(__float22half2_rn, __half2, float2) +NEGATIVE_UNARY_KERNELS_SHELL(__low2float, float, __half2) +NEGATIVE_UNARY_KERNELS_SHELL(__high2float, float, __half2) +NEGATIVE_UNARY_KERNELS_SHELL(__half22float2, float2, __half2) + +NEGATIVE_BINARY_KERNELS_SHELL(make_half2, __half2, __half) +NEGATIVE_BINARY_KERNELS_SHELL(__halves2half2, __half2, __half) +NEGATIVE_BINARY_KERNELS_SHELL(__lows2half2, __half2, __half2) +NEGATIVE_BINARY_KERNELS_SHELL(__highs2half2, __half2, __half2) +NEGATIVE_BINARY_KERNELS_SHELL(__floats2half2_rn, __half2, float) \ No newline at end of file diff --git a/catch/unit/math/half_precision_common.hh b/catch/unit/math/half_precision_common.hh new file mode
100644 index 0000000000..1f494058b0 --- /dev/null +++ b/catch/unit/math/half_precision_common.hh @@ -0,0 +1,103 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include "unary_common.hh" +#include "binary_common.hh" +#include "ternary_common.hh" + + +/********** Unary **********/ + +#define MATH_UNARY_HP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(Float16* const ys, const size_t num_xs, Float16* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(xs[i]); \ + } \ + } + +#define MATH_UNARY_HP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + UnaryHalfPrecisionTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_UNARY_HP_TEST_DEF(func_name, ref_func) \ + MATH_UNARY_HP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_UNARY_HP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(float target, float x) + + +/********** Binary **********/ + +#define MATH_BINARY_HP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(Float16* const ys, const size_t num_xs, Float16* const x1s, \ + Float16* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i]); \ + } \ + } + +#define MATH_BINARY_HP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + BinaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder); \ + } +/* Forwards to MATH_BINARY_HP_TEST_DEF_IMPL, passing func_name##_validator_builder as the validator builder (same shape as MATH_UNARY_HP_TEST_DEF). */ +#define MATH_BINARY_HP_TEST_DEF(func_name, ref_func) \ + MATH_BINARY_HP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_BINARY_HP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(float target, float x1, \ + float x2) + + +/********** Ternary **********/ + +#define
MATH_TERNARY_HP_KERNEL_DEF(func_name) \ + __global__ void func_name##_kernel(Float16* const ys, const size_t num_xs, Float16* const x1s, \ + Float16* const x2s, Float16* const x3s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i], x3s[i]); \ + } \ + } + +#define MATH_TERNARY_HP_TEST_DEF_IMPL(func_name, ref_func, validator_builder) \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + TernaryFloatingPointTest(func_name##_kernel, ref_func, validator_builder); \ + } + +#define MATH_TERNARY_HP_TEST_DEF(func_name, ref_func, validator_builder) \ + MATH_TERNARY_HP_TEST_DEF_IMPL(func_name, ref_func, func_name##_validator_builder) + +#define MATH_TERNARY_HP_VALIDATOR_BUILDER_DEF(func_name) \ + static std::unique_ptr> func_name##_validator_builder(float target, float x1, \ + float x2, float x3) \ No newline at end of file diff --git a/catch/unit/math/math_common.hh b/catch/unit/math/math_common.hh index 0cd30db404..4f7e5ddd29 100644 --- a/catch/unit/math/math_common.hh +++ b/catch/unit/math/math_common.hh @@ -7,15 +7,8 @@ in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -<<<<<<< HEAD The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -======= - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- ->>>>>>> c08a2a5d (Merge branch 'develop' into casting_int_tests) THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -33,6 +26,7 @@ THE SOFTWARE. #include +#include "Float16.hh" #include "thread_pool.hh" #include "validators.hh" @@ -47,6 +41,15 @@ operator<<(std::ostream& os, const std::pair& p) { << std::setprecision(default_prec); } +template +std::enable_if_t, std::ostream&> +operator<<(std::ostream& os, const T& p) { + const auto default_prec = os.precision(); + return os << "<" << std::setprecision(std::numeric_limits::max_digits10 - 1) << p.x << ", " + << std::setprecision(std::numeric_limits::max_digits10 - 1) << p.y << ">" + << std::setprecision(default_prec); +} + // This class represents a generic numerical accuracy math test. Template parameter T is the output // type of the function being tested, and template parameter pack Ts represents the input types. The // constructor takes a kernel with the signature void(T*, const size_t, Ts*...). The first kernel @@ -107,11 +110,7 @@ template class MathTest { template void RunImpl(const ValidatorBuilder& validator_builder, const size_t grid_dim, const size_t block_dim, RT (*const ref_func)(RTs...), const size_t num_args, -<<<<<<< HEAD - std::index_sequence is, const Ts*... xss) { -======= std::index_sequence, const Ts*... 
xss) { ->>>>>>> c08a2a5d (Merge branch 'develop' into casting_int_tests) const auto xss_tup = std::make_tuple(xss...); constexpr auto f = [](auto dst, auto src, size_t size) { @@ -196,6 +195,8 @@ template class MathTest { template struct RefType {}; +template <> struct RefType { using type = float; }; + template <> struct RefType { using type = double; }; template <> struct RefType { using type = long double; }; diff --git a/catch/unit/math/ternary_common.hh b/catch/unit/math/ternary_common.hh index 4bc7fe26cc..fef750bded 100644 --- a/catch/unit/math/ternary_common.hh +++ b/catch/unit/math/ternary_common.hh @@ -74,8 +74,13 @@ void TernaryFloatingPointBruteForceTest(kernel_sig kernel, thread_pool.Post([=, &x1s, &x2s, &x3s] { const auto generator = [=] { static thread_local std::mt19937 rng(std::random_device{}()); - std::uniform_real_distribution> unif_dist(a, b); - return static_cast(unif_dist(rng)); + if constexpr (std::is_same_v) { + std::uniform_real_distribution> unif_dist(-FLOAT16_MAX, FLOAT16_MAX); + return static_cast(unif_dist(rng)); + } else { + std::uniform_real_distribution> unif_dist(a, b); + return static_cast(unif_dist(rng)); + } }; std::generate(x1s.ptr() + base_idx, x1s.ptr() + base_idx + sub_batch_size, generator); std::generate(x2s.ptr() + base_idx, x2s.ptr() + base_idx + sub_batch_size, generator); @@ -93,10 +98,11 @@ void TernaryFloatingPointBruteForceTest(kernel_sig kernel, template void TernaryFloatingPointSpecialValuesTest(kernel_sig kernel, - ref_sig ref_func, - const ValidatorBuilder& validator_builder) { + ref_sig ref_func, + const ValidatorBuilder& validator_builder) { const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); - const auto values = std::get>(kSpecialValRegistry); + using SpecialValsType = std::conditional_t, float, TArg>; + const auto values = std::get>(kSpecialValRegistry); const auto size = values.size * values.size * values.size; LinearAllocGuard x1s{LinearAllocs::hipHostMalloc, size * 
sizeof(TArg)}; @@ -119,13 +125,16 @@ void TernaryFloatingPointSpecialValuesTest(kernel_sig kerne } template -void TernaryFloatingPointTest(kernel_sig kernel, ref_sig ref_func, - const ValidatorBuilder& validator_builder) { +void TernaryFloatingPointTest(kernel_sig kernel, + ref_sig ref_func, + const ValidatorBuilder& validator_builder) { SECTION("Special values") { TernaryFloatingPointSpecialValuesTest(kernel, ref_func, validator_builder); } - SECTION("Brute force") { TernaryFloatingPointBruteForceTest(kernel, ref_func, validator_builder); } + SECTION("Brute force") { + TernaryFloatingPointBruteForceTest(kernel, ref_func, validator_builder); + } } @@ -138,7 +147,5 @@ void TernaryFloatingPointTest(kernel_sig kernel, ref_sig ? sp_ulp : dp_ulp; \ \ TernaryFloatingPointTest(kern_name##_kernel, ref, \ - ULPValidatorBuilderFactory(ulp)); \ - \ + ULPValidatorBuilderFactory(ulp)); \ } - diff --git a/catch/unit/math/unary_common.hh b/catch/unit/math/unary_common.hh index d80ffd1bbc..bb1c24b198 100644 --- a/catch/unit/math/unary_common.hh +++ b/catch/unit/math/unary_common.hh @@ -43,6 +43,49 @@ namespace cg = cooperative_groups; } \ } +template +void UnaryHalfPrecisionBruteForceTest(kernel_sig kernel, ref_sig ref_func, + const ValidatorBuilder& validator_builder) { + const auto [grid_size, block_size] = GetOccupancyMaxPotentialBlockSize(kernel); + uint64_t stop = std::numeric_limits::max() + 1ul; + const auto max_batch_size = + std::min(GetMaxAllowedDeviceMemoryUsage() / (sizeof(Float16) + sizeof(T)), stop); + LinearAllocGuard values{LinearAllocs::hipHostMalloc, max_batch_size * sizeof(Float16)}; + + MathTest math_test(kernel, max_batch_size); + + auto batch_size = max_batch_size; + const auto num_threads = thread_pool.thread_count(); + + for (uint64_t v = 0u; v < stop;) { + batch_size = std::min(max_batch_size, stop - v); + + const auto min_sub_batch_size = batch_size / num_threads; + const auto tail = batch_size % num_threads; + + auto base_idx = 0u; + for (auto i = 0u; i 
< num_threads; ++i) { + const auto sub_batch_size = min_sub_batch_size + (i < tail); + + thread_pool.Post([=, &values] { + auto t = v; + uint16_t val; + for (auto j = 0u; j < sub_batch_size; ++j) { + val = static_cast(t++); + values.ptr()[base_idx + j] = *reinterpret_cast(&val); + } + }); + + v += sub_batch_size; + base_idx += sub_batch_size; + } + + thread_pool.Wait(); + + math_test.Run(validator_builder, grid_size, block_size, ref_func, batch_size, values.ptr()); + } +} + template void UnarySinglePrecisionBruteForceTest(kernel_sig kernel, ref_sig ref_func, const ValidatorBuilder& validator_builder) { @@ -163,6 +206,12 @@ void UnaryDoublePrecisionSpecialValuesTest(kernel_sig kernel, values.data); } +template +void UnaryHalfPrecisionTest(kernel_sig kernel, ref_sig ref, + const ValidatorBuilder& validator_builder) { + SECTION("Brute force") { UnaryHalfPrecisionBruteForceTest(kernel, ref, validator_builder); } +} + template void UnarySinglePrecisionTest(kernel_sig kernel, ref_sig ref, const ValidatorBuilder& validator_builder) { diff --git a/catch/unit/math/validators.hh b/catch/unit/math/validators.hh index b732f79354..e8bb220b3c 100644 --- a/catch/unit/math/validators.hh +++ b/catch/unit/math/validators.hh @@ -61,21 +61,21 @@ template class ValidatorBase : public MatcherBase }; template auto ULPValidatorBuilderFactory(int64_t ulps) { - return [=](T target, auto&&... args) { + return [=](T target, auto&&...) { return std::make_unique>( target, Catch::WithinULP(target, ulps)); }; }; template auto AbsValidatorBuilderFactory(double margin) { - return [=](T target, auto&&... args) { + return [=](T target, auto&&...) { return std::make_unique>( target, Catch::WithinAbs(target, margin)); }; } template auto RelValidatorBuilderFactory(T margin) { - return [=](T target, auto&&... args) { + return [=](T target, auto&&...) 
{ return std::make_unique>( target, Catch::WithinRel(target, margin)); }; @@ -104,7 +104,7 @@ template class EqValidator : public MatcherBase { }; template auto EqValidatorBuilderFactory() { - return [](T val, auto&&... args) { return std::make_unique>(val); }; + return [](T val, auto&&...) { return std::make_unique>(val); }; } template @@ -128,25 +128,25 @@ class PairValidator : public MatcherBase> { template auto PairValidatorBuilderFactory(const ValidatorBuilder& vb) { - return [=](const std::pair& t, auto&&... args) { + return [=](const std::pair& t, auto&&...) { return std::make_unique>(t, vb, vb); }; } template auto PairValidatorBuilderFactory(const VBF& vbf, const VBS& vbs) { - return [=](const std::pair& t, auto&&... args) { + return [=](const std::pair& t, auto&&...) { return std::make_unique>(t, vbf, vbs); }; } template class NopValidator : public MatcherBase { public: - bool match(const T& val) const override { return true; } + bool match(const T&) const override { return true; } std::string describe() const override { return ""; } }; template auto NopValidatorBuilderFactory() { - return [](auto&&... args) { return std::make_unique>(); }; + return [](auto&&...) 
{ return std::make_unique>(); }; } From fb9a9a9a03b172377c051b881bba5c0037490f73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 15:05:35 +0100 Subject: [PATCH 42/71] EXSWHTEC-344 - Refactor and extend tests for hipDrvGetErrorName and hipDrvGetErrorString #426 Change-Id: I4eb57eeaff6e5d723e3e236e07d4320430a4582a --- catch/unit/errorHandling/CMakeLists.txt | 7 +- .../errorHandling/error_handling_common.cc | 534 ++++++++++++++++++ ...Enumerators.h => error_handling_common.hh} | 16 +- .../unit/errorHandling/hipDrvGetErrorName.cc | 378 ++----------- .../errorHandling/hipDrvGetErrorString.cc | 278 ++------- catch/unit/errorHandling/hipGetErrorName.cc | 6 +- catch/unit/errorHandling/hipGetErrorString.cc | 5 +- .../unit/errorHandling/hipPeekAtLastError.cc | 4 +- 8 files changed, 654 insertions(+), 574 deletions(-) create mode 100644 catch/unit/errorHandling/error_handling_common.cc rename catch/unit/errorHandling/{errorEnumerators.h => error_handling_common.hh} (95%) diff --git a/catch/unit/errorHandling/CMakeLists.txt b/catch/unit/errorHandling/CMakeLists.txt index 7dcdb52f4c..b9a6de0afa 100644 --- a/catch/unit/errorHandling/CMakeLists.txt +++ b/catch/unit/errorHandling/CMakeLists.txt @@ -1,14 +1,15 @@ # Common Tests - Test independent of all platforms set(TEST_SRC + error_handling_common.cc hipGetErrorName.cc hipGetErrorString.cc - hipGetLastError.cc - hipPeekAtLastError.cc hipDrvGetErrorName.cc hipDrvGetErrorString.cc + hipGetLastError.cc + hipPeekAtLastError.cc ) hip_add_exe_to_target(NAME ErrorHandlingTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests - COMPILE_OPTIONS -std=c++17) \ No newline at end of file + COMPILE_OPTIONS -std=c++17) diff --git a/catch/unit/errorHandling/error_handling_common.cc b/catch/unit/errorHandling/error_handling_common.cc new file mode 100644 index 0000000000..20267e793d --- /dev/null +++ 
b/catch/unit/errorHandling/error_handling_common.cc @@ -0,0 +1,534 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "error_handling_common.hh" + +const char* ErrorName(hipError_t enumerator) { + switch (enumerator) { +#if HT_AMD + case hipSuccess: + return "hipSuccess"; + case hipErrorInvalidValue: + return "hipErrorInvalidValue"; + case hipErrorOutOfMemory: + return "hipErrorOutOfMemory"; + case hipErrorNotInitialized: + return "hipErrorNotInitialized"; + case hipErrorDeinitialized: + return "hipErrorDeinitialized"; + case hipErrorProfilerDisabled: + return "hipErrorProfilerDisabled"; + case hipErrorProfilerNotInitialized: + return "hipErrorProfilerNotInitialized"; + case hipErrorProfilerAlreadyStarted: + return "hipErrorProfilerAlreadyStarted"; + case hipErrorProfilerAlreadyStopped: + return "hipErrorProfilerAlreadyStopped"; + case hipErrorInvalidConfiguration: + return "hipErrorInvalidConfiguration"; + case hipErrorInvalidSymbol: + return "hipErrorInvalidSymbol"; + case hipErrorInvalidDevicePointer: + return "hipErrorInvalidDevicePointer"; + case hipErrorInvalidMemcpyDirection: + return "hipErrorInvalidMemcpyDirection"; + case hipErrorInsufficientDriver: + return "hipErrorInsufficientDriver"; + case hipErrorMissingConfiguration: + return "hipErrorMissingConfiguration"; + case hipErrorPriorLaunchFailure: + return "hipErrorPriorLaunchFailure"; + case hipErrorInvalidDeviceFunction: + return "hipErrorInvalidDeviceFunction"; + case hipErrorNoDevice: + return "hipErrorNoDevice"; + case hipErrorInvalidDevice: + return "hipErrorInvalidDevice"; + case hipErrorInvalidPitchValue: + return "hipErrorInvalidPitchValue"; + case hipErrorInvalidImage: + return "hipErrorInvalidImage"; + case hipErrorInvalidContext: + return "hipErrorInvalidContext"; + case hipErrorContextAlreadyCurrent: + return "hipErrorContextAlreadyCurrent"; + case hipErrorMapFailed: + return "hipErrorMapFailed"; + case hipErrorUnmapFailed: + return "hipErrorUnmapFailed"; + case hipErrorArrayIsMapped: + return "hipErrorArrayIsMapped"; + case hipErrorAlreadyMapped: + return "hipErrorAlreadyMapped"; + case 
hipErrorNoBinaryForGpu: + return "hipErrorNoBinaryForGpu"; + case hipErrorAlreadyAcquired: + return "hipErrorAlreadyAcquired"; + case hipErrorNotMapped: + return "hipErrorNotMapped"; + case hipErrorNotMappedAsArray: + return "hipErrorNotMappedAsArray"; + case hipErrorNotMappedAsPointer: + return "hipErrorNotMappedAsPointer"; + case hipErrorECCNotCorrectable: + return "hipErrorECCNotCorrectable"; + case hipErrorUnsupportedLimit: + return "hipErrorUnsupportedLimit"; + case hipErrorContextAlreadyInUse: + return "hipErrorContextAlreadyInUse"; + case hipErrorPeerAccessUnsupported: + return "hipErrorPeerAccessUnsupported"; + case hipErrorInvalidKernelFile: + return "hipErrorInvalidKernelFile"; + case hipErrorInvalidGraphicsContext: + return "hipErrorInvalidGraphicsContext"; + case hipErrorInvalidSource: + return "hipErrorInvalidSource"; + case hipErrorFileNotFound: + return "hipErrorFileNotFound"; + case hipErrorSharedObjectSymbolNotFound: + return "hipErrorSharedObjectSymbolNotFound"; + case hipErrorSharedObjectInitFailed: + return "hipErrorSharedObjectInitFailed"; + case hipErrorOperatingSystem: + return "hipErrorOperatingSystem"; + case hipErrorInvalidHandle: + return "hipErrorInvalidHandle"; + case hipErrorIllegalState: + return "hipErrorIllegalState"; + case hipErrorNotFound: + return "hipErrorNotFound"; + case hipErrorNotReady: + return "hipErrorNotReady"; + case hipErrorIllegalAddress: + return "hipErrorIllegalAddress"; + case hipErrorLaunchOutOfResources: + return "hipErrorLaunchOutOfResources"; + case hipErrorLaunchTimeOut: + return "hipErrorLaunchTimeOut"; + case hipErrorPeerAccessAlreadyEnabled: + return "hipErrorPeerAccessAlreadyEnabled"; + case hipErrorPeerAccessNotEnabled: + return "hipErrorPeerAccessNotEnabled"; + case hipErrorSetOnActiveProcess: + return "hipErrorSetOnActiveProcess"; + case hipErrorContextIsDestroyed: + return "hipErrorContextIsDestroyed"; + case hipErrorAssert: + return "hipErrorAssert"; + case hipErrorHostMemoryAlreadyRegistered: + 
return "hipErrorHostMemoryAlreadyRegistered"; + case hipErrorHostMemoryNotRegistered: + return "hipErrorHostMemoryNotRegistered"; + case hipErrorLaunchFailure: + return "hipErrorLaunchFailure"; + case hipErrorNotSupported: + return "hipErrorNotSupported"; + case hipErrorUnknown: + return "hipErrorUnknown"; + case hipErrorRuntimeMemory: + return "hipErrorRuntimeMemory"; + case hipErrorRuntimeOther: + return "hipErrorRuntimeOther"; + case hipErrorCooperativeLaunchTooLarge: + return "hipErrorCooperativeLaunchTooLarge"; + case hipErrorStreamCaptureUnsupported: + return "hipErrorStreamCaptureUnsupported"; + case hipErrorStreamCaptureInvalidated: + return "hipErrorStreamCaptureInvalidated"; + case hipErrorStreamCaptureMerge: + return "hipErrorStreamCaptureMerge"; + case hipErrorStreamCaptureUnmatched: + return "hipErrorStreamCaptureUnmatched"; + case hipErrorStreamCaptureUnjoined: + return "hipErrorStreamCaptureUnjoined"; + case hipErrorStreamCaptureIsolation: + return "hipErrorStreamCaptureIsolation"; + case hipErrorStreamCaptureImplicit: + return "hipErrorStreamCaptureImplicit"; + case hipErrorCapturedEvent: + return "hipErrorCapturedEvent"; + case hipErrorStreamCaptureWrongThread: + return "hipErrorStreamCaptureWrongThread"; + case hipErrorGraphExecUpdateFailure: + return "hipErrorGraphExecUpdateFailure"; + case hipErrorTbd: + return "hipErrorTbd"; + default: + return "hipErrorUnknown"; +#else + case hipSuccess: + return "CUDA_SUCCESS"; + case hipErrorInvalidValue: + return "CUDA_ERROR_INVALID_VALUE"; + case hipErrorOutOfMemory: + return "CUDA_ERROR_OUT_OF_MEMORY"; + case hipErrorNotInitialized: + return "CUDA_ERROR_NOT_INITIALIZED"; + case hipErrorDeinitialized: + return "CUDA_ERROR_DEINITIALIZED"; + case hipErrorProfilerDisabled: + return "CUDA_ERROR_PROFILER_DISABLED"; + case hipErrorProfilerNotInitialized: + return "CUDA_ERROR_PROFILER_NOT_INITIALIZED"; + case hipErrorProfilerAlreadyStarted: + return "CUDA_ERROR_PROFILER_ALREADY_STARTED"; + case 
hipErrorProfilerAlreadyStopped: + return "CUDA_ERROR_PROFILER_ALREADY_STOPPED"; + case hipErrorInvalidConfiguration: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorInvalidSymbol: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorInvalidDevicePointer: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorInvalidMemcpyDirection: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorInsufficientDriver: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorMissingConfiguration: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorPriorLaunchFailure: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorInvalidDeviceFunction: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorNoDevice: + return "CUDA_ERROR_NO_DEVICE"; + case hipErrorInvalidDevice: + return "CUDA_ERROR_INVALID_DEVICE"; + case hipErrorInvalidPitchValue: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorInvalidImage: + return "CUDA_ERROR_INVALID_IMAGE"; + case hipErrorInvalidContext: + return "CUDA_ERROR_INVALID_CONTEXT"; + case hipErrorContextAlreadyCurrent: + return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT"; + case hipErrorMapFailed: + return "CUDA_ERROR_MAP_FAILED"; + case hipErrorUnmapFailed: + return "CUDA_ERROR_UNMAP_FAILED"; + case hipErrorArrayIsMapped: + return "CUDA_ERROR_ARRAY_IS_MAPPED"; + case hipErrorAlreadyMapped: + return "CUDA_ERROR_ALREADY_MAPPED"; + case hipErrorNoBinaryForGpu: + return "CUDA_ERROR_NO_BINARY_FOR_GPU"; + case hipErrorAlreadyAcquired: + return "CUDA_ERROR_ALREADY_ACQUIRED"; + case hipErrorNotMapped: + return "CUDA_ERROR_NOT_MAPPED"; + case hipErrorNotMappedAsArray: + return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY"; + case hipErrorNotMappedAsPointer: + return "CUDA_ERROR_NOT_MAPPED_AS_POINTER"; + case hipErrorECCNotCorrectable: + return "CUDA_ERROR_ECC_UNCORRECTABLE"; + case hipErrorUnsupportedLimit: + return "CUDA_ERROR_UNSUPPORTED_LIMIT"; + case hipErrorContextAlreadyInUse: + return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE"; + case hipErrorPeerAccessUnsupported: + return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED"; + case hipErrorInvalidKernelFile: 
+ return "CUDA_ERROR_INVALID_PTX"; + case hipErrorInvalidGraphicsContext: + return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT"; + case hipErrorInvalidSource: + return "CUDA_ERROR_INVALID_SOURCE"; + case hipErrorFileNotFound: + return "CUDA_ERROR_FILE_NOT_FOUND"; + case hipErrorSharedObjectSymbolNotFound: + return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND"; + case hipErrorSharedObjectInitFailed: + return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED"; + case hipErrorOperatingSystem: + return "CUDA_ERROR_OPERATING_SYSTEM"; + case hipErrorInvalidHandle: + return "CUDA_ERROR_INVALID_HANDLE"; + case hipErrorIllegalState: + return "CUDA_ERROR_ILLEGAL_STATE"; + case hipErrorNotFound: + return "CUDA_ERROR_NOT_FOUND"; + case hipErrorNotReady: + return "CUDA_ERROR_NOT_READY"; + case hipErrorIllegalAddress: + return "CUDA_ERROR_ILLEGAL_ADDRESS"; + case hipErrorLaunchOutOfResources: + return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES"; + case hipErrorLaunchTimeOut: + return "CUDA_ERROR_LAUNCH_TIMEOUT"; + case hipErrorPeerAccessAlreadyEnabled: + return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED"; + case hipErrorPeerAccessNotEnabled: + return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED"; + case hipErrorSetOnActiveProcess: + return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE"; + case hipErrorContextIsDestroyed: + return "CUDA_ERROR_CONTEXT_IS_DESTROYED"; + case hipErrorAssert: + return "CUDA_ERROR_ASSERT"; + case hipErrorHostMemoryAlreadyRegistered: + return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED"; + case hipErrorHostMemoryNotRegistered: + return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED"; + case hipErrorLaunchFailure: + return "CUDA_ERROR_LAUNCH_FAILED"; + case hipErrorNotSupported: + return "CUDA_ERROR_NOT_SUPPORTED"; + case hipErrorUnknown: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorRuntimeMemory: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorRuntimeOther: + return "CUDA_ERROR_UNKNOWN"; + case hipErrorCooperativeLaunchTooLarge: + return "CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE"; + case 
hipErrorStreamCaptureUnsupported: + return "CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED"; + case hipErrorStreamCaptureInvalidated: + return "CUDA_ERROR_STREAM_CAPTURE_INVALIDATED"; + case hipErrorStreamCaptureMerge: + return "CUDA_ERROR_STREAM_CAPTURE_MERGE"; + case hipErrorStreamCaptureUnmatched: + return "CUDA_ERROR_STREAM_CAPTURE_UNMATCHED"; + case hipErrorStreamCaptureUnjoined: + return "CUDA_ERROR_STREAM_CAPTURE_UNJOINED"; + case hipErrorStreamCaptureIsolation: + return "CUDA_ERROR_STREAM_CAPTURE_ISOLATION"; + case hipErrorStreamCaptureImplicit: + return "CUDA_ERROR_STREAM_CAPTURE_IMPLICIT"; + case hipErrorCapturedEvent: + return "CUDA_ERROR_CAPTURED_EVENT"; + case hipErrorStreamCaptureWrongThread: + return "CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD"; + case hipErrorGraphExecUpdateFailure: + return "CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE"; + default: + return "CUDA_ERROR_UNKNOWN"; +#endif + } +} + +const char* ErrorString(hipError_t enumerator) { + switch (enumerator) { + case hipSuccess: + return "no error"; + case hipErrorInvalidValue: + return "invalid argument"; + case hipErrorOutOfMemory: + return "out of memory"; + case hipErrorNotInitialized: + return "initialization error"; + case hipErrorDeinitialized: + return "driver shutting down"; + case hipErrorProfilerDisabled: + return "profiler disabled while using external profiling tool"; + case hipErrorProfilerNotInitialized: +#if HT_AMD + return "profiler is not initialized"; +#elif HT_NVIDIA + return "profiler not initialized: call cudaProfilerInitialize()"; +#endif + case hipErrorProfilerAlreadyStarted: + return "profiler already started"; + case hipErrorProfilerAlreadyStopped: + return "profiler already stopped"; +#if HT_AMD + case hipErrorInvalidConfiguration: + return "invalid configuration argument"; +#elif HT_NVIDIA + return "unknown error"; +#endif +#if HT_AMD + case hipErrorInvalidPitchValue: + return "invalid pitch argument"; +#elif HT_NVIDIA + return "unknown error"; +#endif +#if HT_AMD + case 
hipErrorInvalidSymbol: + return "invalid device symbol"; +#elif HT_NVIDIA + return "unknown error"; +#endif +#if HT_AMD + case hipErrorInvalidDevicePointer: + return "invalid device pointer"; +#elif HT_NVIDIA + return "unknown error"; +#endif +#if HT_AMD + case hipErrorInvalidMemcpyDirection: + return "invalid copy direction for memcpy"; +#elif HT_NVIDIA + return "unknown error"; +#endif +#if HT_AMD + case hipErrorInsufficientDriver: + return "driver version is insufficient for runtime version"; +#elif HT_NVIDIA + return "unknown error"; +#endif +#if HT_AMD + case hipErrorMissingConfiguration: + return "__global__ function call is not configured"; +#elif HT_NVIDIA + return "unknown error"; +#endif +#if HT_AMD + case hipErrorPriorLaunchFailure: + return "unspecified launch failure in prior launch"; +#elif HT_NVIDIA + return "unknown error"; +#endif +#if HT_AMD + case hipErrorInvalidDeviceFunction: + return "invalid device function"; +#elif HT_NVIDIA + return "unknown error"; +#endif + case hipErrorNoDevice: +#if HT_AMD + return "no ROCm-capable device is detected"; +#elif HT_NVIDIA + return "no CUDA-capable device is detected"; +#endif + case hipErrorInvalidDevice: + return "invalid device ordinal"; + case hipErrorInvalidImage: + return "device kernel image is invalid"; + case hipErrorInvalidContext: + return "invalid device context"; + case hipErrorContextAlreadyCurrent: +#if HT_AMD + return "context is already current context"; +#elif HT_NVIDIA + return "context already current"; +#endif + case hipErrorMapFailed: + return "mapping of buffer object failed"; + case hipErrorUnmapFailed: + return "unmapping of buffer object failed"; + case hipErrorArrayIsMapped: + return "array is mapped"; + case hipErrorAlreadyMapped: + return "resource already mapped"; + case hipErrorNoBinaryForGpu: + return "no kernel image is available for execution on the device"; + case hipErrorAlreadyAcquired: + return "resource already acquired"; + case hipErrorNotMapped: + return "resource 
not mapped"; + case hipErrorNotMappedAsArray: + return "resource not mapped as array"; + case hipErrorNotMappedAsPointer: + return "resource not mapped as pointer"; + case hipErrorECCNotCorrectable: + return "uncorrectable ECC error encountered"; + case hipErrorUnsupportedLimit: + return "limit is not supported on this architecture"; + case hipErrorContextAlreadyInUse: + return "exclusive-thread device already in use by a different thread"; + case hipErrorPeerAccessUnsupported: + return "peer access is not supported between these two devices"; + case hipErrorInvalidKernelFile: +#if HT_AMD + return "invalid kernel file"; +#elif HT_NVIDIA + return "a PTX JIT compilation failed"; +#endif + case hipErrorInvalidGraphicsContext: + return "invalid OpenGL or DirectX context"; + case hipErrorInvalidSource: + return "device kernel image is invalid"; + case hipErrorFileNotFound: + return "file not found"; + case hipErrorSharedObjectSymbolNotFound: + return "shared object symbol not found"; + case hipErrorSharedObjectInitFailed: + return "shared object initialization failed"; + case hipErrorOperatingSystem: + return "OS call failed or operation not supported on this OS"; + case hipErrorInvalidHandle: + return "invalid resource handle"; + case hipErrorIllegalState: + return "the operation cannot be performed in the present state"; + case hipErrorNotFound: + return "named symbol not found"; + case hipErrorNotReady: + return "device not ready"; + case hipErrorIllegalAddress: + return "an illegal memory access was encountered"; + case hipErrorLaunchOutOfResources: + return "too many resources requested for launch"; + case hipErrorLaunchTimeOut: + return "the launch timed out and was terminated"; + case hipErrorPeerAccessAlreadyEnabled: + return "peer access is already enabled"; + case hipErrorPeerAccessNotEnabled: + return "peer access has not been enabled"; + case hipErrorSetOnActiveProcess: + return "cannot set while device is active in this process"; + case 
hipErrorContextIsDestroyed: + return "context is destroyed"; + case hipErrorAssert: + return "device-side assert triggered"; + case hipErrorHostMemoryAlreadyRegistered: + return "part or all of the requested memory range is already mapped"; + case hipErrorHostMemoryNotRegistered: + return "pointer does not correspond to a registered memory region"; + case hipErrorLaunchFailure: + return "unspecified launch failure"; + case hipErrorCooperativeLaunchTooLarge: + return "too many blocks in cooperative launch"; + case hipErrorNotSupported: + return "operation not supported"; + case hipErrorStreamCaptureUnsupported: + return "operation not permitted when stream is capturing"; + case hipErrorStreamCaptureInvalidated: + return "operation failed due to a previous error during capture"; + case hipErrorStreamCaptureMerge: + return "operation would result in a merge of separate capture sequences"; + case hipErrorStreamCaptureUnmatched: + return "capture was not ended in the same stream as it began"; + case hipErrorStreamCaptureUnjoined: + return "capturing stream has unjoined work"; + case hipErrorStreamCaptureIsolation: + return "dependency created on uncaptured work in another stream"; + case hipErrorStreamCaptureImplicit: + return "operation would make the legacy stream depend on a capturing blocking stream"; // NOLINT + case hipErrorCapturedEvent: + return "operation not permitted on an event last recorded in a capturing stream"; // NOLINT + case hipErrorStreamCaptureWrongThread: + return "attempt to terminate a thread-local capture sequence from another thread"; // NOLINT + case hipErrorGraphExecUpdateFailure: + return "the graph update was not performed because it included changes which violated " + "constraints specific to instantiated graph update"; // NOLINT + case hipErrorRuntimeMemory: + return "runtime memory call returned error"; + case hipErrorRuntimeOther: + return "runtime call other than memory returned error"; + case hipErrorUnknown: + default: + return 
"unknown error"; + } +} \ No newline at end of file diff --git a/catch/unit/errorHandling/errorEnumerators.h b/catch/unit/errorHandling/error_handling_common.hh similarity index 95% rename from catch/unit/errorHandling/errorEnumerators.h rename to catch/unit/errorHandling/error_handling_common.hh index e671938c41..902735a1ed 100644 --- a/catch/unit/errorHandling/errorEnumerators.h +++ b/catch/unit/errorHandling/error_handling_common.hh @@ -1,5 +1,5 @@ /* -Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -33,7 +33,7 @@ constexpr hipError_t kErrorEnumerators[] = {hipSuccess, hipErrorProfilerNotInitialized, hipErrorProfilerAlreadyStarted, hipErrorProfilerAlreadyStopped, - #if HT_AMD +#if HT_AMD hipErrorInvalidConfiguration, hipErrorInvalidPitchValue, hipErrorInvalidSymbol, @@ -43,7 +43,7 @@ constexpr hipError_t kErrorEnumerators[] = {hipSuccess, hipErrorMissingConfiguration, hipErrorPriorLaunchFailure, hipErrorInvalidDeviceFunction, - #endif +#endif hipErrorNoDevice, hipErrorInvalidDevice, hipErrorInvalidImage, @@ -97,8 +97,12 @@ constexpr hipError_t kErrorEnumerators[] = {hipSuccess, hipErrorStreamCaptureWrongThread, hipErrorGraphExecUpdateFailure, hipErrorUnknown, - #if HT_AMD +#if HT_AMD hipErrorRuntimeMemory, hipErrorRuntimeOther - #endif - }; +#endif +}; + +const char* ErrorName(hipError_t enumerator); + +const char* ErrorString(hipError_t enumerator); \ No newline at end of file diff --git a/catch/unit/errorHandling/hipDrvGetErrorName.cc b/catch/unit/errorHandling/hipDrvGetErrorName.cc index 367d890be0..b3401cfc5d 100644 --- a/catch/unit/errorHandling/hipDrvGetErrorName.cc +++ b/catch/unit/errorHandling/hipDrvGetErrorName.cc @@ -1,5 +1,5 @@ /* -Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -17,347 +17,67 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include -#include #include -#include "errorEnumerators.h" -// Local Function to return the error code in string +#include "error_handling_common.hh" -static const char *ErrorName(hipError_t enumerator) { - switch (enumerator) { - #if HT_AMD - case hipSuccess: - return "hipSuccess"; - case hipErrorInvalidValue: - return "hipErrorInvalidValue"; - case hipErrorOutOfMemory: - return "hipErrorOutOfMemory"; - case hipErrorNotInitialized: - return "hipErrorNotInitialized"; - case hipErrorDeinitialized: - return "hipErrorDeinitialized"; - case hipErrorProfilerDisabled: - return "hipErrorProfilerDisabled"; - case hipErrorProfilerNotInitialized: - return "hipErrorProfilerNotInitialized"; - case hipErrorProfilerAlreadyStarted: - return "hipErrorProfilerAlreadyStarted"; - case hipErrorProfilerAlreadyStopped: - return "hipErrorProfilerAlreadyStopped"; - case hipErrorInvalidConfiguration: - return "hipErrorInvalidConfiguration"; - case hipErrorInvalidSymbol: - return "hipErrorInvalidSymbol"; - case hipErrorInvalidDevicePointer: - return "hipErrorInvalidDevicePointer"; - case hipErrorInvalidMemcpyDirection: - return "hipErrorInvalidMemcpyDirection"; - case hipErrorInsufficientDriver: - return "hipErrorInsufficientDriver"; - case hipErrorMissingConfiguration: - return "hipErrorMissingConfiguration"; - case hipErrorPriorLaunchFailure: - return "hipErrorPriorLaunchFailure"; - case hipErrorInvalidDeviceFunction: - return "hipErrorInvalidDeviceFunction"; - case hipErrorNoDevice: - return "hipErrorNoDevice"; - case hipErrorInvalidDevice: - return "hipErrorInvalidDevice"; - 
case hipErrorInvalidPitchValue: - return "hipErrorInvalidPitchValue"; - case hipErrorInvalidImage: - return "hipErrorInvalidImage"; - case hipErrorInvalidContext: - return "hipErrorInvalidContext"; - case hipErrorContextAlreadyCurrent: - return "hipErrorContextAlreadyCurrent"; - case hipErrorMapFailed: - return "hipErrorMapFailed"; - case hipErrorUnmapFailed: - return "hipErrorUnmapFailed"; - case hipErrorArrayIsMapped: - return "hipErrorArrayIsMapped"; - case hipErrorAlreadyMapped: - return "hipErrorAlreadyMapped"; - case hipErrorNoBinaryForGpu: - return "hipErrorNoBinaryForGpu"; - case hipErrorAlreadyAcquired: - return "hipErrorAlreadyAcquired"; - case hipErrorNotMapped: - return "hipErrorNotMapped"; - case hipErrorNotMappedAsArray: - return "hipErrorNotMappedAsArray"; - case hipErrorNotMappedAsPointer: - return "hipErrorNotMappedAsPointer"; - case hipErrorECCNotCorrectable: - return "hipErrorECCNotCorrectable"; - case hipErrorUnsupportedLimit: - return "hipErrorUnsupportedLimit"; - case hipErrorContextAlreadyInUse: - return "hipErrorContextAlreadyInUse"; - case hipErrorPeerAccessUnsupported: - return "hipErrorPeerAccessUnsupported"; - case hipErrorInvalidKernelFile: - return "hipErrorInvalidKernelFile"; - case hipErrorInvalidGraphicsContext: - return "hipErrorInvalidGraphicsContext"; - case hipErrorInvalidSource: - return "hipErrorInvalidSource"; - case hipErrorFileNotFound: - return "hipErrorFileNotFound"; - case hipErrorSharedObjectSymbolNotFound: - return "hipErrorSharedObjectSymbolNotFound"; - case hipErrorSharedObjectInitFailed: - return "hipErrorSharedObjectInitFailed"; - case hipErrorOperatingSystem: - return "hipErrorOperatingSystem"; - case hipErrorInvalidHandle: - return "hipErrorInvalidHandle"; - case hipErrorIllegalState: - return "hipErrorIllegalState"; - case hipErrorNotFound: - return "hipErrorNotFound"; - case hipErrorNotReady: - return "hipErrorNotReady"; - case hipErrorIllegalAddress: - return "hipErrorIllegalAddress"; - case 
hipErrorLaunchOutOfResources: - return "hipErrorLaunchOutOfResources"; - case hipErrorLaunchTimeOut: - return "hipErrorLaunchTimeOut"; - case hipErrorPeerAccessAlreadyEnabled: - return "hipErrorPeerAccessAlreadyEnabled"; - case hipErrorPeerAccessNotEnabled: - return "hipErrorPeerAccessNotEnabled"; - case hipErrorSetOnActiveProcess: - return "hipErrorSetOnActiveProcess"; - case hipErrorContextIsDestroyed: - return "hipErrorContextIsDestroyed"; - case hipErrorAssert: - return "hipErrorAssert"; - case hipErrorHostMemoryAlreadyRegistered: - return "hipErrorHostMemoryAlreadyRegistered"; - case hipErrorHostMemoryNotRegistered: - return "hipErrorHostMemoryNotRegistered"; - case hipErrorLaunchFailure: - return "hipErrorLaunchFailure"; - case hipErrorNotSupported: - return "hipErrorNotSupported"; - case hipErrorUnknown: - return "hipErrorUnknown"; - case hipErrorRuntimeMemory: - return "hipErrorRuntimeMemory"; - case hipErrorRuntimeOther: - return "hipErrorRuntimeOther"; - case hipErrorCooperativeLaunchTooLarge: - return "hipErrorCooperativeLaunchTooLarge"; - case hipErrorStreamCaptureUnsupported: - return "hipErrorStreamCaptureUnsupported"; - case hipErrorStreamCaptureInvalidated: - return "hipErrorStreamCaptureInvalidated"; - case hipErrorStreamCaptureMerge: - return "hipErrorStreamCaptureMerge"; - case hipErrorStreamCaptureUnmatched: - return "hipErrorStreamCaptureUnmatched"; - case hipErrorStreamCaptureUnjoined: - return "hipErrorStreamCaptureUnjoined"; - case hipErrorStreamCaptureIsolation: - return "hipErrorStreamCaptureIsolation"; - case hipErrorStreamCaptureImplicit: - return "hipErrorStreamCaptureImplicit"; - case hipErrorCapturedEvent: - return "hipErrorCapturedEvent"; - case hipErrorStreamCaptureWrongThread: - return "hipErrorStreamCaptureWrongThread"; - case hipErrorGraphExecUpdateFailure: - return "hipErrorGraphExecUpdateFailure"; - case hipErrorTbd: - return "hipErrorTbd"; - default: - return "hipErrorUnknown"; - #endif - #if HT_NVIDIA - case hipSuccess: - 
return "CUDA_SUCCESS"; - case hipErrorInvalidValue: - return "CUDA_ERROR_INVALID_VALUE"; - case hipErrorOutOfMemory: - return "CUDA_ERROR_OUT_OF_MEMORY"; - case hipErrorNotInitialized: - return "CUDA_ERROR_NOT_INITIALIZED"; - case hipErrorDeinitialized: - return "CUDA_ERROR_DEINITIALIZED"; - case hipErrorProfilerDisabled: - return "CUDA_ERROR_PROFILER_DISABLED"; - case hipErrorProfilerNotInitialized: - return "CUDA_ERROR_PROFILER_NOT_INITIALIZED"; - case hipErrorProfilerAlreadyStarted: - return "CUDA_ERROR_PROFILER_ALREADY_STARTED"; - case hipErrorProfilerAlreadyStopped: - return "CUDA_ERROR_PROFILER_ALREADY_STOPPED"; - case hipErrorInvalidConfiguration: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorInvalidSymbol: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorInvalidDevicePointer: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorInvalidMemcpyDirection: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorInsufficientDriver: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorMissingConfiguration: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorPriorLaunchFailure: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorInvalidDeviceFunction: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorNoDevice: - return "CUDA_ERROR_NO_DEVICE"; - case hipErrorInvalidDevice: - return "CUDA_ERROR_INVALID_DEVICE"; - case hipErrorInvalidPitchValue: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorInvalidImage: - return "CUDA_ERROR_INVALID_IMAGE"; - case hipErrorInvalidContext: - return "CUDA_ERROR_INVALID_CONTEXT"; - case hipErrorContextAlreadyCurrent: - return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT"; - case hipErrorMapFailed: - return "CUDA_ERROR_MAP_FAILED"; - case hipErrorUnmapFailed: - return "CUDA_ERROR_UNMAP_FAILED"; - case hipErrorArrayIsMapped: - return "CUDA_ERROR_ARRAY_IS_MAPPED"; - case hipErrorAlreadyMapped: - return "CUDA_ERROR_ALREADY_MAPPED"; - case hipErrorNoBinaryForGpu: - return "CUDA_ERROR_NO_BINARY_FOR_GPU"; - case hipErrorAlreadyAcquired: - return "CUDA_ERROR_ALREADY_ACQUIRED"; - case 
hipErrorNotMapped: - return "CUDA_ERROR_NOT_MAPPED"; - case hipErrorNotMappedAsArray: - return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY"; - case hipErrorNotMappedAsPointer: - return "CUDA_ERROR_NOT_MAPPED_AS_POINTER"; - case hipErrorECCNotCorrectable: - return "CUDA_ERROR_ECC_UNCORRECTABLE"; - case hipErrorUnsupportedLimit: - return "CUDA_ERROR_UNSUPPORTED_LIMIT"; - case hipErrorContextAlreadyInUse: - return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE"; - case hipErrorPeerAccessUnsupported: - return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED"; - case hipErrorInvalidKernelFile: - return "CUDA_ERROR_INVALID_PTX"; - case hipErrorInvalidGraphicsContext: - return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT"; - case hipErrorInvalidSource: - return "CUDA_ERROR_INVALID_SOURCE"; - case hipErrorFileNotFound: - return "CUDA_ERROR_FILE_NOT_FOUND"; - case hipErrorSharedObjectSymbolNotFound: - return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND"; - case hipErrorSharedObjectInitFailed: - return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED"; - case hipErrorOperatingSystem: - return "CUDA_ERROR_OPERATING_SYSTEM"; - case hipErrorInvalidHandle: - return "CUDA_ERROR_INVALID_HANDLE"; - case hipErrorIllegalState: - return "CUDA_ERROR_ILLEGAL_STATE"; - case hipErrorNotFound: - return "CUDA_ERROR_NOT_FOUND"; - case hipErrorNotReady: - return "CUDA_ERROR_NOT_READY"; - case hipErrorIllegalAddress: - return "CUDA_ERROR_ILLEGAL_ADDRESS"; - case hipErrorLaunchOutOfResources: - return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES"; - case hipErrorLaunchTimeOut: - return "CUDA_ERROR_LAUNCH_TIMEOUT"; - case hipErrorPeerAccessAlreadyEnabled: - return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED"; - case hipErrorPeerAccessNotEnabled: - return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED"; - case hipErrorSetOnActiveProcess: - return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE"; - case hipErrorContextIsDestroyed: - return "CUDA_ERROR_CONTEXT_IS_DESTROYED"; - case hipErrorAssert: - return "CUDA_ERROR_ASSERT"; - case hipErrorHostMemoryAlreadyRegistered: - return 
"CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED"; - case hipErrorHostMemoryNotRegistered: - return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED"; - case hipErrorLaunchFailure: - return "CUDA_ERROR_LAUNCH_FAILED"; - case hipErrorNotSupported: - return "CUDA_ERROR_NOT_SUPPORTED"; - case hipErrorUnknown: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorRuntimeMemory: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorRuntimeOther: - return "CUDA_ERROR_UNKNOWN"; - case hipErrorCooperativeLaunchTooLarge: - return "CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE"; - case hipErrorStreamCaptureUnsupported: - return "CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED"; - case hipErrorStreamCaptureInvalidated: - return "CUDA_ERROR_STREAM_CAPTURE_INVALIDATED"; - case hipErrorStreamCaptureMerge: - return "CUDA_ERROR_STREAM_CAPTURE_MERGE"; - case hipErrorStreamCaptureUnmatched: - return "CUDA_ERROR_STREAM_CAPTURE_UNMATCHED"; - case hipErrorStreamCaptureUnjoined: - return "CUDA_ERROR_STREAM_CAPTURE_UNJOINED"; - case hipErrorStreamCaptureIsolation: - return "CUDA_ERROR_STREAM_CAPTURE_ISOLATION"; - case hipErrorStreamCaptureImplicit: - return "CUDA_ERROR_STREAM_CAPTURE_IMPLICIT"; - case hipErrorCapturedEvent: - return "CUDA_ERROR_CAPTURED_EVENT"; - case hipErrorStreamCaptureWrongThread: - return "CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD"; - case hipErrorGraphExecUpdateFailure: - return "CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE"; - default: - return "CUDA_ERROR_UNKNOWN"; - #endif - } -} +/** + * @addtogroup hipDrvGetErrorName hipDrvGetErrorName + * @{ + * @ingroup ErrorTest + * `hipDrvGetErrorName(hipError_t hip_error)` - + * Return hip error as text string form. + */ -// Functional test case -// Test case to verify the returned error name is same as generated error name. - -TEST_CASE("Unit_hipDrvGetErrorName_Functional") { +/** + * Test Description + * ------------------------ + * - Validate that the correct string is returned for each supported + * device error enumeration. 
+ * Test source
+ * ------------------------
+ *    - unit/errorHandling/hipDrvGetErrorName.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.4
+ */
+TEST_CASE("Unit_hipDrvGetErrorName_Positive_Basic") {
   const char* error_string = nullptr;
-  hipError_t error_ret;
   const auto enumerator =
-    GENERATE(from_range(std::begin(kErrorEnumerators),
-                        std::end(kErrorEnumerators)));
-  error_ret = hipDrvGetErrorName(enumerator, &error_string);
+      GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators)));
+  INFO("Error: " << enumerator);
+
+  HIP_CHECK(hipDrvGetErrorName(enumerator, &error_string));
+  REQUIRE(error_string != nullptr);
   REQUIRE(strcmp(error_string, ErrorName(enumerator)) == 0);
-  REQUIRE(error_ret == hipSuccess);
 }
 
-// Negative test cases.
-
-TEST_CASE("Unit_hipDrvGetErrorName_Negative") {
+/**
+ * Test Description
+ * ------------------------
+ *    - Validate handling of invalid arguments:
+ *      -# When error enumerator is invalid (-1)
+ *        - Expected output: return "hipErrorInvalidValue"
+ *      -# When nullptr is passed as store location
+ *         (checked on AMD only, the call segfaults on NVIDIA)
+ *        - Expected output: return "hipErrorInvalidValue"
+ * Test source
+ * ------------------------
+ *    - unit/errorHandling/hipDrvGetErrorName.cc
+ * Test requirements
+ * ------------------------
+ *    - HIP_VERSION >= 5.4
+ */
+TEST_CASE("Unit_hipDrvGetErrorName_Negative_Parameters") {
   const char* error_string = nullptr;
 
   SECTION("pass unknown value to hipError") {
-    REQUIRE((hipDrvGetErrorName(static_cast<hipError_t>(-1), &error_string))
-            == hipErrorInvalidValue);
+    HIP_CHECK_ERROR((hipDrvGetErrorName(static_cast<hipError_t>(-1), &error_string)),
+                    hipErrorInvalidValue);
   }
-  #if HT_AMD
+#if HT_AMD  // segfaults on NVIDIA
   SECTION("pass nullptr to error string") {
-    REQUIRE((hipDrvGetErrorString(static_cast<hipError_t>(0), nullptr))
-            == hipErrorInvalidValue);
+    HIP_CHECK_ERROR((hipDrvGetErrorName(hipErrorInvalidValue, nullptr)), hipErrorInvalidValue);
   }
-  #endif
+#endif
 }
diff --git a/catch/unit/errorHandling/hipDrvGetErrorString.cc b/catch/unit/errorHandling/hipDrvGetErrorString.cc index 2b51a82422..5f35c344fe 100644 --- a/catch/unit/errorHandling/hipDrvGetErrorString.cc +++ b/catch/unit/errorHandling/hipDrvGetErrorString.cc @@ -1,5 +1,5 @@ /* -Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights @@ -17,247 +17,67 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include -#include #include -#include "errorEnumerators.h" -// Local Function to return the error string. +#include "error_handling_common.hh" -static const char *ErrorString(hipError_t enumerator) { - switch (enumerator) { - case hipSuccess: - return "no error"; - case hipErrorInvalidValue: - return "invalid argument"; - case hipErrorOutOfMemory: - return "out of memory"; - case hipErrorNotInitialized: - return "initialization error"; - case hipErrorDeinitialized: - return "driver shutting down"; - case hipErrorProfilerDisabled: - return "profiler disabled while using external profiling tool"; - case hipErrorProfilerNotInitialized: - #if HT_AMD - return "profiler is not initialized"; - #elif HT_NVIDIA - return "profiler not initialized: call cudaProfilerInitialize()"; - #endif - case hipErrorProfilerAlreadyStarted: - return "profiler already started"; - case hipErrorProfilerAlreadyStopped: - return "profiler already stopped"; - #if HT_AMD - case hipErrorInvalidConfiguration: - return "invalid configuration argument"; - #elif HT_NVIDIA - return "unknown error"; - #endif - #if HT_AMD - case hipErrorInvalidPitchValue: - return "invalid pitch argument"; - #elif HT_NVIDIA - return "unknown error"; - #endif - #if HT_AMD 
- case hipErrorInvalidSymbol: - return "invalid device symbol"; - #elif HT_NVIDIA - return "unknown error"; - #endif - #if HT_AMD - case hipErrorInvalidDevicePointer: - return "invalid device pointer"; - #elif HT_NVIDIA - return "unknown error"; - #endif - #if HT_AMD - case hipErrorInvalidMemcpyDirection: - return "invalid copy direction for memcpy"; - #elif HT_NVIDIA - return "unknown error"; - #endif - #if HT_AMD - case hipErrorInsufficientDriver: - return "driver version is insufficient for runtime version"; - #elif HT_NVIDIA - return "unknown error"; - #endif - #if HT_AMD - case hipErrorMissingConfiguration: - return "__global__ function call is not configured"; - #elif HT_NVIDIA - return "unknown error"; - #endif - #if HT_AMD - case hipErrorPriorLaunchFailure: - return "unspecified launch failure in prior launch"; - #elif HT_NVIDIA - return "unknown error"; - #endif - #if HT_AMD - case hipErrorInvalidDeviceFunction: - return "invalid device function"; - #elif HT_NVIDIA - return "unknown error"; - #endif - case hipErrorNoDevice: - #if HT_AMD - return "no ROCm-capable device is detected"; - #elif HT_NVIDIA - return "no CUDA-capable device is detected"; - #endif - case hipErrorInvalidDevice: - return "invalid device ordinal"; - case hipErrorInvalidImage: - return "device kernel image is invalid"; - case hipErrorInvalidContext: - return "invalid device context"; - case hipErrorContextAlreadyCurrent: - #if HT_AMD - return "context is already current context"; - #elif HT_NVIDIA - return "context already current"; - #endif - case hipErrorMapFailed: - return "mapping of buffer object failed"; - case hipErrorUnmapFailed: - return "unmapping of buffer object failed"; - case hipErrorArrayIsMapped: - return "array is mapped"; - case hipErrorAlreadyMapped: - return "resource already mapped"; - case hipErrorNoBinaryForGpu: - return "no kernel image is available for execution on the device"; - case hipErrorAlreadyAcquired: - return "resource already acquired"; - case 
hipErrorNotMapped: - return "resource not mapped"; - case hipErrorNotMappedAsArray: - return "resource not mapped as array"; - case hipErrorNotMappedAsPointer: - return "resource not mapped as pointer"; - case hipErrorECCNotCorrectable: - return "uncorrectable ECC error encountered"; - case hipErrorUnsupportedLimit: - return "limit is not supported on this architecture"; - case hipErrorContextAlreadyInUse: - return "exclusive-thread device already in use by a different thread"; - case hipErrorPeerAccessUnsupported: - return "peer access is not supported between these two devices"; - case hipErrorInvalidKernelFile: - #if HT_AMD - return "invalid kernel file"; - #elif HT_NVIDIA - return "a PTX JIT compilation failed"; - #endif - case hipErrorInvalidGraphicsContext: - return "invalid OpenGL or DirectX context"; - case hipErrorInvalidSource: - return "device kernel image is invalid"; - case hipErrorFileNotFound: - return "file not found"; - case hipErrorSharedObjectSymbolNotFound: - return "shared object symbol not found"; - case hipErrorSharedObjectInitFailed: - return "shared object initialization failed"; - case hipErrorOperatingSystem: - return "OS call failed or operation not supported on this OS"; - case hipErrorInvalidHandle: - return "invalid resource handle"; - case hipErrorIllegalState: - return "the operation cannot be performed in the present state"; - case hipErrorNotFound: - return "named symbol not found"; - case hipErrorNotReady: - return "device not ready"; - case hipErrorIllegalAddress: - return "an illegal memory access was encountered"; - case hipErrorLaunchOutOfResources: - return "too many resources requested for launch"; - case hipErrorLaunchTimeOut: - return "the launch timed out and was terminated"; - case hipErrorPeerAccessAlreadyEnabled: - return "peer access is already enabled"; - case hipErrorPeerAccessNotEnabled: - return "peer access has not been enabled"; - case hipErrorSetOnActiveProcess: - return "cannot set while device is active in 
this process"; - case hipErrorContextIsDestroyed: - return "context is destroyed"; - case hipErrorAssert: - return "device-side assert triggered"; - case hipErrorHostMemoryAlreadyRegistered: - return "part or all of the requested memory range is already mapped"; - case hipErrorHostMemoryNotRegistered: - return "pointer does not correspond to a registered memory region"; - case hipErrorLaunchFailure: - return "unspecified launch failure"; - case hipErrorCooperativeLaunchTooLarge: - return "too many blocks in cooperative launch"; - case hipErrorNotSupported: - return "operation not supported"; - case hipErrorStreamCaptureUnsupported: - return "operation not permitted when stream is capturing"; - case hipErrorStreamCaptureInvalidated: - return "operation failed due to a previous error during capture"; - case hipErrorStreamCaptureMerge: - return "operation would result in a merge of separate capture sequences"; - case hipErrorStreamCaptureUnmatched: - return "capture was not ended in the same stream as it began"; - case hipErrorStreamCaptureUnjoined: - return "capturing stream has unjoined work"; - case hipErrorStreamCaptureIsolation: - return "dependency created on uncaptured work in another stream"; - case hipErrorStreamCaptureImplicit: - return "operation would make the legacy stream depend on a capturing blocking stream"; //NOLINT - case hipErrorCapturedEvent: - return "operation not permitted on an event last recorded in a capturing stream"; //NOLINT - case hipErrorStreamCaptureWrongThread: - return "attempt to terminate a thread-local capture sequence from another thread"; //NOLINT - case hipErrorGraphExecUpdateFailure: - return "the graph update was not performed because it included changes which violated constraints specific to instantiated graph update"; //NOLINT - case hipErrorRuntimeMemory: - return "runtime memory call returned error"; - case hipErrorRuntimeOther: - return "runtime call other than memory returned error"; - case hipErrorUnknown: - default: - 
#if HT_AMD - return "unknown error"; - #elif HT_NVIDIA - return "unknown error"; - #endif - } -} +/** + * @addtogroup hipDrvGetErrorString hipDrvGetErrorString + * @{ + * @ingroup ErrorTest + * `hipDrvGetErrorString(hipError_t hipError)` - + * Return handy text string message to explain the error which occurred. + */ -// Test case to verify the returned error string is -// same as generated error string. - -TEST_CASE("Unit_hipDrvGetErrorString_Functional") { +/** + * Test Description + * ------------------------ + * - Validate that the correct string is returned for each supported + * device error enumeration. + * Test source + * ------------------------ + * - unit/errorHandling/hipDrvGetErrorString.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.4 + */ +TEST_CASE("Unit_hipDrvGetErrorString_Positive_Basic") { const char* error_string = nullptr; const auto enumerator = - GENERATE(from_range(std::begin(kErrorEnumerators), - std::end(kErrorEnumerators))); - hipError_t error_ret = hipDrvGetErrorString(enumerator, &error_string); + GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators))); + INFO("Error: " << enumerator); + + HIP_CHECK(hipDrvGetErrorString(enumerator, &error_string)); + REQUIRE(error_string != nullptr); REQUIRE(strcmp(error_string, ErrorString(enumerator)) == 0); - REQUIRE(error_ret == hipSuccess); } -// Negative test cases. 
- -TEST_CASE("Unit_hipDrvGetErrorString_Negative") { +/** + * Test Description + * ------------------------ + * - Validate handling of invalid arguments: + * -# When error enumerator is invalid (-1) + * - Expected output: return "hipErrorInvalidValue" + * -# When nullptr is passed as store location + * - Expected output: return "hipErrorInvalidValue" + * Test source + * ------------------------ + * - unit/errorHandling/hipDrvGetErrorString.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.4 + */ +TEST_CASE("Unit_hipDrvGetErrorString_Negative_Parameters") { const char* error_string = nullptr; SECTION("pass unknown value to hipError") { - REQUIRE((hipDrvGetErrorString(static_cast(-1), &error_string)) - == hipErrorInvalidValue); + HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast(-1), &error_string)), + hipErrorInvalidValue); } - #if HT_AMD +#if HT_AMD // segfaults on NVIDIA SECTION("pass nullptr to error string") { - REQUIRE((hipDrvGetErrorString(static_cast(0), nullptr)) - == hipErrorInvalidValue); + HIP_CHECK_ERROR((hipDrvGetErrorString(static_cast(0), nullptr)), + hipErrorInvalidValue); } - #endif +#endif } diff --git a/catch/unit/errorHandling/hipGetErrorName.cc b/catch/unit/errorHandling/hipGetErrorName.cc index a498e62387..75d9f4a549 100644 --- a/catch/unit/errorHandling/hipGetErrorName.cc +++ b/catch/unit/errorHandling/hipGetErrorName.cc @@ -20,10 +20,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ - -#include "errorEnumerators.h" #include -#include + +#include "error_handling_common.hh" /** * @addtogroup hipGetErrorName hipGetErrorName @@ -49,6 +48,7 @@ TEST_CASE("Unit_hipGetErrorName_Positive_Basic") { const char* error_string = nullptr; const auto enumerator = GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators))); + INFO("Error: " << enumerator); error_string = hipGetErrorName(enumerator); diff --git a/catch/unit/errorHandling/hipGetErrorString.cc b/catch/unit/errorHandling/hipGetErrorString.cc index e38f0dc54e..6becd9fdb6 100644 --- a/catch/unit/errorHandling/hipGetErrorString.cc +++ b/catch/unit/errorHandling/hipGetErrorString.cc @@ -20,9 +20,9 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "errorEnumerators.h" #include -#include + +#include "error_handling_common.hh" /** * @addtogroup hipGetErrorString hipGetErrorString @@ -48,6 +48,7 @@ TEST_CASE("Unit_hipGetErrorString_Positive_Basic") { const char* error_string = nullptr; const auto enumerator = GENERATE(from_range(std::begin(kErrorEnumerators), std::end(kErrorEnumerators))); + INFO("Error: " << enumerator); error_string = hipGetErrorString(enumerator); diff --git a/catch/unit/errorHandling/hipPeekAtLastError.cc b/catch/unit/errorHandling/hipPeekAtLastError.cc index ae22a3067a..aac75e41e1 100644 --- a/catch/unit/errorHandling/hipPeekAtLastError.cc +++ b/catch/unit/errorHandling/hipPeekAtLastError.cc @@ -21,7 +21,6 @@ THE SOFTWARE. */ #include -#include #include /** @@ -56,7 +55,8 @@ TEST_CASE("Unit_hipPeekAtLastError_Positive_Basic") { * Test Description * ------------------------ * - Validate that appropriate error is returned when working with multiple threads. - * - Validate that appropriate error is returned for getting the last erro when working with multiple threads. + * - Validate that appropriate error is returned for getting the last error when working with + * multiple threads. 
* - Cause error on purpose within one of the threads. * Test source * ------------------------ From 20a38116967ed6665262c6ccb9ce766e1b894e72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 15:04:34 +0100 Subject: [PATCH 43/71] EXSWHTEC-345 - Implement tests for hipPointerSetAttribute #427 Change-Id: I71e4eb3aab07eff8e24f54fa266a3971839501d1 --- catch/hipTestMain/config/config_amd_linux | 2 + catch/hipTestMain/config/config_amd_windows | 2 + catch/unit/memory/CMakeLists.txt | 13 ++- catch/unit/memory/hipPointerSetAttribute.cc | 108 ++++++++++++++++++++ 4 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 catch/unit/memory/hipPointerSetAttribute.cc diff --git a/catch/hipTestMain/config/config_amd_linux b/catch/hipTestMain/config/config_amd_linux index cf72d409d4..66107df9e1 100644 --- a/catch/hipTestMain/config/config_amd_linux +++ b/catch/hipTestMain/config/config_amd_linux @@ -128,6 +128,8 @@ "Unit_hipEventIpc", "=== SWDEV-427101:Below test fails randomly in PSDB ===", "Unit_deviceAllocation_InOneThread_AccessInAllThreads", + "=== Below test is disabled due to defect EXSWHTEC-347 ===", + "Unit_hipPointerSetAttribute_Positive_SyncMemops", "=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===", "Unit_hiprtc_stdheaders", "Unit_hipGraphAddMemcpyNode_Negative_Parameters", diff --git a/catch/hipTestMain/config/config_amd_windows b/catch/hipTestMain/config/config_amd_windows index 724aef781e..5631444f2b 100644 --- a/catch/hipTestMain/config/config_amd_windows +++ b/catch/hipTestMain/config/config_amd_windows @@ -213,6 +213,8 @@ "Unit_hipVectorTypes_test_on_device", "Unit_Layered1DTexture_Check_DeviceBufferToFromLayered1DArray - ushort4", "Unit_Layered2DTexture_Check_DeviceBufferToFromLayered2DArray - float4", + "=== Below test is disabled due to defect EXSWHTEC-347 ===", + 
"Unit_hipPointerSetAttribute_Positive_SyncMemops", "=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===", "Unit_hiprtc_stdheaders", "NOTE: The following test is disabled due to defect - EXSWHTEC-241", diff --git a/catch/unit/memory/CMakeLists.txt b/catch/unit/memory/CMakeLists.txt index d99cf33b32..431a9f72e1 100644 --- a/catch/unit/memory/CMakeLists.txt +++ b/catch/unit/memory/CMakeLists.txt @@ -83,7 +83,18 @@ if(HIP_PLATFORM MATCHES "amd") hipExtMallocWithFlags.cc hipMallocMngdMultiThread.cc hipArray.cc - hipMemVmm.cc) + hipMemVmm.cc + hipPointerSetAttribute.cc # Should be compiled for NVIDIA as well after EXSWHTEC-346 is addressed + hipMemCreate.cc + hipMemMap.cc + hipMemGetAllocationGranularity.cc + hipMemSetGetAccess.cc + hipMemRetainAllocationHandle.cc + hipMemUnmap.cc + hipMemAddressFree.cc + hipMemAddressReserve.cc + hipMemRelease.cc + hipMemGetAllocationPropertiesFromHandle.cc) else() set(TEST_SRC ${TEST_SRC} hipGetSymbolSizeAddress.cc) endif() diff --git a/catch/unit/memory/hipPointerSetAttribute.cc b/catch/unit/memory/hipPointerSetAttribute.cc new file mode 100644 index 0000000000..ebce0855b2 --- /dev/null +++ b/catch/unit/memory/hipPointerSetAttribute.cc @@ -0,0 +1,108 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipPointerSetAttribute hipPointerSetAttribute + * @{ + * @ingroup MemoryTest + * `hipPointerSetAttribute(const void* value, hipPointer_attribute attribute, hipDeviceptr_t ptr)` - + * Set attributes on a previously allocated memory region. + */ + +#include +#include +#include + +/** + * Test Description + * ------------------------ + * - Sets pointer attribute `HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS` and verifies behavior. + * Test source + * ------------------------ + * - unit/memory/hipPointerSetAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ +TEST_CASE("Unit_hipPointerSetAttribute_Positive_SyncMemops") { + LinearAllocGuard src(LinearAllocs::hipMalloc, 1024); + LinearAllocGuard dst(LinearAllocs::hipMalloc, 1024); + + StreamGuard stream(Streams::created); + LaunchDelayKernel(std::chrono::milliseconds{100}, stream.stream()); + HIP_CHECK(hipMemcpy(dst.ptr(), src.ptr(), 1024, hipMemcpyDeviceToDevice)); + HIP_CHECK_ERROR(hipStreamQuery(stream.stream()), hipErrorNotReady); + + bool value = true; + HIP_CHECK(hipPointerSetAttribute(&value, HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS, + reinterpret_cast(src.ptr()))); + HIP_CHECK(hipPointerSetAttribute(&value, HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS, + reinterpret_cast(dst.ptr()))); + + LaunchDelayKernel(std::chrono::milliseconds{100}, stream.stream()); + HIP_CHECK(hipMemcpy(dst.ptr(), src.ptr(), 1024, hipMemcpyDeviceToDevice)); + HIP_CHECK(hipStreamQuery(stream.stream())); +} + +/** + * Test Description + * 
------------------------ + * - Negative parameters test for `hipPointerSetAttribute`. + * Test source + * ------------------------ + * - unit/memory/hipPointerSetAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ +TEST_CASE("Unit_hipPointerSetAttribute_Negative_Parameters") { + LinearAllocGuard mem(LinearAllocs::hipMalloc, 4); + bool value = false; + + SECTION("value is nullptr") { + HIP_CHECK_ERROR(hipPointerSetAttribute(nullptr, HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS, mem.ptr()), + hipErrorInvalidValue); + } + + SECTION("invalid attribute") { + HIP_CHECK_ERROR( + hipPointerSetAttribute(&value, static_cast(-1), mem.ptr()), + hipErrorInvalidValue); + } + + SECTION("ptr is nullptr") { + HIP_CHECK_ERROR(hipPointerSetAttribute(&value, HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS, nullptr), + hipErrorInvalidValue); + } + + SECTION("host pointer") { + int mem_host; + HIP_CHECK_ERROR(hipPointerSetAttribute(&value, HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS, &mem_host), + hipErrorInvalidDevicePointer); + } + + SECTION("freed pointer") { + HIP_CHECK(hipFree(mem.ptr())); + HIP_CHECK_ERROR(hipPointerSetAttribute(&value, HIP_POINTER_ATTRIBUTE_SYNC_MEMOPS, mem.ptr()), + hipErrorInvalidDevicePointer); + } +} \ No newline at end of file From 39bd6bcac6900ecd996fa3ad5c91fbf2cef0c33a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 15:04:08 +0100 Subject: [PATCH 44/71] EXSWHTEC-348 - Implement tests for hipArrayGetInfo, hipArrayGetDescriptor, and hipArray3DGetDescriptor #428 Change-Id: I08a7c96c971f5ee82be4e69343600530a214a117 --- catch/unit/memory/CMakeLists.txt | 41 ++++++++- catch/unit/memory/hipArray3DGetDescriptor.cc | 88 ++++++++++++++++++ catch/unit/memory/hipArrayGetDescriptor.cc | 88 +++++++++++++++++- catch/unit/memory/hipArrayGetInfo.cc | 94 ++++++++++++++++++++ 4 files changed, 308 insertions(+), 3 deletions(-) create mode 100644 
catch/unit/memory/hipArray3DGetDescriptor.cc create mode 100644 catch/unit/memory/hipArrayGetInfo.cc diff --git a/catch/unit/memory/CMakeLists.txt b/catch/unit/memory/CMakeLists.txt index 431a9f72e1..4ef22c8013 100644 --- a/catch/unit/memory/CMakeLists.txt +++ b/catch/unit/memory/CMakeLists.txt @@ -164,10 +164,49 @@ set(TEST_SRC hipStreamAttachMemAsync.cc hipMemRangeGetAttributes_old.cc hipMemGetAddressRange.cc - hipArrayGetDescriptor.cc hipMallocMipmappedArray.cc hipFreeMipmappedArray.cc) +if(HIP_PLATFORM MATCHES "amd") + set(TEST_SRC + ${TEST_SRC} + # Below 3 tests should be compiled for NVIDIA as well after EXSWHTEC-349 is addressed + hipArrayGetInfo.cc + hipArrayGetDescriptor.cc + hipArray3DGetDescriptor.cc) +endif() + +set(NOT_FOR_MI200_AND_ABOVE_TEST hipMallocArray.cc hipArrayCreate.cc) # tests not for MI200+ +set(MI200_AND_ABOVE_TARGETS gfx90a gfx940 gfx941 gfx942) +function(CheckRejectedArchs OFFLOAD_ARCH_STR_LOCAL) + set(ARCH_CHECK -1 PARENT_SCOPE) + string(REGEX MATCHALL "--offload-arch=gfx[0-9a-z]+" OFFLOAD_ARCH_LIST ${OFFLOAD_ARCH_STR_LOCAL}) + foreach(OFFLOAD_ARCH IN LISTS OFFLOAD_ARCH_LIST) + string(REGEX MATCHALL "--offload-arch=(gfx[0-9a-z]+)" matches ${OFFLOAD_ARCH}) + if (CMAKE_MATCH_COUNT EQUAL 1) + if (CMAKE_MATCH_1 IN_LIST MI200_AND_ABOVE_TARGETS) + set(ARCH_CHECK 1 PARENT_SCOPE) + endif() # CMAKE_MATCH_1 + endif() # CMAKE_MATCH_COUNT + endforeach() # OFFLOAD_ARCH_LIST +endfunction() # CheckAcceptedArchs + +if(HIP_PLATFORM MATCHES "amd") + if (DEFINED OFFLOAD_ARCH_STR) + CheckRejectedArchs(${OFFLOAD_ARCH_STR}) + elseif(DEFINED $ENV{HCC_AMDGPU_TARGET}) + CheckRejectedArchs($ENV{HCC_AMDGPU_TARGET}) + else() + set(ARCH_CHECK -1) + endif() + if(${ARCH_CHECK} EQUAL -1) + message(STATUS "Adding test: ${NOT_FOR_MI200_AND_ABOVE_TEST}") + set(TEST_SRC ${TEST_SRC} ${NOT_FOR_MI200_AND_ABOVE_TEST}) + endif() +else() + set(TEST_SRC ${TEST_SRC} ${NOT_FOR_MI200_AND_ABOVE_TEST}) +endif() + hip_add_exe_to_target(NAME MemoryTest2 TEST_SRC ${TEST_SRC} 
TEST_TARGET_NAME build_tests COMMON_SHARED_SRC ${COMMON_SHARED_SRC}) diff --git a/catch/unit/memory/hipArray3DGetDescriptor.cc b/catch/unit/memory/hipArray3DGetDescriptor.cc new file mode 100644 index 0000000000..31a2881dd8 --- /dev/null +++ b/catch/unit/memory/hipArray3DGetDescriptor.cc @@ -0,0 +1,88 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipArray3DGetDescriptor hipArray3DGetDescriptor + * @{ + * @ingroup MemoryTest + * `hipArray3DGetDescriptor(HIP_ARRAY3D_DESCRIPTOR* pArrayDescriptor, hipArray* array)` - + * Gets a 3D array descriptor. + */ + +#include +#include + +/** + * Test Description + * ------------------------ + * - Basic sanity test for `hipArray3DGetDescriptor`. 
+ * Test source + * ------------------------ + * - unit/memory/hipArray3DGetDescriptor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEST_CASE("Unit_hipArray3DGetDescriptor_Positive_Basic") { + DrvArrayAllocGuard array(make_hipExtent(1024, 4, 2)); + + HIP_ARRAY3D_DESCRIPTOR desc; + HIP_CHECK(hipArray3DGetDescriptor(&desc, array.ptr())); + + using vec_info = vector_info; + REQUIRE(desc.Format == vec_info::format); + REQUIRE(desc.NumChannels == vec_info::size); + REQUIRE(desc.Width == 1024 / sizeof(float)); + REQUIRE(desc.Height == 4); + REQUIRE(desc.Depth == 2); + REQUIRE(desc.Flags == 0); +} + +/** + * Test Description + * ------------------------ + * - Negative parameters test for `hipArray3DGetDescriptor`. + * Test source + * ------------------------ + * - unit/memory/hipArray3DGetDescriptor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEST_CASE("Unit_hipArray3DGetDescriptor_Negative_Parameters") { + DrvArrayAllocGuard array(make_hipExtent(1024, 4, 2)); + + HIP_ARRAY3D_DESCRIPTOR desc; + + SECTION("desc is nullptr") { + HIP_CHECK_ERROR(hipArray3DGetDescriptor(nullptr, array.ptr()), hipErrorInvalidValue); + } + + SECTION("array is nullptr") { + HIP_CHECK_ERROR(hipArray3DGetDescriptor(&desc, nullptr), hipErrorInvalidHandle); + } + + SECTION("array is freed") { + HIP_CHECK(hipArrayDestroy(array.ptr())); + HIP_CHECK_ERROR(hipArray3DGetDescriptor(&desc, array.ptr()), hipErrorInvalidHandle); + } +} \ No newline at end of file diff --git a/catch/unit/memory/hipArrayGetDescriptor.cc b/catch/unit/memory/hipArrayGetDescriptor.cc index c977590987..cac8809cb7 100644 --- a/catch/unit/memory/hipArrayGetDescriptor.cc +++ b/catch/unit/memory/hipArrayGetDescriptor.cc @@ -16,12 +16,16 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include -#include + #include + #include #include +#include +#include +#include + static bool testPassed1D = false; static bool testPassed2D = false; static constexpr auto NUM_ELM{1024}; @@ -459,3 +463,83 @@ TEST_CASE("Unit_hipArrayGetDescriptor_Negative_Scenarios") { #endif } +/** + * @addtogroup hipArrayGetDescriptor hipArrayGetDescriptor + * @{ + * @ingroup MemoryTest + * `hipArrayGetDescriptor(HIP_ARRAY_DESCRIPTOR* pArrayDescriptor, hipArray* array)` - + * Gets a 1D or 2D array descriptor. + */ + +/** + * Test Description + * ------------------------ + * - Basic sanity test for `hipArrayGetDescriptor`. + * Test source + * ------------------------ + * - unit/memory/hipArrayGetDescriptor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEST_CASE("Unit_hipArrayGetDescriptor_Positive_Basic") { + HIP_ARRAY_DESCRIPTOR expected_desc{}; + using vec_info = vector_info; + expected_desc.Format = vec_info::format; + expected_desc.NumChannels = vec_info::size; + expected_desc.Width = 1024 / sizeof(float); + expected_desc.Height = 4; + + hipArray_t ptr; + HIP_CHECK(hipArrayCreate(&ptr, &expected_desc)); + + HIP_ARRAY_DESCRIPTOR desc; + HIP_CHECK(hipArrayGetDescriptor(&desc, ptr)); + + REQUIRE(desc.Format == expected_desc.Format); + REQUIRE(desc.NumChannels == expected_desc.NumChannels); + REQUIRE(desc.Width == expected_desc.Width); + REQUIRE(desc.Height == expected_desc.Height); + + HIP_CHECK(hipArrayDestroy(ptr)); +} + +/** + * Test Description + * ------------------------ + * - Negative parameters test for `hipArrayGetDescriptor`. 
+ * Test source + * ------------------------ + * - unit/memory/hipArrayGetDescriptor.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEST_CASE("Unit_hipArrayGetDescriptor_Negative_Parameters") { + HIP_ARRAY_DESCRIPTOR expected_desc{}; + using vec_info = vector_info; + expected_desc.Format = vec_info::format; + expected_desc.NumChannels = vec_info::size; + expected_desc.Width = 1024 / sizeof(float); + expected_desc.Height = 4; + + hipArray_t ptr; + HIP_CHECK(hipArrayCreate(&ptr, &expected_desc)); + + HIP_ARRAY_DESCRIPTOR desc; + + SECTION("desc is nullptr") { + HIP_CHECK_ERROR(hipArrayGetDescriptor(nullptr, ptr), hipErrorInvalidValue); + } + + SECTION("array is nullptr") { + HIP_CHECK_ERROR(hipArrayGetDescriptor(&desc, nullptr), hipErrorInvalidHandle); + } + + SECTION("array is freed") { + HIP_CHECK(hipArrayDestroy(ptr)); + HIP_CHECK_ERROR(hipArrayGetDescriptor(&desc, ptr), hipErrorInvalidHandle); + } + + static_cast(hipArrayDestroy(ptr)); +} diff --git a/catch/unit/memory/hipArrayGetInfo.cc b/catch/unit/memory/hipArrayGetInfo.cc new file mode 100644 index 0000000000..10ebbfe1e7 --- /dev/null +++ b/catch/unit/memory/hipArrayGetInfo.cc @@ -0,0 +1,94 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipArrayGetInfo hipArrayGetInfo + * @{ + * @ingroup MemoryTest + * `hipArrayGetInfo(hipChannelFormatDesc* desc, hipExtent* extent, unsigned int* flags, hipArray* + * array)` - Gets info about the specified array. + */ + +#include +#include + +/** + * Test Description + * ------------------------ + * - Basic sanity test for `hipArrayGetInfo`. + * Test source + * ------------------------ + * - unit/memory/hipArrayGetInfo.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEST_CASE("Unit_hipArrayGetInfo_Positive_Basic") { + ArrayAllocGuard array(make_hipExtent(1024, 4, 2)); + + hipChannelFormatDesc desc; + hipExtent extent; + unsigned int flags = 1; + + HIP_CHECK(hipArrayGetInfo(&desc, &extent, &flags, array.ptr())); + + REQUIRE(extent.width == 1024); + REQUIRE(extent.height == 4); + REQUIRE(extent.depth == 2); + + REQUIRE(flags == 0); + + auto expected_desc = hipCreateChannelDesc(); + REQUIRE(desc.x == expected_desc.x); + REQUIRE(desc.y == expected_desc.y); + REQUIRE(desc.z == expected_desc.z); + REQUIRE(desc.w == expected_desc.w); + REQUIRE(desc.f == expected_desc.f); +} + +/** + * Test Description + * ------------------------ + * - Negative parameters test for `hipArrayGetInfo`. 
+ * Test source + * ------------------------ + * - unit/memory/hipArrayGetInfo.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEST_CASE("Unit_hipArrayGetInfo_Negative_Parameters") { + ArrayAllocGuard array(make_hipExtent(1024, 4, 4)); + + hipChannelFormatDesc desc; + hipExtent extent; + unsigned int flags; + + SECTION("array is nullptr") { + HIP_CHECK_ERROR(hipArrayGetInfo(&desc, &extent, &flags, nullptr), hipErrorInvalidHandle); + } + + SECTION("array is freed") { + HIP_CHECK(hipFreeArray(array.ptr())); + HIP_CHECK_ERROR(hipArrayGetInfo(&desc, &extent, &flags, array.ptr()), hipErrorInvalidHandle); + } +} \ No newline at end of file From 303836a7fda08afe8e57f01212dd8f5afc12002b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 14:47:48 +0100 Subject: [PATCH 45/71] EXSWHTEC-350 - Implement tests for hipModuleLaunchCooperativeKernel #429 Change-Id: I42342c7d4cceed67990f603cd83473fe76e7f856 --- catch/include/hip_test_defgroups.hh | 7 + catch/unit/module/CMakeLists.txt | 2 + .../hipModuleLaunchCooperativeKernel.cc | 211 ++++++++++++++++ ...oduleLaunchCooperativeKernelMultiDevice.cc | 227 ++++++++++++++++++ catch/unit/module/launch_kernel_module.cc | 6 + 5 files changed, 453 insertions(+) create mode 100644 catch/unit/module/hipModuleLaunchCooperativeKernel.cc create mode 100644 catch/unit/module/hipModuleLaunchCooperativeKernelMultiDevice.cc diff --git a/catch/include/hip_test_defgroups.hh b/catch/include/hip_test_defgroups.hh index 0a56d94239..8191c8b96f 100644 --- a/catch/include/hip_test_defgroups.hh +++ b/catch/include/hip_test_defgroups.hh @@ -179,6 +179,13 @@ THE SOFTWARE. * @} */ +/** + * @defgroup ModuleTest Module Management + * @{ + * This section describes the module management types & functions of HIP runtime API. 
+ * @} + */ + /** * @defgroup TextureTest Texture Management * @{ diff --git a/catch/unit/module/CMakeLists.txt b/catch/unit/module/CMakeLists.txt index 5d951cf27c..76ca9e9ec6 100644 --- a/catch/unit/module/CMakeLists.txt +++ b/catch/unit/module/CMakeLists.txt @@ -29,6 +29,8 @@ set(TEST_SRC hipModuleLaunchKernel.cc hipModuleGetGlobal.cc hipModuleGetTexRef.cc + hipModuleLaunchCooperativeKernel.cc + hipModuleLaunchCooperativeKernelMultiDevice.cc ) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_function_module.code diff --git a/catch/unit/module/hipModuleLaunchCooperativeKernel.cc b/catch/unit/module/hipModuleLaunchCooperativeKernel.cc new file mode 100644 index 0000000000..0ca6a31293 --- /dev/null +++ b/catch/unit/module/hipModuleLaunchCooperativeKernel.cc @@ -0,0 +1,211 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/** + * @addtogroup hipModuleLaunchCooperativeKernel hipModuleLaunchCooperativeKernel + * @{ + * @ingroup ModuleTest + * `hipModuleLaunchCooperativeKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, + * unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, + * unsigned int sharedMemBytes, hipStream_t stream, void ** kernelParams)` - + * Launches kernel f with launch parameters and shared memory on stream with arguments passed to + * kernelParams, where thread blocks can cooperate and synchronize as they execute. + */ + +#include +#include +#include + +#include "hip_module_launch_kernel_common.hh" + +/** + * Test Description + * ------------------------ + * - Tests `hipModuleLaunchCooperativeKernel` for a cooperative kernel with no parameters, and for + * a normal kernel with parameters. + * Test source + * ------------------------ + * - unit/module/hipModuleLaunchCooperativeKernel.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ +TEST_CASE("Unit_hipModuleLaunchCooperativeKernel_Positive_Basic") { + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + + SECTION("Cooperative kernel with no arguments") { + hipFunction_t f = GetKernel(mg.module(), "CoopKernel"); + HIP_CHECK(hipModuleLaunchCooperativeKernel(f, 2, 2, 1, 1, 1, 1, 0, nullptr, nullptr)); + HIP_CHECK(hipDeviceSynchronize()); + } + + SECTION("Kernel with arguments using kernelParams") { + hipFunction_t f = GetKernel(mg.module(), "Kernel42"); + + LinearAllocGuard result_dev(LinearAllocs::hipMalloc, sizeof(int)); + HIP_CHECK(hipMemset(result_dev.ptr(), 0, sizeof(*result_dev.ptr()))); + + int* result_ptr = result_dev.ptr(); + void* kernel_args[1] = {&result_ptr}; + HIP_CHECK(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, 1, 1, 0, nullptr, kernel_args)); + + int result = 0; + HIP_CHECK(hipMemcpy(&result, 
result_dev.ptr(), sizeof(result), hipMemcpyDefault)); + REQUIRE(result == 42); + } +} + +/** + * Test Description + * ------------------------ + * - Positive parameters test for `hipModuleLaunchCooperativeKernel`. + * Test source + * ------------------------ + * - unit/module/hipModuleLaunchCooperativeKernel.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ +TEST_CASE("Unit_hipModuleLaunchCooperativeKernel_Positive_Parameters") { + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + + hipFunction_t f = GetKernel(mg.module(), "NOPKernel"); + + SECTION("blockDim.x == maxBlockDimX") { + const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX); + HIP_CHECK(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, x, 1, 1, 0, nullptr, nullptr)); + } + + SECTION("blockDim.y == maxBlockDimY") { + const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY); + HIP_CHECK(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, y, 1, 0, nullptr, nullptr)); + } + + SECTION("blockDim.z == maxBlockDimZ") { + const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ); + HIP_CHECK(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, 1, z, 0, nullptr, nullptr)); + } +} + +/** + * Test Description + * ------------------------ + * - Negative parameters test for `hipModuleLaunchCooperativeKernel`. 
+ * Test source
+ * ------------------------
+ * - unit/module/hipModuleLaunchCooperativeKernel.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.5
+ */
+TEST_CASE("Unit_hipModuleLaunchCooperativeKernel_Negative_Parameters") {
+  if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
+    HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
+    return;
+  }
+
+  hipFunction_t f = GetKernel(mg.module(), "NOPKernel");
+
+  SECTION("f == nullptr") {
+    HIP_CHECK_ERROR(
+        hipModuleLaunchCooperativeKernel(nullptr, 1, 1, 1, 1, 1, 1, 0, nullptr, nullptr),
+        hipErrorInvalidResourceHandle);
+  }
+
+  SECTION("gridDim.x == 0") {
+    HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 0, 1, 1, 1, 1, 1, 0, nullptr, nullptr),
+                    hipErrorInvalidValue);
+  }
+
+  SECTION("gridDim.y == 0") {
+    HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 0, 1, 1, 1, 1, 0, nullptr, nullptr),
+                    hipErrorInvalidValue);
+  }
+
+  SECTION("gridDim.z == 0") {
+    HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 0, 1, 1, 1, 0, nullptr, nullptr),
+                    hipErrorInvalidValue);
+  }
+
+  SECTION("blockDim.x == 0") {
+    HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 0, 1, 1, 0, nullptr, nullptr),
+                    hipErrorInvalidValue);
+  }
+
+  SECTION("blockDim.y == 0") {
+    HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, 0, 1, 0, nullptr, nullptr),
+                    hipErrorInvalidValue);
+  }
+
+  SECTION("blockDim.z == 0") {
+    HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, 1, 0, 0, nullptr, nullptr),
+                    hipErrorInvalidValue);
+  }
+
+  SECTION("blockDim.x > maxBlockDimX") {
+    const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX) + 1u;
+    HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, x, 1, 1, 0, nullptr, nullptr),
+                    hipErrorInvalidValue);
+  }
+
+  SECTION("blockDim.y > maxBlockDimY") {
+    const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY) + 1u;
+    HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, y, 1, 0, nullptr, nullptr),
+                    hipErrorInvalidValue);
+  }
+
+  SECTION("blockDim.z > maxBlockDimZ") {
+    const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ) + 1u;
+    HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, 1, z, 0, nullptr, nullptr),
+                    hipErrorInvalidValue);
+  }
+
+  SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") {
+    const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock);
+    const unsigned int dim = std::lround(std::cbrt(max)) + 1;  // dim^3 > max, even for perfect cubes
+    HIP_CHECK_ERROR(
+        hipModuleLaunchCooperativeKernel(f, 1, 1, 1, dim, dim, dim, 0, nullptr, nullptr),
+        hipErrorInvalidValue);
+  }
+
+#if HT_AMD  // Disabled due to defect EXSWHTEC-351
+  SECTION("sharedMemBytes > maxSharedMemoryPerBlock") {
+    const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock) + 1u;
+    HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, 1, 1, max, nullptr, nullptr),
+                    hipErrorInvalidValue);
+  }
+
+  SECTION("Invalid stream") {
+    hipStream_t stream = nullptr;
+    HIP_CHECK(hipStreamCreate(&stream));
+    HIP_CHECK(hipStreamDestroy(stream));  // handle is stale from here on
+    HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, 1, 1, 0, stream, nullptr),
+                    hipErrorInvalidValue);
+  }
+#endif
+}
\ No newline at end of file
diff --git a/catch/unit/module/hipModuleLaunchCooperativeKernelMultiDevice.cc b/catch/unit/module/hipModuleLaunchCooperativeKernelMultiDevice.cc
new file mode 100644
index 0000000000..1deaae02c0
--- /dev/null
+++ b/catch/unit/module/hipModuleLaunchCooperativeKernelMultiDevice.cc
@@ -0,0 +1,227 @@
+/*
+Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipModuleLaunchCooperativeKernelMultiDevice + * hipModuleLaunchCooperativeKernelMultiDevice + * @{ + * @ingroup ModuleTest + * `hipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams* launchParamsList, unsigned + * int numDevices, unsigned int flags)` - + * Launches kernels on multiple devices where thread blocks can cooperate and synchronize as they + * execute. + */ + +#include +#include +#include + +#include "hip_module_launch_kernel_common.hh" + +/** + * Test Description + * ------------------------ + * - Tests `hipModuleLaunchCooperativeKernelMultiDevice` for a cooperative kernel with no parameters.
+ * Test source
+ * ------------------------
+ * - unit/module/hipModuleLaunchCooperativeKernelMultiDevice.cc
+ * Test requirements
+ * ------------------------
+ * - HIP_VERSION >= 5.5
+ */
+TEST_CASE("Unit_hipModuleLaunchCooperativeKernelMultiDevice_Positive_Basic") {
+  if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
+    HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
+    return;
+  }
+
+  hipFunction_t f = GetKernel(mg.module(), "CoopKernel");
+
+  const auto device_count = HipTest::getDeviceCount();
+
+  std::vector<hipFunctionLaunchParams> params_list(device_count);  // one launch descriptor per device
+
+  int device = 0;
+  for (auto& params : params_list) {
+    params.function = f;
+    params.gridDimX = 1;
+    params.gridDimY = 1;
+    params.gridDimZ = 1;
+    params.blockDimX = 1;
+    params.blockDimY = 1;
+    params.blockDimZ = 1;
+    params.kernelParams = nullptr;
+    params.sharedMemBytes = 0;
+    HIP_CHECK(hipSetDevice(device++));  // the stream below is created while this device is current
+    HIP_CHECK(hipStreamCreate(&params.hStream));
+  }
+
+  HIP_CHECK(hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u));
+
+  for (const auto& params : params_list) {
+    HIP_CHECK(hipStreamSynchronize(params.hStream));
+  }
+
+  for (const auto& params : params_list) {
+    HIP_CHECK(hipStreamDestroy(params.hStream));
+  }
+}
+
+/**
+ * Test Description
+ * ------------------------
+ * - Negative parameters test for `hipModuleLaunchCooperativeKernelMultiDevice`.
+ * Test source + * ------------------------ + * - unit/module/hipModuleLaunchCooperativeKernelMultiDevice.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ +TEST_CASE("Unit_hipModuleLaunchCooperativeKernelMultiDevice_Negative_Parameters") { + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + + hipFunction_t f = GetKernel(mg.module(), "CoopKernel"); + + const auto device_count = HipTest::getDeviceCount(); + + std::vector<hipFunctionLaunchParams> params_list(device_count); + + int device = 0; + for (auto& params : params_list) { + params.function = f; + params.gridDimX = 1; + params.gridDimY = 1; + params.gridDimZ = 1; + params.blockDimX = 1; + params.blockDimY = 1; + params.blockDimZ = 1; + params.kernelParams = nullptr; + params.sharedMemBytes = 0; + HIP_CHECK(hipSetDevice(device++)); + HIP_CHECK(hipStreamCreate(&params.hStream)); + } + + SECTION("launchParamsList == nullptr") { + HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernelMultiDevice(nullptr, device_count, 0u), + hipErrorInvalidValue); + } + + SECTION("numDevices == 0") { + HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), 0, 0u), + hipErrorInvalidValue); + } + + SECTION("numDevices > device count") { + HIP_CHECK_ERROR( + hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), device_count + 1, 0u), + hipErrorInvalidValue); + } + + SECTION("invalid flags") { + HIP_CHECK_ERROR( + hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 999), + hipErrorInvalidValue); + } + + if (device_count > 1) { + SECTION("launchParamsList.func doesn't match across all devices") { + params_list[1].function = GetKernel(mg.module(), "NOPKernel"); + HIP_CHECK_ERROR( + hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u), + hipErrorInvalidValue); + } + + SECTION("launchParamsList.gridDim doesn't match across all kernels") { +
params_list[1].gridDimX = 2; + HIP_CHECK_ERROR( + hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u), + hipErrorInvalidValue); + } + + SECTION("launchParamsList.blockDim doesn't match across all kernels") { + params_list[1].blockDimX = 2; + HIP_CHECK_ERROR( + hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u), + hipErrorInvalidValue); + } + + SECTION("launchParamsList.sharedMem doesn't match across all kernels") { + params_list[1].sharedMemBytes = 1024; + HIP_CHECK_ERROR( + hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), device_count, 0u), + hipErrorInvalidValue); + } + } + + for (const auto params : params_list) { + HIP_CHECK(hipStreamDestroy(params.hStream)); + } +} + +/** + * Test Description + * ------------------------ + * - Tries running `hipModuleLaunchCooperativeKernelMultiDevice` with multiple kernels on the same + * device. + * Test source + * ------------------------ + * - unit/module/hipModuleLaunchCooperativeKernelMultiDevice.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.5 + */ +TEST_CASE("Unit_hipModuleLaunchCooperativeKernelMultiDevice_Negative_MultiKernelSameDevice") { + if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) { + HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported"); + return; + } + + hipFunction_t f = GetKernel(mg.module(), "CoopKernel"); + + HIP_CHECK(hipSetDevice(0)); + + std::vector<hipFunctionLaunchParams> params_list(2); + + for (auto& params : params_list) { + params.function = f; + params.gridDimX = 1; + params.gridDimY = 1; + params.gridDimZ = 1; + params.blockDimX = 1; + params.blockDimY = 1; + params.blockDimZ = 1; + params.kernelParams = nullptr; + params.sharedMemBytes = 0; + HIP_CHECK(hipStreamCreate(&params.hStream)); + } + + HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernelMultiDevice(params_list.data(), 2, 0u), + hipErrorInvalidValue); + + for (const auto params : params_list) { +
HIP_CHECK(hipStreamDestroy(params.hStream)); + } +} \ No newline at end of file diff --git a/catch/unit/module/launch_kernel_module.cc b/catch/unit/module/launch_kernel_module.cc index 01c04b45d6..12821da450 100644 --- a/catch/unit/module/launch_kernel_module.cc +++ b/catch/unit/module/launch_kernel_module.cc @@ -20,6 +20,7 @@ THE SOFTWARE. */ #include +#include extern "C" { __global__ void NOPKernel() {} @@ -34,4 +35,9 @@ __global__ void Delay(uint32_t interval, const uint32_t ticks_per_ms) { } } } + +__global__ void CoopKernel() { + cooperative_groups::grid_group grid = cooperative_groups::this_grid(); + grid.sync(); +} } \ No newline at end of file From 0ecb874b525d72c969ffc45a23c7ff23b9de9213 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 14:29:28 +0100 Subject: [PATCH 46/71] EXSWHTEC-357 - Implement tests for 1D mipmapped texture device functions #431 Change-Id: I74b4bc2a73ec8bbe21460eda8cdc79210fe2501e --- catch/include/resource_guards.hh | 36 +++++++++++ catch/unit/texture/kernels.hh | 2 +- catch/unit/texture/test_fixture.hh | 27 ++++++-- catch/unit/texture/tex1D.cc | 1 - catch/unit/texture/tex1DGrad.cc | 9 ++- catch/unit/texture/tex1DLayeredGrad.cc | 90 ++++++++++++++------------ catch/unit/texture/tex1DLayeredLod.cc | 89 +++++++++++++------------ catch/unit/texture/tex1DLod.cc | 9 ++- 8 files changed, 165 insertions(+), 98 deletions(-) diff --git a/catch/include/resource_guards.hh b/catch/include/resource_guards.hh index 20c1a20ee5..c2f32e39f5 100644 --- a/catch/include/resource_guards.hh +++ b/catch/include/resource_guards.hh @@ -216,6 +216,42 @@ template class ArrayAllocGuard { const hipExtent extent_; }; +template class MipmappedArrayAllocGuard { + public: + // extent should contain logical width + MipmappedArrayAllocGuard(const hipExtent extent, const unsigned int levels, + const unsigned int flags) + : extent_{extent}, levels_{levels} { + 
hipChannelFormatDesc desc = hipCreateChannelDesc(); + HIP_CHECK(hipMallocMipmappedArray(&ptr_, &desc, extent_, levels_, flags)); + } + + MipmappedArrayAllocGuard(const hipExtent extent, const unsigned int flags = 0u) + : MipmappedArrayAllocGuard{extent, 1, flags} {} + + ~MipmappedArrayAllocGuard() { static_cast(hipFreeMipmappedArray(ptr_)); } + + MipmappedArrayAllocGuard(const MipmappedArrayAllocGuard&) = delete; + MipmappedArrayAllocGuard(MipmappedArrayAllocGuard&&) = delete; + + hipMipmappedArray_t ptr() const { return ptr_; } + + hipArray_t GetLevel(unsigned int level) { + hipArray_t ret; + HIP_CHECK(hipGetMipmappedArrayLevel(&ret, ptr_, level)); + return ret; + } + + hipExtent extent() const { return extent_; } + + unsigned int levels() const { return levels_; } + + private: + hipMipmappedArray_t ptr_ = nullptr; + const hipExtent extent_; + const unsigned int levels_; +}; + template class DrvArrayAllocGuard { public: // extent should contain width in bytes diff --git a/catch/unit/texture/kernels.hh b/catch/unit/texture/kernels.hh index f9f7a6a41e..2e0bf256cd 100644 --- a/catch/unit/texture/kernels.hh +++ b/catch/unit/texture/kernels.hh @@ -87,7 +87,7 @@ __global__ void tex1DGradKernel(TexelType* const out, size_t N, hipTextureObject template __global__ void tex1DLayeredGradKernel(TexelType* const out, size_t N, hipTextureObject_t tex_obj, size_t width, size_t num_subdivisions, - bool normalized_coords, float dx, float dy, int layer) { + bool normalized_coords, int layer, float dx, float dy) { const auto tid = cg::this_grid().thread_rank(); if (tid >= N) return; diff --git a/catch/unit/texture/test_fixture.hh b/catch/unit/texture/test_fixture.hh index 28ca9ee2df..222c4deb9e 100644 --- a/catch/unit/texture/test_fixture.hh +++ b/catch/unit/texture/test_fixture.hh @@ -99,19 +99,25 @@ template struct TextureTestParams { tex_desc.addressMode[0] = address_mode_x; if (extent.height) tex_desc.addressMode[1] = address_mode_y; if (extent.depth) tex_desc.addressMode[2] = 
address_mode_z; + + tex_desc.mipmapFilterMode = tex_desc.filterMode; } }; -template struct TextureTestFixture { +template +struct TextureTestFixture { using VecType = vec4; using OutType = std::conditional_t, VecType>; + template + using ArrayAllocGuardType = + std::conditional_t, ArrayAllocGuard>; TextureTestParams params; hipResourceDesc res_desc; LinearAllocGuard host_alloc; TextureReference tex_h; - ArrayAllocGuard tex_alloc_d; + ArrayAllocGuardType tex_alloc_d; TextureGuard tex; LinearAllocGuard out_alloc_d; std::vector out_alloc_h; @@ -131,9 +137,13 @@ template struct TextureTestFix SetVec4(host_alloc.ptr()[i], i + test_value_offset); } - hipMemcpy3DParms memcpy_params; + hipMemcpy3DParms memcpy_params = {}; memset(&memcpy_params, 0, sizeof(hipMemcpy3DParms)); - memcpy_params.dstArray = tex_alloc_d.ptr(); + if constexpr (mipmap) { + memcpy_params.dstArray = tex_alloc_d.GetLevel(0); + } else { + memcpy_params.dstArray = tex_alloc_d.ptr(); + } memcpy_params.extent = params.LayeredExtent(); memcpy_params.extent.height = memcpy_params.extent.height ?: 1; memcpy_params.extent.depth = memcpy_params.extent.depth ?: 1; @@ -143,8 +153,13 @@ template struct TextureTestFix HIP_CHECK(hipMemcpy3D(&memcpy_params)); memset(&res_desc, 0, sizeof(res_desc)); - res_desc.resType = hipResourceTypeArray; - res_desc.res.array.array = tex_alloc_d.ptr(); + if constexpr (mipmap) { + res_desc.resType = hipResourceTypeMipmappedArray; + res_desc.res.mipmap.mipmap = tex_alloc_d.ptr(); + } else { + res_desc.resType = hipResourceTypeArray; + res_desc.res.array.array = tex_alloc_d.ptr(); + } return &res_desc; } diff --git a/catch/unit/texture/tex1D.cc b/catch/unit/texture/tex1D.cc index de92fca2ee..17f360d17f 100644 --- a/catch/unit/texture/tex1D.cc +++ b/catch/unit/texture/tex1D.cc @@ -127,7 +127,6 @@ TEMPLATE_TEST_CASE("Unit_tex1D_Positive_ReadModeNormalizedFloat", "", char, unsi INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: "
<< std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); - INFO("Filter mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("x: " << std::fixed << std::setprecision(16) << x); auto ref_val = diff --git a/catch/unit/texture/tex1DGrad.cc b/catch/unit/texture/tex1DGrad.cc index 5b893d0954..1c006571b1 100644 --- a/catch/unit/texture/tex1DGrad.cc +++ b/catch/unit/texture/tex1DGrad.cc @@ -46,7 +46,7 @@ THE SOFTWARE. * - unit/texture/tex1DGrad.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_tex1DGrad_Positive_ReadModeElementType", "", char, unsigned char, short, unsigned short, int, unsigned int, float) { @@ -57,7 +57,7 @@ TEMPLATE_TEST_CASE("Unit_tex1DGrad_Positive_ReadModeElementType", "", char, unsi params.num_subdivisions = 4; params.GenerateTextureDesc(); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); tex1DGradKernel><<>>( @@ -99,7 +99,7 @@ TEMPLATE_TEST_CASE("Unit_tex1DGrad_Positive_ReadModeElementType", "", char, unsi * - unit/texture/tex1DGrad.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_tex1DGrad_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, unsigned short) { @@ -110,7 +110,7 @@ TEMPLATE_TEST_CASE("Unit_tex1DGrad_Positive_ReadModeNormalizedFloat", "", char, params.num_subdivisions = 4; params.GenerateTextureDesc(hipReadModeNormalizedFloat); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); tex1DGradKernel><<>>( @@ -127,7 +127,6 @@ TEMPLATE_TEST_CASE("Unit_tex1DGrad_Positive_ReadModeNormalizedFloat", "", char, INFO("Filtering mode: " << 
FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); - INFO("Filter mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("x: " << std::fixed << std::setprecision(16) << x); auto ref_val = diff --git a/catch/unit/texture/tex1DLayeredGrad.cc b/catch/unit/texture/tex1DLayeredGrad.cc index 07c9734619..6115c939b8 100644 --- a/catch/unit/texture/tex1DLayeredGrad.cc +++ b/catch/unit/texture/tex1DLayeredGrad.cc @@ -46,7 +46,7 @@ THE SOFTWARE. * - unit/texture/tex1DLayeredGrad.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_tex1DLayeredGrad_Positive_ReadModeElementType", "", char, unsigned char, short, unsigned short, int, unsigned int, float) { @@ -54,33 +54,38 @@ TEMPLATE_TEST_CASE("Unit_tex1DLayeredGrad_Positive_ReadModeElementType", "", cha TextureTestParams params = {}; params.extent = make_hipExtent(1024, 0, 0); + params.layers = 2; params.num_subdivisions = 4; params.GenerateTextureDesc(); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); - tex1DLayeredGradKernel><<>>( - fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), - params.num_subdivisions, params.tex_desc.normalizedCoords, 0.5f, 0.5f, 0); - fixture.LoadOutput(); + for (auto layer = 0u; layer < params.layers; ++layer) { + tex1DLayeredGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer, 0.5f, 0.5f); - for (auto i = 0u; i < params.NumItersX(); ++i) { - float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, - params.tex_desc.normalizedCoords); + fixture.LoadOutput(); - 
INFO("Index: " << i); - INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); - INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); - INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); - INFO("x: " << std::fixed << std::setprecision(16) << x); + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); - auto ref_val = fixture.tex_h.Tex1D(x, params.tex_desc); - REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); - REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); - REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); - REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + INFO("Layer: " << layer); + INFO("Index: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = fixture.tex_h.Tex1DLayered(x, layer, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } } } @@ -99,7 +104,7 @@ TEMPLATE_TEST_CASE("Unit_tex1DLayeredGrad_Positive_ReadModeElementType", "", cha * - unit/texture/tex1DLayeredGrad.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, unsigned short) { @@ -107,34 +112,39 @@ TEMPLATE_TEST_CASE("Unit_tex1DLayeredGrad_Positive_ReadModeNormalizedFloat", "", TextureTestParams params = {}; params.extent = make_hipExtent(1024, 0, 0); + params.layers = 2; 
params.num_subdivisions = 4; params.GenerateTextureDesc(hipReadModeNormalizedFloat); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); - tex1DLayeredGradKernel><<>>( - fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), - params.num_subdivisions, params.tex_desc.normalizedCoords, 0.5f, 0.5f, 0); - fixture.LoadOutput(); + for (auto layer = 0u; layer < params.layers; ++layer) { + tex1DLayeredGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer, 0.5f, 0.5f); - for (auto i = 0u; i < params.NumItersX(); ++i) { - float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, - params.tex_desc.normalizedCoords); + fixture.LoadOutput(); - INFO("i: " << i); - INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); - INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); - INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); - INFO("Filter mode: " << FilteringModeToString(params.tex_desc.filterMode)); - INFO("x: " << std::fixed << std::setprecision(16) << x); + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); - auto ref_val = - Vec4Map(fixture.tex_h.Tex1D(x, params.tex_desc), NormalizeInteger); - REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); - REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); - REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); - REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + INFO("Layer: " << layer); + INFO("i: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << 
params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Filter mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = Vec4Map(fixture.tex_h.Tex1DLayered(x, layer, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } } } \ No newline at end of file diff --git a/catch/unit/texture/tex1DLayeredLod.cc b/catch/unit/texture/tex1DLayeredLod.cc index 874c99c85a..a39d664502 100644 --- a/catch/unit/texture/tex1DLayeredLod.cc +++ b/catch/unit/texture/tex1DLayeredLod.cc @@ -46,7 +46,7 @@ THE SOFTWARE. * - unit/texture/tex1DLayeredLod.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_tex1DLayeredLod_Positive_ReadModeElementType", "", char, unsigned char, short, unsigned short, int, unsigned int, float) { @@ -54,33 +54,38 @@ TEMPLATE_TEST_CASE("Unit_tex1DLayeredLod_Positive_ReadModeElementType", "", char TextureTestParams params = {}; params.extent = make_hipExtent(1024, 0, 0); + params.layers = 2; params.num_subdivisions = 4; params.GenerateTextureDesc(); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); - tex1DLayeredLodKernel><<>>( - fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), - params.num_subdivisions, params.tex_desc.normalizedCoords, 0, 0); - fixture.LoadOutput(); + for (auto layer = 0u; layer < params.layers; ++layer) { + tex1DLayeredLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer, 0); - for 
(auto i = 0u; i < params.NumItersX(); ++i) { - float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, - params.tex_desc.normalizedCoords); + fixture.LoadOutput(); - INFO("Index: " << i); - INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); - INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); - INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); - INFO("x: " << std::fixed << std::setprecision(16) << x); + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); - auto ref_val = fixture.tex_h.Tex1D(x, params.tex_desc); - REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); - REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); - REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); - REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + INFO("Layer: " << layer); + INFO("Index: " << i); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = fixture.tex_h.Tex1DLayered(x, layer, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } } } @@ -99,7 +104,7 @@ TEMPLATE_TEST_CASE("Unit_tex1DLayeredLod_Positive_ReadModeElementType", "", char * - unit/texture/tex1DLayeredLod.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, unsigned short) { @@ -107,34 +112,38 @@ 
TEMPLATE_TEST_CASE("Unit_tex1DLayeredLod_Positive_ReadModeNormalizedFloat", "", TextureTestParams params = {}; params.extent = make_hipExtent(1024, 0, 0); + params.layers = 2; params.num_subdivisions = 4; params.GenerateTextureDesc(hipReadModeNormalizedFloat); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); - tex1DLayeredLodKernel><<>>( - fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), - params.num_subdivisions, params.tex_desc.normalizedCoords, 0, 0); - fixture.LoadOutput(); + for (auto layer = 0u; layer < params.layers; ++layer) { + tex1DLayeredLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), fixture.tex.object(), params.Width(), + params.num_subdivisions, params.tex_desc.normalizedCoords, layer, 0); - for (auto i = 0u; i < params.NumItersX(); ++i) { - float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, - params.tex_desc.normalizedCoords); + fixture.LoadOutput(); - INFO("i: " << i); - INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); - INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); - INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); - INFO("Filter mode: " << FilteringModeToString(params.tex_desc.filterMode)); - INFO("x: " << std::fixed << std::setprecision(16) << x); + for (auto i = 0u; i < params.NumItersX(); ++i) { + float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); - auto ref_val = - Vec4Map(fixture.tex_h.Tex1D(x, params.tex_desc), NormalizeInteger); - REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); - REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); - REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); - REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + INFO("Layer: " << layer); + INFO("i: " << i); 
+ INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + + auto ref_val = Vec4Map(fixture.tex_h.Tex1DLayered(x, layer, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } } } \ No newline at end of file diff --git a/catch/unit/texture/tex1DLod.cc b/catch/unit/texture/tex1DLod.cc index ceee1211b3..e38ed60745 100644 --- a/catch/unit/texture/tex1DLod.cc +++ b/catch/unit/texture/tex1DLod.cc @@ -46,7 +46,7 @@ THE SOFTWARE. * - unit/texture/tex1DLod.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_tex1DLod_Positive_ReadModeElementType", "", char, unsigned char, short, unsigned short, int, unsigned int, float) { @@ -57,7 +57,7 @@ TEMPLATE_TEST_CASE("Unit_tex1DLod_Positive_ReadModeElementType", "", char, unsig params.num_subdivisions = 4; params.GenerateTextureDesc(); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); tex1DLodKernel><<>>( @@ -99,7 +99,7 @@ TEMPLATE_TEST_CASE("Unit_tex1DLod_Positive_ReadModeElementType", "", char, unsig * - unit/texture/tex1DLod.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_tex1DLod_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, unsigned short) { @@ -110,7 +110,7 @@ TEMPLATE_TEST_CASE("Unit_tex1DLod_Positive_ReadModeNormalizedFloat", "", char, u params.num_subdivisions = 4; 
params.GenerateTextureDesc(hipReadModeNormalizedFloat); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads, num_blocks] = GetLaunchConfig(1024, params.NumItersX()); tex1DLodKernel><<>>( @@ -127,7 +127,6 @@ TEMPLATE_TEST_CASE("Unit_tex1DLod_Positive_ReadModeNormalizedFloat", "", char, u INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode: " << AddressModeToString(params.tex_desc.addressMode[0])); - INFO("Filter mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("x: " << std::fixed << std::setprecision(16) << x); auto ref_val = From 90a783c959172aa3887876aa6ac99b5744d2c6a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 14:22:50 +0100 Subject: [PATCH 47/71] EXSWHTEC-358 - Implement tests for 2D mipmapped texture device functions #432 Change-Id: I8d517ef95eeddfcc442f00e331e3ed0d1123d99c --- catch/hipTestMain/config/config_amd_linux | 11 ++ catch/unit/texture/CMakeLists.txt | 7 + catch/unit/texture/kernels.hh | 83 ++++++++++ catch/unit/texture/tex2D.cc | 87 +++++----- catch/unit/texture/tex2DGrad.cc | 175 +++++++++++++++++++++ catch/unit/texture/tex2DLayered.cc | 90 +++++------ catch/unit/texture/tex2DLayeredGrad.cc | 183 ++++++++++++++++++++++ catch/unit/texture/tex2DLayeredLod.cc | 183 ++++++++++++++++++++++ catch/unit/texture/tex2DLod.cc | 175 +++++++++++++++++++++ catch/unit/texture/tex2Dgather.cc | 104 ++++++++++++ catch/unit/texture/texture_reference.hh | 36 +++++ 11 files changed, 1045 insertions(+), 89 deletions(-) create mode 100644 catch/unit/texture/tex2DGrad.cc create mode 100644 catch/unit/texture/tex2DLayeredGrad.cc create mode 100644 catch/unit/texture/tex2DLayeredLod.cc create mode 100644 catch/unit/texture/tex2DLod.cc create mode 100644 
catch/unit/texture/tex2Dgather.cc diff --git a/catch/hipTestMain/config/config_amd_linux b/catch/hipTestMain/config/config_amd_linux index 66107df9e1..99c33f7834 100644 --- a/catch/hipTestMain/config/config_amd_linux +++ b/catch/hipTestMain/config/config_amd_linux @@ -128,8 +128,19 @@ "Unit_hipEventIpc", "=== SWDEV-427101:Below test fails randomly in PSDB ===", "Unit_deviceAllocation_InOneThread_AccessInAllThreads", +<<<<<<< HEAD "=== Below test is disabled due to defect EXSWHTEC-347 ===", "Unit_hipPointerSetAttribute_Positive_SyncMemops", +======= + "Unit_tex2DLod_Positive_ReadModeElementType", + "Unit_tex2DLod_Positive_ReadModeNormalizedFloat", + "Unit_tex2DLayeredLod_Positive_ReadModeElementType", + "Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat", + "Unit_tex2DGrad_Positive_ReadModeElementType", + "Unit_tex2DGrad_Positive_ReadModeNormalizedFloat", + "Unit_tex2DLayeredGrad_Positive_ReadModeElementType", + "Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat", +>>>>>>> ab1704d9 (Merge branch 'develop' into tex2D_mipmap_tests) "=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===", "Unit_hiprtc_stdheaders", "Unit_hipGraphAddMemcpyNode_Negative_Parameters", diff --git a/catch/unit/texture/CMakeLists.txt b/catch/unit/texture/CMakeLists.txt index 18881833b9..d1d56d7064 100644 --- a/catch/unit/texture/CMakeLists.txt +++ b/catch/unit/texture/CMakeLists.txt @@ -69,6 +69,13 @@ set(TEST_SRC texCubemapLayered.cc texCubemapLayeredLod.cc texCubemapLayeredGrad.cc + tex2Dgather.cc + tex2D.cc + tex2DLayered.cc + tex2DGrad.cc + tex2DLayeredGrad.cc + tex2DLod.cc + tex2DLayeredLod.cc ) if(WIN32) diff --git a/catch/unit/texture/kernels.hh b/catch/unit/texture/kernels.hh index 2e0bf256cd..d74caa5b2a 100644 --- a/catch/unit/texture/kernels.hh +++ b/catch/unit/texture/kernels.hh @@ -95,6 +95,22 @@ __global__ void tex1DLayeredGradKernel(TexelType* const out, size_t N, hipTextur out[tid] = tex1DLayeredGrad(tex_obj, x, layer, dx, 
dy); } +template +__global__ void tex2DgatherKernel(TexelType* const out, int comp, size_t N_x, size_t N_y, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t num_subdivisions, bool normalized_coords) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + + out[tid_y * N_x + tid_x] = tex2Dgather(tex_obj, x, y, comp); +} + template __global__ void tex2DKernel(TexelType* const out, size_t N_x, size_t N_y, hipTextureObject_t tex_obj, size_t width, size_t height, @@ -111,6 +127,73 @@ __global__ void tex2DKernel(TexelType* const out, size_t N_x, size_t N_y, out[tid_y * N_x + tid_x] = tex2D(tex_obj, x, y); } +template +__global__ void tex2DGradKernel(TexelType* const out, size_t N_x, size_t N_y, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t num_subdivisions, bool normalized_coords, float2 dx, + float2 dy) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + + out[tid_y * N_x + tid_x] = tex2DGrad(tex_obj, x, y, dx, dy); +} + +template +__global__ void tex2DLayeredGradKernel(TexelType* const out, size_t N_x, size_t N_y, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t num_subdivisions, bool normalized_coords, float layer, + float2 dx, float2 dy) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + float x 
= GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + + out[tid_y * N_x + tid_x] = tex2DLayeredGrad(tex_obj, x, y, layer, dx, dy); +} + +template +__global__ void tex2DLodKernel(TexelType* const out, size_t N_x, size_t N_y, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t num_subdivisions, bool normalized_coords, float level) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + + out[tid_y * N_x + tid_x] = tex2DLod(tex_obj, x, y, level); +} + +template +__global__ void tex2DLayeredLodKernel(TexelType* const out, size_t N_x, size_t N_y, + hipTextureObject_t tex_obj, size_t width, size_t height, + size_t num_subdivisions, bool normalized_coords, int layer, + float level) { + const auto tid_x = blockIdx.x * blockDim.x + threadIdx.x; + if (tid_x >= N_x) return; + + const auto tid_y = blockIdx.y * blockDim.y + threadIdx.y; + if (tid_y >= N_y) return; + + float x = GetCoordinate(tid_x, N_x, width, num_subdivisions, normalized_coords); + float y = GetCoordinate(tid_y, N_y, height, num_subdivisions, normalized_coords); + + out[tid_y * N_x + tid_x] = tex2DLayeredLod(tex_obj, x, y, layer, level); +} + template __global__ void tex3DKernel(TexelType* const out, size_t N_x, size_t N_y, size_t N_z, hipTextureObject_t tex_obj, size_t width, size_t height, size_t depth, diff --git a/catch/unit/texture/tex2D.cc b/catch/unit/texture/tex2D.cc index 79d6055ede..7b31a03944 100644 --- a/catch/unit/texture/tex2D.cc +++ b/catch/unit/texture/tex2D.cc @@ -50,7 +50,9 @@ THE SOFTWARE. 
*/ TEMPLATE_TEST_CASE("Unit_tex2D_Positive_ReadModeElementType", "", char, unsigned char, short, unsigned short, int, unsigned int, float) { - TextureTestParams params = {0}; + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; params.extent = make_hipExtent(16, 4, 0); params.num_subdivisions = 4; params.GenerateTextureDesc(); @@ -75,29 +77,27 @@ TEMPLATE_TEST_CASE("Unit_tex2D_Positive_ReadModeElementType", "", char, unsigned fixture.LoadOutput(); - for (auto j = 0u; j < params.NumItersY(); ++j) { - for (auto i = 0u; i < params.NumItersX(); ++i) { - float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, - params.tex_desc.normalizedCoords); - float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, - params.tex_desc.normalizedCoords); + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); - INFO("i: " << i); - INFO("j: " << j); - INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); - INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); - INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); - INFO("x: " << std::fixed << std::setprecision(16) << x); - INFO("y: " << std::fixed << std::setprecision(16) << y); + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); - auto index = j * params.NumItersX() + i; + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + 
INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); - const auto ref_val = fixture.tex_h.Tex2D(x, y, params.tex_desc); - REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); - REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); - REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); - REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); - } + const auto ref_val = fixture.tex_h.Tex2D(x, y, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); } } @@ -120,7 +120,9 @@ TEMPLATE_TEST_CASE("Unit_tex2D_Positive_ReadModeElementType", "", char, unsigned */ TEMPLATE_TEST_CASE("Unit_tex2D_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, unsigned short) { - TextureTestParams params = {0}; + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; params.extent = make_hipExtent(16, 4, 0); params.num_subdivisions = 4; params.GenerateTextureDesc(hipReadModeNormalizedFloat); @@ -145,30 +147,27 @@ TEMPLATE_TEST_CASE("Unit_tex2D_Positive_ReadModeNormalizedFloat", "", char, unsi fixture.LoadOutput(); - for (auto j = 0u; j < params.NumItersY(); ++j) { - for (auto i = 0u; i < params.NumItersX(); ++i) { - float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, - params.tex_desc.normalizedCoords); - float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, - params.tex_desc.normalizedCoords); + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); /* row index: tex2DKernel stores out[tid_y * N_x + tid_x] */ - INFO("i: " << i); - INFO("j: " << j); - INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); - INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); - INFO("Address mode X: " <<
AddressModeToString(params.tex_desc.addressMode[0])); - INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); - INFO("x: " << std::fixed << std::setprecision(16) << x); - INFO("y: " << std::fixed << std::setprecision(16) << y); + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); - auto index = j * params.NumItersX() + i; + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); - auto ref_val = - Vec4Map(fixture.tex_h.Tex2D(x, y, params.tex_desc), NormalizeInteger); - REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); - REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); - REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); - REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); - } + auto ref_val = + Vec4Map(fixture.tex_h.Tex2D(x, y, params.tex_desc), NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); } } \ No newline at end of file diff --git a/catch/unit/texture/tex2DGrad.cc b/catch/unit/texture/tex2DGrad.cc new file mode 100644 index 0000000000..939a6a2543 --- /dev/null +++ b/catch/unit/texture/tex2DGrad.cc @@ -0,0 +1,175 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex2DGrad tex2DGrad + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DGrad` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex2DGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex2DGrad_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + tex2DGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, params.tex_desc.normalizedCoords, + float2{0.5f, 0.5f}, float2{0.5f, 0.5f}); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << 
y); + + const auto ref_val = fixture.tex_h.Tex2D(x, y, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DGrad` and read mode set to `hipReadModeNormalizedFloat`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex2DGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex2DGrad_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + tex2DGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, params.tex_desc.normalizedCoords, + float2{0.5f, 0.5f}, float2{0.5f, 0.5f}); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), 
params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + auto ref_val = + Vec4Map(fixture.tex_h.Tex2D(x, y, params.tex_desc), NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex2DLayered.cc b/catch/unit/texture/tex2DLayered.cc index b05a2e0a32..4929a5b3e0 100644 --- a/catch/unit/texture/tex2DLayered.cc +++ b/catch/unit/texture/tex2DLayered.cc @@ -50,7 +50,9 @@ THE SOFTWARE. 
*/ TEMPLATE_TEST_CASE("Unit_tex2DLayered_Positive_ReadModeElementType", "", char, unsigned char, short, unsigned short, int, unsigned int, float) { - TextureTestParams params = {0}; + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; params.extent = make_hipExtent(16, 4, 0); params.layers = 2; params.num_subdivisions = 4; @@ -78,30 +80,28 @@ TEMPLATE_TEST_CASE("Unit_tex2DLayered_Positive_ReadModeElementType", "", char, u fixture.LoadOutput(); - for (auto j = 0u; j < params.NumItersY(); ++j) { - for (auto i = 0u; i < params.NumItersX(); ++i) { - float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, - params.tex_desc.normalizedCoords); - float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, - params.tex_desc.normalizedCoords); + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); - INFO("Layer: " << layer); - INFO("i: " << i); - INFO("j: " << j); - INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); - INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); - INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); - INFO("x: " << std::fixed << std::setprecision(16) << x); - INFO("y: " << std::fixed << std::setprecision(16) << y); + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); - auto index = j * params.NumItersX() + i; + INFO("Layer: " << layer); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << 
AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); - const auto ref_val = fixture.tex_h.Tex2DLayered(x, y, layer, params.tex_desc); - REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); - REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); - REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); - REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); - } + const auto ref_val = fixture.tex_h.Tex2DLayered(x, y, layer, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); } } } @@ -125,7 +125,9 @@ TEMPLATE_TEST_CASE("Unit_tex2DLayered_Positive_ReadModeElementType", "", char, u */ TEMPLATE_TEST_CASE("Unit_tex2DLayered_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, unsigned short) { - TextureTestParams params = {0}; + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; params.extent = make_hipExtent(16, 4, 0); params.layers = 2; params.num_subdivisions = 4; @@ -153,31 +155,29 @@ TEMPLATE_TEST_CASE("Unit_tex2DLayered_Positive_ReadModeNormalizedFloat", "", cha fixture.LoadOutput(); - for (auto j = 0u; j < params.NumItersY(); ++j) { - for (auto i = 0u; i < params.NumItersX(); ++i) { - float x = GetCoordinate(i, params.NumItersX(), params.Width(), params.num_subdivisions, - params.tex_desc.normalizedCoords); - float y = GetCoordinate(j, params.NumItersY(), params.Height(), params.num_subdivisions, - params.tex_desc.normalizedCoords); + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); - INFO("Layer: " << layer); - INFO("i: " << i); - INFO("j: " << j); - INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); - INFO("Address mode X: " << 
AddressModeToString(params.tex_desc.addressMode[0])); - INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); - INFO("x: " << std::fixed << std::setprecision(16) << x); - INFO("y: " << std::fixed << std::setprecision(16) << y); + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); - auto index = j * params.NumItersX() + i; + INFO("Layer: " << layer); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); - auto ref_val = Vec4Map(fixture.tex_h.Tex2DLayered(x, y, layer, params.tex_desc), - NormalizeInteger); - REQUIRE(ref_val.x == fixture.out_alloc_h[index].x); - REQUIRE(ref_val.y == fixture.out_alloc_h[index].y); - REQUIRE(ref_val.z == fixture.out_alloc_h[index].z); - REQUIRE(ref_val.w == fixture.out_alloc_h[index].w); - } + auto ref_val = Vec4Map(fixture.tex_h.Tex2DLayered(x, y, layer, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); } } } \ No newline at end of file diff --git a/catch/unit/texture/tex2DLayeredGrad.cc b/catch/unit/texture/tex2DLayeredGrad.cc new file mode 100644 index 0000000000..d58920bf56 --- /dev/null +++ b/catch/unit/texture/tex2DLayeredGrad.cc @@ -0,0 +1,183 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex2DLayeredGrad tex2DLayeredGrad + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DLayeredGrad` and read mode set to `hipReadModeElementType`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex2DLayeredGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex2DLayeredGrad_Positive_ReadModeElementType", "", char, unsigned char, + short, unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex2DLayeredGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, params.tex_desc.normalizedCoords, + layer, float2{0.5f, 0.5f}, float2{0.5f, 0.5f}); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << 
AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + const auto ref_val = fixture.tex_h.Tex2DLayered(x, y, layer, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DLayeredGrad` and read mode set to + * `hipReadModeNormalizedFloat`. The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex2DLayeredGrad.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat", "", char, + unsigned char, short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex2DLayeredGradKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, params.tex_desc.normalizedCoords, + layer, 
float2{0.5f, 0.5f}, float2{0.5f, 0.5f}); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + auto ref_val = Vec4Map(fixture.tex_h.Tex2DLayered(x, y, layer, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex2DLayeredLod.cc b/catch/unit/texture/tex2DLayeredLod.cc new file mode 100644 index 0000000000..e51000c204 --- /dev/null +++ b/catch/unit/texture/tex2DLayeredLod.cc @@ -0,0 +1,183 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex2DLayeredLod tex2DLayeredLod + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DLayeredLod` and read mode set to `hipReadModeElementType`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex2DLayeredLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex2DLayeredLod_Positive_ReadModeElementType", "", char, unsigned char, + short, unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex2DLayeredLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, params.tex_desc.normalizedCoords, + layer, 0); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << 
std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + const auto ref_val = fixture.tex_h.Tex2DLayered(x, y, layer, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DLayeredLod` and read mode set to + * `hipReadModeNormalizedFloat`. The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex2DLayeredLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat", "", char, unsigned char, + short, unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.layers = 2; + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + for (auto layer = 0u; layer < params.layers; ++layer) { + tex2DLayeredLodKernel><<>>( + fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, params.tex_desc.normalizedCoords, + layer, 0); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 
0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Layer: " << layer); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + auto ref_val = Vec4Map(fixture.tex_h.Tex2DLayered(x, y, layer, params.tex_desc), + NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex2DLod.cc b/catch/unit/texture/tex2DLod.cc new file mode 100644 index 0000000000..e875e09133 --- /dev/null +++ b/catch/unit/texture/tex2DLod.cc @@ -0,0 +1,175 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex2DLod tex2DLod + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DLod` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. 
+ * Test source + * ------------------------ + * - unit/texture/tex2DLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex2DLod_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + tex2DLodKernel> + <<>>(fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), + fixture.tex.object(), params.Width(), params.Height(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + const auto ref_val = 
fixture.tex_h.Tex2D(x, y, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2DLod` and read mode set to `hipReadModeNormalizedFloat`. + * The test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex2DLod.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_tex2DLod_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, + unsigned short) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(hipReadModeNormalizedFloat); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + tex2DLodKernel> + <<>>(fixture.out_alloc_d.ptr(), params.NumItersX(), params.NumItersY(), + fixture.tex.object(), params.Width(), params.Height(), + params.num_subdivisions, params.tex_desc.normalizedCoords, 0); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); ++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = 
GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + auto ref_val = + Vec4Map(fixture.tex_h.Tex2D(x, y, params.tex_desc), NormalizeInteger); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} \ No newline at end of file diff --git a/catch/unit/texture/tex2Dgather.cc b/catch/unit/texture/tex2Dgather.cc new file mode 100644 index 0000000000..6d1c262097 --- /dev/null +++ b/catch/unit/texture/tex2Dgather.cc @@ -0,0 +1,104 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "kernels.hh" +#include "test_fixture.hh" + +/** + * @addtogroup tex2Dgather tex2Dgather + * @{ + * @ingroup TextureTest + */ + +/** + * Test Description + * ------------------------ + * - Test texture fetching with `tex2Dgather` and read mode set to `hipReadModeElementType`. The + * test is performed with: + * - normalized coordinates + * - non-normalized coordinates + * - Nearest-point sampling + * - Linear filtering + * - All combinations of different addressing modes. + * Test source + * ------------------------ + * - unit/texture/tex2Dgather.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_tex2Dgather_Positive_ReadModeElementType", "", char, unsigned char, short, + unsigned short, int, unsigned int, float) { + CHECK_IMAGE_SUPPORT; + + TextureTestParams params = {}; + params.extent = make_hipExtent(16, 4, 0); + params.num_subdivisions = 4; + params.GenerateTextureDesc(); + + TextureTestFixture fixture{params}; + + const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(32, params.NumItersX()); + const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(32, params.NumItersY()); + + dim3 dim_grid; + dim_grid.x = num_blocks_x; + dim_grid.y = num_blocks_y; + + dim3 dim_block; + dim_block.x = num_threads_x; + dim_block.y = num_threads_y; + + const int comp = GENERATE(0, 1, 2, 3); + + tex2DgatherKernel><<>>( + fixture.out_alloc_d.ptr(), comp, params.NumItersX(), params.NumItersY(), fixture.tex.object(), + params.Width(), params.Height(), params.num_subdivisions, params.tex_desc.normalizedCoords); + HIP_CHECK(hipGetLastError()); + + fixture.LoadOutput(); + + for (auto i = 0u; i < params.NumItersX() * params.NumItersY(); 
++i) { + float x = i % params.NumItersX(); + float y = i / params.NumItersX(); + + x = GetCoordinate(x, params.NumItersX(), params.Width(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + y = GetCoordinate(y, params.NumItersY(), params.Height(), params.num_subdivisions, + params.tex_desc.normalizedCoords); + + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); + INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); + INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); + INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); + INFO("x: " << std::fixed << std::setprecision(16) << x); + INFO("y: " << std::fixed << std::setprecision(16) << y); + + const auto ref_val = fixture.tex_h.Tex2DGather(x, y, comp, params.tex_desc); + REQUIRE(ref_val.x == fixture.out_alloc_h[i].x); + REQUIRE(ref_val.y == fixture.out_alloc_h[i].y); + REQUIRE(ref_val.z == fixture.out_alloc_h[i].z); + REQUIRE(ref_val.w == fixture.out_alloc_h[i].w); + } +} \ No newline at end of file diff --git a/catch/unit/texture/texture_reference.hh b/catch/unit/texture/texture_reference.hh index 45f7dd8efa..b66fc4a045 100644 --- a/catch/unit/texture/texture_reference.hh +++ b/catch/unit/texture/texture_reference.hh @@ -35,6 +35,42 @@ template class TextureReference { return Tex1DLayered(x, 0, tex_desc); } + TexelType Tex2DGather(float x, float y, int comp, const hipTextureDesc& tex_desc) const { + x = tex_desc.normalizedCoords ? x * extent_.width : x; + y = tex_desc.normalizedCoords ? 
y * extent_.height : y; + + const auto [i, alpha] = GetLinearFilteringParams(x); + const auto [j, beta] = GetLinearFilteringParams(y); + + const auto T_i0j0 = Sample(i, j, 0, tex_desc.addressMode); + const auto T_i1j0 = Sample(i + 1.0f, j, 0, tex_desc.addressMode); + const auto T_i0j1 = Sample(i, j + 1.0f, 0, tex_desc.addressMode); + const auto T_i1j1 = Sample(i + 1.0f, j + 1.0f, 0, tex_desc.addressMode); + + const auto IndexVec4 = [](auto vec, int comp) { + switch (comp) { + case 0: + return vec.x; + case 1: + return vec.y; + case 2: + return vec.z; + case 3: + return vec.w; + default: + throw std::invalid_argument("Invalid gather comp"); + } + }; + + TexelType texel; + texel.x = IndexVec4(T_i0j1, comp); + texel.y = IndexVec4(T_i1j1, comp); + texel.z = IndexVec4(T_i1j0, comp); + texel.w = IndexVec4(T_i0j0, comp); + + return texel; + } + TexelType Tex2D(float x, float y, const hipTextureDesc& tex_desc) const { return Tex2DLayered(x, y, 0, tex_desc); } From 2691895821047056a869ddfd12758ff26453699e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 14:19:14 +0100 Subject: [PATCH 48/71] EXSWHTEC-359 - Implement tests for 3D mipmapped texture device functions #433 Change-Id: I1933d498f518b352b91b1c37b24c62d0cac309eb --- catch/hipTestMain/config/config_amd_linux | 11 ----------- catch/unit/texture/CMakeLists.txt | 3 +++ catch/unit/texture/tex3D.cc | 2 ++ catch/unit/texture/tex3DGrad.cc | 10 ++++++---- catch/unit/texture/tex3DLod.cc | 10 ++++++---- 5 files changed, 17 insertions(+), 19 deletions(-) diff --git a/catch/hipTestMain/config/config_amd_linux b/catch/hipTestMain/config/config_amd_linux index 99c33f7834..66107df9e1 100644 --- a/catch/hipTestMain/config/config_amd_linux +++ b/catch/hipTestMain/config/config_amd_linux @@ -128,19 +128,8 @@ "Unit_hipEventIpc", "=== SWDEV-427101:Below test fails randomly in PSDB ===", 
"Unit_deviceAllocation_InOneThread_AccessInAllThreads", -<<<<<<< HEAD "=== Below test is disabled due to defect EXSWHTEC-347 ===", "Unit_hipPointerSetAttribute_Positive_SyncMemops", -======= - "Unit_tex2DLod_Positive_ReadModeElementType", - "Unit_tex2DLod_Positive_ReadModeNormalizedFloat", - "Unit_tex2DLayeredLod_Positive_ReadModeElementType", - "Unit_tex2DLayeredLod_Positive_ReadModeNormalizedFloat", - "Unit_tex2DGrad_Positive_ReadModeElementType", - "Unit_tex2DGrad_Positive_ReadModeNormalizedFloat", - "Unit_tex2DLayeredGrad_Positive_ReadModeElementType", - "Unit_tex2DLayeredGrad_Positive_ReadModeNormalizedFloat", ->>>>>>> ab1704d9 (Merge branch 'develop' into tex2D_mipmap_tests) "=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===", "Unit_hiprtc_stdheaders", "Unit_hipGraphAddMemcpyNode_Negative_Parameters", diff --git a/catch/unit/texture/CMakeLists.txt b/catch/unit/texture/CMakeLists.txt index d1d56d7064..3c0d5710d8 100644 --- a/catch/unit/texture/CMakeLists.txt +++ b/catch/unit/texture/CMakeLists.txt @@ -76,6 +76,9 @@ set(TEST_SRC tex2DLayeredGrad.cc tex2DLod.cc tex2DLayeredLod.cc + tex3D.cc + tex3DLod.cc + tex3DGrad.cc ) if(WIN32) diff --git a/catch/unit/texture/tex3D.cc b/catch/unit/texture/tex3D.cc index ecfdd535bb..2aaefe84d3 100644 --- a/catch/unit/texture/tex3D.cc +++ b/catch/unit/texture/tex3D.cc @@ -94,6 +94,7 @@ TEMPLATE_TEST_CASE("Unit_tex3D_Positive_ReadModeElementType", "", char, unsigned z = GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, params.tex_desc.normalizedCoords); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); @@ -173,6 +174,7 @@ 
TEMPLATE_TEST_CASE("Unit_tex3D_Positive_ReadModeNormalizedFloat", "", char, unsi z = GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, params.tex_desc.normalizedCoords); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); diff --git a/catch/unit/texture/tex3DGrad.cc b/catch/unit/texture/tex3DGrad.cc index a3f3d8ddfd..b810359fc1 100644 --- a/catch/unit/texture/tex3DGrad.cc +++ b/catch/unit/texture/tex3DGrad.cc @@ -46,7 +46,7 @@ THE SOFTWARE. * - unit/texture/tex3DGrad.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_tex3DGrad_Positive_ReadModeElementType", "", char, unsigned char, short, unsigned short, int, unsigned int, float) { @@ -57,7 +57,7 @@ TEMPLATE_TEST_CASE("Unit_tex3DGrad_Positive_ReadModeElementType", "", char, unsi params.num_subdivisions = 2; params.GenerateTextureDesc(); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); @@ -94,6 +94,7 @@ TEMPLATE_TEST_CASE("Unit_tex3DGrad_Positive_ReadModeElementType", "", char, unsi z = GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, params.tex_desc.normalizedCoords); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); @@ -125,7 +126,7 @@ 
TEMPLATE_TEST_CASE("Unit_tex3DGrad_Positive_ReadModeElementType", "", char, unsi * - unit/texture/tex3DGrad.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_tex3DGrad_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, unsigned short) { @@ -136,7 +137,7 @@ TEMPLATE_TEST_CASE("Unit_tex3DGrad_Positive_ReadModeNormalizedFloat", "", char, params.num_subdivisions = 2; params.GenerateTextureDesc(hipReadModeNormalizedFloat); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); @@ -173,6 +174,7 @@ TEMPLATE_TEST_CASE("Unit_tex3DGrad_Positive_ReadModeNormalizedFloat", "", char, z = GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, params.tex_desc.normalizedCoords); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); diff --git a/catch/unit/texture/tex3DLod.cc b/catch/unit/texture/tex3DLod.cc index bd5ee5f7a5..e514c9b9a3 100644 --- a/catch/unit/texture/tex3DLod.cc +++ b/catch/unit/texture/tex3DLod.cc @@ -46,7 +46,7 @@ THE SOFTWARE. 
* - unit/texture/tex3DLod.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_tex3DLod_Positive_ReadModeElementType", "", char, unsigned char, short, unsigned short, int, unsigned int, float) { @@ -57,7 +57,7 @@ TEMPLATE_TEST_CASE("Unit_tex3DLod_Positive_ReadModeElementType", "", char, unsig params.num_subdivisions = 2; params.GenerateTextureDesc(); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); @@ -94,6 +94,7 @@ TEMPLATE_TEST_CASE("Unit_tex3DLod_Positive_ReadModeElementType", "", char, unsig z = GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, params.tex_desc.normalizedCoords); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); @@ -125,7 +126,7 @@ TEMPLATE_TEST_CASE("Unit_tex3DLod_Positive_ReadModeElementType", "", char, unsig * - unit/texture/tex3DLod.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_tex3DLod_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, unsigned short) { @@ -136,7 +137,7 @@ TEMPLATE_TEST_CASE("Unit_tex3DLod_Positive_ReadModeNormalizedFloat", "", char, u params.num_subdivisions = 2; params.GenerateTextureDesc(hipReadModeNormalizedFloat); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); 
@@ -173,6 +174,7 @@ TEMPLATE_TEST_CASE("Unit_tex3DLod_Positive_ReadModeNormalizedFloat", "", char, u z = GetCoordinate(z, params.NumItersZ(), params.Depth(), params.num_subdivisions, params.tex_desc.normalizedCoords); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); From baf4c75311140d574b020e465c64635b5818d4f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 14:07:05 +0100 Subject: [PATCH 49/71] EXSWHTEC-360 - Implement tests for Cubemap mipmapped texture device functions #434 Change-Id: I129f5fe8d8769fabbf67bf10e0c994fdf05358e2 --- catch/unit/texture/texCubemap.cc | 2 ++ catch/unit/texture/texCubemapGrad.cc | 10 ++++++---- catch/unit/texture/texCubemapLayeredGrad.cc | 10 ++++++---- catch/unit/texture/texCubemapLayeredLod.cc | 10 ++++++---- catch/unit/texture/texCubemapLod.cc | 10 ++++++---- 5 files changed, 26 insertions(+), 16 deletions(-) diff --git a/catch/unit/texture/texCubemap.cc b/catch/unit/texture/texCubemap.cc index ade3775c52..572283a4a7 100644 --- a/catch/unit/texture/texCubemap.cc +++ b/catch/unit/texture/texCubemap.cc @@ -95,6 +95,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemap_Positive_ReadModeElementType", "", char, uns INFO("i: " << i); INFO("j: " << j); INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); @@ -179,6 +180,7 @@ 
TEMPLATE_TEST_CASE("Unit_texCubemap_Positive_ReadModeNormalizedFloat", "", char, INFO("i: " << i); INFO("j: " << j); INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); diff --git a/catch/unit/texture/texCubemapGrad.cc b/catch/unit/texture/texCubemapGrad.cc index b2024737b3..75a69f698c 100644 --- a/catch/unit/texture/texCubemapGrad.cc +++ b/catch/unit/texture/texCubemapGrad.cc @@ -46,7 +46,7 @@ THE SOFTWARE. * - unit/texture/texCubemapGrad.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_texCubemapGrad_Positive_ReadModeElementType", "", char, unsigned char, short, unsigned short, int, unsigned int, float) { @@ -58,7 +58,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemapGrad_Positive_ReadModeElementType", "", char, params.cubemap = true; params.GenerateTextureDesc(); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); @@ -95,6 +95,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemapGrad_Positive_ReadModeElementType", "", char, INFO("i: " << i); INFO("j: " << j); INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); @@ -130,7 +131,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemapGrad_Positive_ReadModeElementType", "", char, * - 
unit/texture/texCubemapGrad.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, unsigned short) { @@ -142,7 +143,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat", "", c params.cubemap = true; params.GenerateTextureDesc(hipReadModeNormalizedFloat); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); @@ -179,6 +180,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemapGrad_Positive_ReadModeNormalizedFloat", "", c INFO("i: " << i); INFO("j: " << j); INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); diff --git a/catch/unit/texture/texCubemapLayeredGrad.cc b/catch/unit/texture/texCubemapLayeredGrad.cc index 96dd0415b6..68c5efdb86 100644 --- a/catch/unit/texture/texCubemapLayeredGrad.cc +++ b/catch/unit/texture/texCubemapLayeredGrad.cc @@ -46,7 +46,7 @@ THE SOFTWARE. 
* - unit/texture/texCubemapLayeredGrad.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_texCubemapLayeredGrad_Positive_ReadModeElementType", "", char, unsigned char, short, unsigned short, int, unsigned int, float) { @@ -59,7 +59,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemapLayeredGrad_Positive_ReadModeElementType", "" params.cubemap = true; params.GenerateTextureDesc(); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); @@ -98,6 +98,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemapLayeredGrad_Positive_ReadModeElementType", "" INFO("i: " << i); INFO("j: " << j); INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); @@ -134,7 +135,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemapLayeredGrad_Positive_ReadModeElementType", "" * - unit/texture/texCubemapLayeredGrad.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, unsigned short) { @@ -147,7 +148,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat" params.cubemap = true; params.GenerateTextureDesc(hipReadModeNormalizedFloat); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); @@ -186,6 +187,7 @@ 
TEMPLATE_TEST_CASE("Unit_texCubemapLayeredGrad_Positive_ReadModeNormalizedFloat" INFO("i: " << i); INFO("j: " << j); INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); diff --git a/catch/unit/texture/texCubemapLayeredLod.cc b/catch/unit/texture/texCubemapLayeredLod.cc index fd9db2c0b7..af305d1e6c 100644 --- a/catch/unit/texture/texCubemapLayeredLod.cc +++ b/catch/unit/texture/texCubemapLayeredLod.cc @@ -46,7 +46,7 @@ THE SOFTWARE. * - unit/texture/texCubemapLayeredLod.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_texCubemapLayeredLod_Positive_ReadModeElementType", "", char, unsigned char, short, unsigned short, int, unsigned int, float) { @@ -59,7 +59,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemapLayeredLod_Positive_ReadModeElementType", "", params.cubemap = true; params.GenerateTextureDesc(); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); @@ -98,6 +98,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemapLayeredLod_Positive_ReadModeElementType", "", INFO("i: " << i); INFO("j: " << j); INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); @@ -134,7 +135,7 @@ 
TEMPLATE_TEST_CASE("Unit_texCubemapLayeredLod_Positive_ReadModeElementType", "", * - unit/texture/texCubemapLayeredLod.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, unsigned short) { @@ -147,7 +148,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat", params.cubemap = true; params.GenerateTextureDesc(hipReadModeNormalizedFloat); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); @@ -186,6 +187,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemapLayeredLod_Positive_ReadModeNormalizedFloat", INFO("i: " << i); INFO("j: " << j); INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); diff --git a/catch/unit/texture/texCubemapLod.cc b/catch/unit/texture/texCubemapLod.cc index 0d33048197..e35f8f7b6c 100644 --- a/catch/unit/texture/texCubemapLod.cc +++ b/catch/unit/texture/texCubemapLod.cc @@ -46,7 +46,7 @@ THE SOFTWARE. 
* - unit/texture/texCubemapLod.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_texCubemapLod_Positive_ReadModeElementType", "", char, unsigned char, short, unsigned short, int, unsigned int, float) { @@ -58,7 +58,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemapLod_Positive_ReadModeElementType", "", char, params.cubemap = true; params.GenerateTextureDesc(); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); @@ -95,6 +95,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemapLod_Positive_ReadModeElementType", "", char, INFO("i: " << i); INFO("j: " << j); INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); @@ -130,7 +131,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemapLod_Positive_ReadModeElementType", "", char, * - unit/texture/texCubemapLod.cc * Test requirements * ------------------------ - * - HIP_VERSION >= 5.2 + * - HIP_VERSION >= 5.7 */ TEMPLATE_TEST_CASE("Unit_texCubemapLod_Positive_ReadModeNormalizedFloat", "", char, unsigned char, short, unsigned short) { @@ -142,7 +143,7 @@ TEMPLATE_TEST_CASE("Unit_texCubemapLod_Positive_ReadModeNormalizedFloat", "", ch params.cubemap = true; params.GenerateTextureDesc(hipReadModeNormalizedFloat); - TextureTestFixture fixture{params}; + TextureTestFixture fixture{params}; const auto [num_threads_x, num_blocks_x] = GetLaunchConfig(10, params.NumItersX()); const auto [num_threads_y, num_blocks_y] = GetLaunchConfig(10, params.NumItersY()); @@ -179,6 +180,7 @@ 
TEMPLATE_TEST_CASE("Unit_texCubemapLod_Positive_ReadModeNormalizedFloat", "", ch INFO("i: " << i); INFO("j: " << j); INFO("k: " << k); + INFO("Filtering mode: " << FilteringModeToString(params.tex_desc.filterMode)); INFO("Normalized coordinates: " << std::boolalpha << params.tex_desc.normalizedCoords); INFO("Address mode X: " << AddressModeToString(params.tex_desc.addressMode[0])); INFO("Address mode Y: " << AddressModeToString(params.tex_desc.addressMode[1])); From d1500f261269e40ae1eceec1b5884558aa570efc Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Fri, 2 Feb 2024 12:16:12 +0530 Subject: [PATCH 50/71] EXSWHTEC-57 - Implement tests for Import/Export hipMemPool APIs #445 Change-Id: Ia40ead5612819d1e87a4c526adf77be20ebc4ff2 --- catch/unit/CMakeLists.txt | 1 + catch/unit/stream_ordered/CMakeLists.txt | 30 + .../stream_ordered/helper_multiprocess.cc | 518 ++++++++++++++++++ .../stream_ordered/helper_multiprocess.hh | 119 ++++ .../stream_ordered/hipMemPoolExportImport.cc | 491 +++++++++++++++++ .../hipMemPoolExportImportIPC.cc | 419 ++++++++++++++ 6 files changed, 1578 insertions(+) create mode 100644 catch/unit/stream_ordered/CMakeLists.txt create mode 100644 catch/unit/stream_ordered/helper_multiprocess.cc create mode 100644 catch/unit/stream_ordered/helper_multiprocess.hh create mode 100644 catch/unit/stream_ordered/hipMemPoolExportImport.cc create mode 100644 catch/unit/stream_ordered/hipMemPoolExportImportIPC.cc diff --git a/catch/unit/CMakeLists.txt b/catch/unit/CMakeLists.txt index 37f8b73cc5..59f8f6ad47 100644 --- a/catch/unit/CMakeLists.txt +++ b/catch/unit/CMakeLists.txt @@ -22,6 +22,7 @@ add_subdirectory(rtc) add_subdirectory(deviceLib) add_subdirectory(graph) add_subdirectory(memory) +add_subdirectory(stream_ordered) add_subdirectory(stream) add_subdirectory(event) add_subdirectory(occupancy) diff --git a/catch/unit/stream_ordered/CMakeLists.txt b/catch/unit/stream_ordered/CMakeLists.txt new file mode 100644 index 0000000000..e9a5e56a8d --- /dev/null 
+++ b/catch/unit/stream_ordered/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# Common Tests - Test independent of all platforms +set(COMMON_SHARED_SRC helper_multiprocess.cc) + +set(TEST_SRC + hipMemPoolExportImport.cc + hipMemPoolExportImportIPC.cc) + +hip_add_exe_to_target(NAME StreamOrderedTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests COMMON_SHARED_SRC ${COMMON_SHARED_SRC}) diff --git a/catch/unit/stream_ordered/helper_multiprocess.cc b/catch/unit/stream_ordered/helper_multiprocess.cc new file mode 100644 index 0000000000..4ab32e64e7 --- /dev/null +++ b/catch/unit/stream_ordered/helper_multiprocess.cc @@ -0,0 +1,518 @@ +/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "helper_multiprocess.hh"
+// NOTE(review): the standard-header names that followed were lost when this patch text was
+// extracted; the list below is rebuilt from the names this file uses - confirm against the repo.
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+
+/**
+ * Creates a named shared-memory object of `sz` bytes and maps it read/write into this process.
+ *
+ * @param name Name of the shared-memory object.
+ * @param sz   Size in bytes given to the object and to the mapping.
+ * @param info Receives the size, the OS handle/descriptor and the mapped address. On failure
+ *             `addr` is NULL and no handle/descriptor is left open.
+ * @return 0 on success, otherwise the OS error code (GetLastError() / errno).
+ */
+int sharedMemoryCreate(const char* name, size_t sz, sharedMemoryInfo* info) {
+#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+  info->size = sz;
+  info->addr = NULL;
+  info->shmHandle =
+      CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, (DWORD)sz, name);
+  if (info->shmHandle == 0) {
+    return GetLastError();
+  }
+
+  info->addr = MapViewOfFile(info->shmHandle, FILE_MAP_ALL_ACCESS, 0, 0, sz);
+  if (info->addr == NULL) {
+    // Read the error before CloseHandle() can overwrite it, then release the mapping object.
+    const DWORD err = GetLastError();
+    CloseHandle(info->shmHandle);
+    info->shmHandle = NULL;
+    return err;
+  }
+
+  return 0;
+#else
+  info->size = sz;
+  info->addr = NULL;
+
+  info->shmFd = shm_open(name, O_RDWR | O_CREAT, 0777);
+  if (info->shmFd < 0) {
+    return errno;
+  }
+
+  if (ftruncate(info->shmFd, sz) != 0) {
+    // ftruncate() itself only returns -1; the reason is in errno.
+    const int err = errno;
+    close(info->shmFd);
+    info->shmFd = -1;
+    return err;
+  }
+
+  void* mapped = mmap(0, sz, PROT_READ | PROT_WRITE, MAP_SHARED, info->shmFd, 0);
+  if (mapped == MAP_FAILED) {  // mmap() reports failure with MAP_FAILED, never with NULL
+    const int err = errno;
+    close(info->shmFd);
+    info->shmFd = -1;
+    return err;
+  }
+
+  info->addr = mapped;
+  return 0;
+#endif
+}
+
+/**
+ * Opens an existing named shared-memory object and maps `sz` bytes of it read/write.
+ *
+ * @param name Name of the shared-memory object.
+ * @param sz   Number of bytes to map.
+ * @param info Receives the size, the OS handle/descriptor and the mapped address. On failure
+ *             `addr` is NULL and no handle/descriptor is left open.
+ * @return 0 on success, otherwise the OS error code (GetLastError() / errno).
+ */
+int sharedMemoryOpen(const char* name, size_t sz, sharedMemoryInfo* info) {
+#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+  info->size = sz;
+  info->addr = NULL;
+
+  info->shmHandle = OpenFileMapping(FILE_MAP_ALL_ACCESS, FALSE, name);
+  if (info->shmHandle == 0) {
+    return GetLastError();
+  }
+
+  info->addr = MapViewOfFile(info->shmHandle, FILE_MAP_ALL_ACCESS, 0, 0, sz);
+  if (info->addr == NULL) {
+    const DWORD err = GetLastError();
+    CloseHandle(info->shmHandle);
+    info->shmHandle = NULL;
+    return err;
+  }
+
+  return 0;
+#else
+  info->size = sz;
+  info->addr = NULL;
+
+  info->shmFd = shm_open(name, O_RDWR, 0777);
+  if (info->shmFd < 0) {
+    return errno;
+  }
+
+  void* mapped = mmap(0, sz, PROT_READ | PROT_WRITE, MAP_SHARED, info->shmFd, 0);
+  if (mapped == MAP_FAILED) {  // mmap() reports failure with MAP_FAILED, never with NULL
+    const int err = errno;
+    close(info->shmFd);
+    info->shmFd = -1;
+    return err;
+  }
+
+  info->addr = mapped;
+  return 0;
+#endif
+}
+
+/**
+ * Unmaps the region and closes the handle/descriptor recorded in `info`.
+ * The fields are reset afterwards, so calling this twice on the same `info` is harmless.
+ */
+void sharedMemoryClose(sharedMemoryInfo* info) {
+#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+  if (info->addr) {
+    UnmapViewOfFile(info->addr);
+    info->addr = NULL;
+  }
+  if (info->shmHandle) {
+    CloseHandle(info->shmHandle);
+    info->shmHandle = NULL;
+  }
+#else
+  if (info->addr) {
+    munmap(info->addr, info->size);
+    info->addr = NULL;
+  }
+  // As before, 0 is treated as "never opened" (a zero-initialised struct); -1 marks a failed or
+  // already closed descriptor and must not be handed to close().
+  if (info->shmFd > 0) {
+    close(info->shmFd);
+    info->shmFd = -1;
+  }
+#endif
+}
+
+/**
+ * Starts `app` as a child process.
+ *
+ * @param process Receives the PROCESS_INFORMATION (Windows) or the child pid (POSIX).
+ * @param app     Program to run.
+ * @param args    NULL-terminated argument vector; on Windows it is joined with spaces into the
+ *                command line.
+ * @return 0 on success, otherwise the OS error code. On POSIX a failed exec cannot be reported
+ *         here; the child exits with status 127, which waitProcess() returns.
+ */
+int spawnProcess(Process* process, const char* app, char* const* args) {
+#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+  STARTUPINFO si = {0};
+  si.cb = sizeof(si);  // CreateProcess expects the structure size to be filled in
+  std::string arg_string;
+  memset(process, 0, sizeof(*process));
+
+  while (*args) {
+    arg_string.append(*args).append(1, ' ');
+    args++;
+  }
+
+  // Pass the string's own writable storage instead of casting the const away from c_str().
+  const BOOL status =
+      CreateProcess(app, &arg_string[0], NULL, NULL, FALSE, 0, NULL, NULL, &si, process);
+
+  return status ? 0 : GetLastError();
+#else
+  *process = fork();
+  if (*process == 0) {
+    execvp(app, args);
+    // Only reached when exec failed. Returning would let the forked copy continue running the
+    // caller's code as a second instance, so terminate the child right here.
+    perror("spawnProcess: execvp failed");
+    _exit(127);
+  } else if (*process < 0) {
+    return errno;
+  }
+  return 0;
+#endif
+}
+
+/**
+ * Blocks until the child started by spawnProcess() has terminated.
+ *
+ * @return The child's exit code. On POSIX: errno if waitpid() fails, and 128 + signal number
+ *         (always non-zero) if the child was killed by a signal.
+ */
+int waitProcess(Process* process) {
+#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+  DWORD exitCode;
+  WaitForSingleObject(process->hProcess, INFINITE);
+  GetExitCodeProcess(process->hProcess, &exitCode);
+  CloseHandle(process->hProcess);
+  CloseHandle(process->hThread);
+  return (int)exitCode;
+#else
+  int status = 0;
+  for (;;) {
+    if (waitpid(*process, &status, 0) < 0) {
+      if (errno == EINTR) {
+        continue;  // interrupted by a signal handler, the child is still there
+      }
+      return errno;
+    }
+    if (WIFEXITED(status)) {
+      return WEXITSTATUS(status);
+    }
+    if (WIFSIGNALED(status)) {
+      // The child is gone; waiting again could only fail with ECHILD.
+      return 128 + WTERMSIG(status);
+    }
+  }
+#endif
+}
+
+#if defined(__linux__)
+/**
+ * Creates the server side datagram socket and binds it to the filesystem path `name`.
+ *
+ * @param handle    Receives a newly allocated handle (also on failure, with socket == -1);
+ *                  release it with ipcCloseSocket().
+ * @param name      Path to bind to; an existing entry with that name is unlinked first.
+ * @param processes Unused on Linux.
+ * @return 0 on success, -1 on failure.
+ */
+int ipcCreateSocket(ipcHandle*& handle, const char* name, const std::vector<Process>& processes) {
+  (void)processes;  // only the Windows implementation needs the client list
+  int server_fd;
+  struct sockaddr_un servaddr;
+
+  handle = new ipcHandle;
+  memset(handle, 0, sizeof(*handle));
+  handle->socket = -1;
+  handle->socketName = NULL;
+
+  // Creating socket file descriptor. socket() reports failure with -1; 0 is a valid descriptor.
+  if ((server_fd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0) {
+    perror("IPC failure: Socket creation failed");
+    return -1;
+  }
+
+  unlink(name);
+  bzero(&servaddr, sizeof(servaddr));
+  servaddr.sun_family = AF_UNIX;
+
+  size_t len = strlen(name);
+  if (len > (sizeof(servaddr.sun_path) - 1)) {
+    // Not an errno failure, so perror() would append an unrelated error text.
+    fprintf(stderr, "IPC failure: Cannot bind provided name to socket. Name too large\n");
+    close(server_fd);
+    return -1;
+  }
+
+  // servaddr was zeroed above, so the copied path stays NUL-terminated.
+  strncpy(servaddr.sun_path, name, len);
+
+  if (bind(server_fd, (struct sockaddr*)&servaddr, SUN_LEN(&servaddr)) < 0) {
+    perror("IPC failure: Binding socket failed");
+    close(server_fd);
+    return -1;
+  }
+
+  handle->socketName = new char[strlen(name) + 1];
+  strcpy(handle->socketName, name);
+  handle->socket = server_fd;
+  return 0;
+}
+
+/**
+ * Creates the client side datagram socket and binds it to a path named after this process' pid.
+ *
+ * @param handle Receives a newly allocated handle (also on failure, with socket == -1);
+ *               release it with ipcCloseSocket().
+ * @return 0 on success, -1 on failure.
+ */
+int ipcOpenSocket(ipcHandle*& handle) {
+  int sock = 0;
+  struct sockaddr_un cliaddr;
+
+  handle = new ipcHandle;
+  memset(handle, 0, sizeof(*handle));
+  // Without this a failed open would leave socket == 0 and ipcCloseSocket() would close stdin.
+  handle->socket = -1;
+
+  if ((sock = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0) {
+    perror("IPC failure:Socket creation error");
+    return -1;
+  }
+
+  bzero(&cliaddr, sizeof(cliaddr));
+  cliaddr.sun_family = AF_UNIX;
+
+  // Create unique name for the socket. The buffer holds any 32-bit pid plus the terminator.
+  char temp[16];
+  snprintf(temp, sizeof(temp), "%u", static_cast<unsigned int>(getpid()));
+
+  strcpy(cliaddr.sun_path, temp);
+  if (bind(sock, (struct sockaddr*)&cliaddr, sizeof(cliaddr)) < 0) {
+    perror("IPC failure: Binding socket failed");
+    close(sock);
+    return -1;
+  }
+
+  handle->socket = sock;
+  handle->socketName = new char[strlen(temp) + 1];
+  strcpy(handle->socketName, temp);
+
+  return 0;
+}
+
+/**
+ * Closes the socket, removes its filesystem entry and frees the handle.
+ *
+ * @return 0 on success, -1 if `handle` is NULL.
+ */
+int ipcCloseSocket(ipcHandle* handle) {
+  if (!handle) {
+    return -1;
+  }
+
+  if (handle->socketName) {
+    unlink(handle->socketName);
+    delete[] handle->socketName;
+  }
+  if (handle->socket >= 0) {  // -1 marks a handle whose socket was never created
+    close(handle->socket);
+  }
+  delete handle;
+  return 0;
+}
+
+/**
+ * Receives one file descriptor sent as SCM_RIGHTS ancillary data over the handle's socket.
+ *
+ * @param handle   Socket to receive on.
+ * @param shHandle Receives the descriptor.
+ * @return 0 on success, -1 if receiving failed or the control message is not a single
+ *         SOL_SOCKET / SCM_RIGHTS descriptor.
+ */
+int ipcRecvShareableHandle(ipcHandle* handle, ShareableHandle* shHandle) {
+  struct msghdr msg = {0};
+  struct iovec iov[1];
+
+  // Union to guarantee alignment requirements for control array
+  union {
+    struct cmsghdr cm;
+    char control[CMSG_SPACE(sizeof(int))];
+  } control_un;
+
+  char dummy_buffer[1];
+
+  msg.msg_control = control_un.control;
+  msg.msg_controllen = sizeof(control_un.control);
+
+  iov[0].iov_base = (void*)dummy_buffer;
+  iov[0].iov_len = sizeof(dummy_buffer);
+
+  msg.msg_iov = iov;
+  msg.msg_iovlen = 1;
+
+  if (recvmsg(handle->socket, &msg, 0) <= 0) {
+    perror("IPC failure: Receiving data over socket failed");
+    return -1;
+  }
+
+  struct cmsghdr* cmptr = CMSG_FIRSTHDR(&msg);
+  if ((cmptr == NULL) || (cmptr->cmsg_len != CMSG_LEN(sizeof(int)))) {
+    return -1;
+  }
+  if ((cmptr->cmsg_level != SOL_SOCKET) || (cmptr->cmsg_type != SCM_RIGHTS)) {
+    return -1;
+  }
+
+  // CMSG_DATA is not guaranteed to be suitably aligned for an int, hence the byte copy.
+  int receivedfd;
+  memmove(&receivedfd, CMSG_DATA(cmptr), sizeof(receivedfd));
+  *shHandle = receivedfd;
+
+  return 0;
+}
+
+/**
+ * Receives one datagram of at most `size` bytes into `data` on the server socket.
+ *
+ * @return 0 on success, -1 on failure.
+ */
+int ipcRecvDataFromClient(ipcHandle* serverHandle, void* data, size_t size) {
+  ssize_t readResult;
+  struct sockaddr_un cliaddr;
+  socklen_t len = sizeof(cliaddr);
+
+  readResult = recvfrom(serverHandle->socket, data, size, 0, (struct sockaddr*)&cliaddr, &len);
+  if (readResult == -1) {
+    perror("IPC failure: Receiving data over socket failed");
+    return -1;
+  }
+  return 0;
+}
+
+/**
+ * Sends `size` bytes from `data` as one datagram to the socket bound at path `serverName`.
+ *
+ * @return 0 on success, -1 on failure.
+ */
+int ipcSendDataToServer(ipcHandle* handle, const char* serverName, const void* data, size_t size) {
+  ssize_t sendResult;
+  struct sockaddr_un serveraddr;
+
+  bzero(&serveraddr, sizeof(serveraddr));
+  serveraddr.sun_family = AF_UNIX;
+  strncpy(serveraddr.sun_path, serverName, sizeof(serveraddr.sun_path) - 1);
+
+  sendResult =
+      sendto(handle->socket, data, size, 0, (struct sockaddr*)&serveraddr, sizeof(serveraddr));
+  if (sendResult <= 0) {
+    perror("IPC failure: Sending data over socket failed");
+    return -1;  // previously the failure was printed but success was returned
+  }
+
+  return 0;
+}
+
+/**
+ * Sends shareableHandles[data] as SCM_RIGHTS ancillary data to the socket bound at the path
+ * named after the pid `process`.
+ *
+ * @param handle           Socket to send from.
+ * @param shareableHandles Descriptors available for sending.
+ * @param process          Pid of the receiving process; its decimal form is the target path.
+ * @param data             Index into `shareableHandles` of the descriptor to send.
+ * @return 0 on success, -1 on failure or if `data` is out of range.
+ */
+int ipcSendShareableHandle(ipcHandle* handle, const std::vector<ShareableHandle>& shareableHandles,
+                           Process process, int data) {
+  if (data < 0 || static_cast<size_t>(data) >= shareableHandles.size()) {
+    fprintf(stderr, "IPC failure: Shareable handle index out of range\n");
+    return -1;
+  }
+
+  struct msghdr msg = {0};
+  struct iovec iov[1];
+
+  // Union to guarantee alignment requirements for control array
+  union {
+    struct cmsghdr cm;
+    char control[CMSG_SPACE(sizeof(int))];
+  } control_un;
+
+  struct cmsghdr* cmptr;
+  struct sockaddr_un cliaddr;
+
+  // Construct client address to send this Shareable handle to
+  bzero(&cliaddr, sizeof(cliaddr));
+  cliaddr.sun_family = AF_UNIX;
+  char temp[16];  // holds any 32-bit pid plus the terminator
+  snprintf(temp, sizeof(temp), "%u", static_cast<unsigned int>(process));
+  strcpy(cliaddr.sun_path, temp);
+
+  // Send corresponding shareable handle to the client
+  int sendfd = (int)shareableHandles[data];
+
+  msg.msg_control = control_un.control;
+  msg.msg_controllen = sizeof(control_un.control);
+
+  cmptr = CMSG_FIRSTHDR(&msg);
+  cmptr->cmsg_len = CMSG_LEN(sizeof(int));
+  cmptr->cmsg_level = SOL_SOCKET;
+  cmptr->cmsg_type = SCM_RIGHTS;
+
+  memmove(CMSG_DATA(cmptr), &sendfd, sizeof(sendfd));
+
+  msg.msg_name = (void*)&cliaddr;
+  msg.msg_namelen = sizeof(struct sockaddr_un);
+
+  // One payload byte accompanies the control message.
+  iov[0].iov_base = (void*)"";
+  iov[0].iov_len = 1;
+  msg.msg_iov = iov;
+  msg.msg_iovlen = 1;
+
+  ssize_t sendResult = sendmsg(handle->socket, &msg, 0);
+  if (sendResult <= 0) {
+    perror("IPC failure: Sending data over socket failed");
+    return -1;
+  }
+
+  return 0;
+}
+
+/**
+ * Sends every handle in `shareableHandles` to every process in `processes`.
+ * A failed send terminates the program through checkIpcErrors.
+ *
+ * @return 0.
+ */
+int ipcSendShareableHandles(ipcHandle* handle, const std::vector<ShareableHandle>& shareableHandles,
+                            const std::vector<Process>& processes) {
+  // Send all shareable handles to every single process.
+  for (size_t i = 0; i < shareableHandles.size(); i++) {
+    for (size_t j = 0; j < processes.size(); j++) {
+      checkIpcErrors(
+          ipcSendShareableHandle(handle, shareableHandles, processes[j], static_cast<int>(i)));
+    }
+  }
+  return 0;
+}
+
+/**
+ * Receives shareableHandles.size() descriptors, filling the vector in order.
+ * A failed receive terminates the program through checkIpcErrors.
+ *
+ * @return 0.
+ */
+int ipcRecvShareableHandles(ipcHandle* handle, std::vector<ShareableHandle>& shareableHandles) {
+  for (size_t i = 0; i < shareableHandles.size(); i++) {
+    checkIpcErrors(ipcRecvShareableHandle(handle, &shareableHandles[i]));
+  }
+  return 0;
+}
+
+/** Closes a received descriptor; returns the result of close(). */
+int ipcCloseShareableHandle(ShareableHandle shHandle) { return close(shHandle); }
+
+#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+// Generic name to build individual Mailslot names by appending process ids.
+LPTSTR SlotName = (LPTSTR)TEXT("\\\\.\\mailslot\\sample_mailslot_");
+
+/**
+ * Opens, for writing, the mailslot of every process in `processes` and stores the handles in
+ * the same order.
+ *
+ * @param handle    Receives a newly allocated handle; release it with ipcCloseSocket().
+ * @param name      Unused on Windows.
+ * @param processes Clients whose mailslots are opened.
+ * @return 0 on success, -1 on failure.
+ */
+int ipcCreateSocket(ipcHandle*& handle, const char* name, const std::vector<Process>& processes) {
+  (void)name;  // the mailslot names are derived from the client process ids instead
+  handle = new ipcHandle;
+  handle->hMailslot.resize(processes.size());
+
+  // Open Mailslots of all clients and store respective handles.
+  for (size_t i = 0; i < handle->hMailslot.size(); ++i) {
+    std::basic_string<TCHAR> childSlotName(SlotName);
+    char tempBuf[20];
+    _itoa_s(processes[i].dwProcessId, tempBuf, 10);
+    // TEXT() is only meaningful on string literals, so the buffer is appended as it is.
+    childSlotName += tempBuf;
+
+    HANDLE hFile =
+        CreateFile(childSlotName.c_str(), GENERIC_WRITE, FILE_SHARE_READ,
+                   (LPSECURITY_ATTRIBUTES)NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, (HANDLE)NULL);
+    if (hFile == INVALID_HANDLE_VALUE) {
+      printf("IPC failure: Opening Mailslot by CreateFile failed with %d\n", GetLastError());
+      return -1;
+    }
+    handle->hMailslot[i] = hFile;
+  }
+  return 0;
+}
+
+/**
+ * Creates this process' own mailslot, named after its process id.
+ *
+ * @param handle Receives a newly allocated handle; release it with ipcCloseSocket().
+ * @return 0 on success, -1 on failure.
+ */
+int ipcOpenSocket(ipcHandle*& handle) {
+  handle = new ipcHandle;
+  HANDLE hSlot;
+
+  std::basic_string<TCHAR> clientSlotName(SlotName);
+  char tempBuf[20];
+  _itoa_s(GetCurrentProcessId(), tempBuf, 10);
+  clientSlotName += tempBuf;
+
+  hSlot = CreateMailslot(clientSlotName.c_str(), 0, MAILSLOT_WAIT_FOREVER,
+                         (LPSECURITY_ATTRIBUTES)NULL);
+  if (hSlot == INVALID_HANDLE_VALUE) {
+    printf("IPC failure: CreateMailslot failed for client with %d\n", GetLastError());
+    return -1;
+  }
+
+  handle->hMailslot.push_back(hSlot);
+  return 0;
+}
+
+/**
+ * Writes `sz` bytes from `data` to `mailslot`.
+ *
+ * @return 0 on success, -1 on failure.
+ */
+int ipcSendData(HANDLE mailslot, const void* data, size_t sz) {
+  BOOL result;
+  DWORD cbWritten;
+
+  result = WriteFile(mailslot, data, (DWORD)sz, &cbWritten, (LPOVERLAPPED)NULL);
+  if (!result) {
+    printf("IPC failure: WriteFile failed with %d.\n", GetLastError());
+    return -1;
+  }
+  return 0;
+}
+
+/**
+ * Reads exactly `sz` bytes into `data` from the handle's first mailslot.
+ *
+ * @return 0 on success, -1 if the read fails or delivers a different number of bytes.
+ */
+int ipcRecvData(ipcHandle* handle, void* data, size_t sz) {
+  DWORD cbRead = 0;
+
+  if (!ReadFile(handle->hMailslot[0], data, (DWORD)sz, &cbRead, NULL)) {
+    printf("IPC failure: ReadFile failed with %d.\n", GetLastError());
+    return -1;
+  }
+
+  if (sz != (size_t)cbRead) {
+    printf("IPC failure: ReadFile didn't receive the expected number of bytes\n");
+    return -1;
+  }
+
+  return 0;
+}
+
+/**
+ * Duplicates every handle in `shareableHandles` into every process in `processes` and writes
+ * the duplicated handle value to that process' mailslot. handle->hMailslot[i] must belong to
+ * processes[i]. A failed write terminates the program through checkIpcErrors.
+ *
+ * @return 0 on success, -1 if a process cannot be opened or a handle cannot be duplicated.
+ */
+int ipcSendShareableHandles(ipcHandle* handle, const std::vector<ShareableHandle>& shareableHandles,
+                            const std::vector<Process>& processes) {
+  // Send all shareable handles to every single process.
+  for (size_t i = 0; i < processes.size(); i++) {
+    HANDLE hProcess = OpenProcess(PROCESS_DUP_HANDLE, FALSE, processes[i].dwProcessId);
+    if (hProcess == NULL) {  // OpenProcess reports failure with NULL, not INVALID_HANDLE_VALUE
+      printf("IPC failure: OpenProcess failed (%d)\n", GetLastError());
+      return -1;
+    }
+
+    for (size_t j = 0; j < shareableHandles.size(); j++) {
+      HANDLE hDup = INVALID_HANDLE_VALUE;
+      // Duplicate the handle into the target process's space
+      if (!DuplicateHandle(GetCurrentProcess(), shareableHandles[j], hProcess, &hDup, 0, FALSE,
+                           DUPLICATE_SAME_ACCESS)) {
+        printf("IPC failure: DuplicateHandle failed (%d)\n", GetLastError());
+        CloseHandle(hProcess);
+        return -1;
+      }
+      checkIpcErrors(ipcSendData(handle->hMailslot[i], &hDup, sizeof(hDup)));
+    }
+    CloseHandle(hProcess);
+  }
+  return 0;
+}
+
+/**
+ * Reads shareableHandles.size() handle values from this process' mailslot, filling the vector
+ * in order. A failed read terminates the program through checkIpcErrors.
+ *
+ * @return 0.
+ */
+int ipcRecvShareableHandles(ipcHandle* handle, std::vector<ShareableHandle>& shareableHandles) {
+  for (size_t i = 0; i < shareableHandles.size(); i++) {
+    checkIpcErrors(ipcRecvData(handle, &shareableHandles[i], sizeof(shareableHandles[i])));
+  }
+  return 0;
+}
+
+/**
+ * Closes every mailslot handle and frees the handle object.
+ *
+ * @return 0 on success, -1 if `handle` is NULL.
+ */
+int ipcCloseSocket(ipcHandle* handle) {
+  if (!handle) {
+    return -1;
+  }
+
+  for (size_t i = 0; i < handle->hMailslot.size(); i++) {
+    CloseHandle(handle->hMailslot[i]);
+  }
+  delete handle;
+  return 0;
+}
+
+/** Closes a received handle; always returns 0. */
+int ipcCloseShareableHandle(ShareableHandle shHandle) {
+  CloseHandle(shHandle);
+  return 0;
+}
+
+#endif
diff --git a/catch/unit/stream_ordered/helper_multiprocess.hh b/catch/unit/stream_ordered/helper_multiprocess.hh
new file mode 100644
index 0000000000..b9bb4e16ea
--- /dev/null
+++ b/catch/unit/stream_ordered/helper_multiprocess.hh
@@ -0,0 +1,119 @@
+/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef HELPER_MULTIPROCESS_H
+#define HELPER_MULTIPROCESS_H
+
+// NOTE(review): the header names of every #include in this file were lost when the patch text
+// was extracted. The lists below are rebuilt from the names this header and
+// helper_multiprocess.cc use - confirm them against the repository before applying.
+#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h>
+#include <aclapi.h>
+#include <sddl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strsafe.h>
+#include <tchar.h>
+#include <winternl.h>
+#include <iostream>
+#else
+#include <errno.h>
+#include <fcntl.h>
+#include <memory.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#endif
+#include <vector>
+
+// Bookkeeping for one mapped shared-memory region.
+typedef struct sharedMemoryInfo_st {
+  void* addr;   // address the region is mapped at
+  size_t size;  // size of the mapping in bytes
+#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+  HANDLE shmHandle;  // file-mapping object
+#else
+  int shmFd;  // descriptor returned by shm_open
+#endif
+} sharedMemoryInfo;
+
+// Creates a named shared-memory object of `sz` bytes and maps it. Returns 0 or an OS error code.
+int sharedMemoryCreate(const char* name, size_t sz, sharedMemoryInfo* info);
+
+// Opens an existing named shared-memory object and maps `sz` bytes. Returns 0 or an OS error code.
+int sharedMemoryOpen(const char* name, size_t sz, sharedMemoryInfo* info);
+
+// Unmaps the region and closes the handle/descriptor stored in `info`.
+void sharedMemoryClose(sharedMemoryInfo* info);
+
+
+#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+typedef PROCESS_INFORMATION Process;
+#else
+typedef pid_t Process;
+#endif
+
+// Starts `app` with the NULL-terminated argument vector `args`. Returns 0 or an OS error code.
+int spawnProcess(Process* process, const char* app, char* const* args);
+
+// Waits for the process to terminate and returns its exit code.
+int waitProcess(Process* process);
+
+// Terminates the program when an ipc* call returned -1. Wrapped in do/while(0) so that the
+// macro acts as one statement (safe in an unbraced if/else); the argument is evaluated once.
+#define checkIpcErrors(ipcFuncResult)                                                              \
+  do {                                                                                             \
+    if ((ipcFuncResult) == -1) {                                                                   \
+      fprintf(stderr, "Failure at %u %s\n", (unsigned int)__LINE__, __FILE__);                     \
+      exit(EXIT_FAILURE);                                                                          \
+    }                                                                                              \
+  } while (0)
+
+#if defined(__linux__)
+struct ipcHandle_st {
+  int socket;        // datagram socket descriptor
+  char* socketName;  // path the socket is bound to
+};
+typedef int ShareableHandle;
+#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+struct ipcHandle_st {
+  std::vector<HANDLE>
+      hMailslot;  // 1 Handle in case of child and `num children` Handles for parent.
+};
+typedef HANDLE ShareableHandle;
+#endif
+
+typedef struct ipcHandle_st ipcHandle;
+
+// Sender side: allocates `handle` and sets up the channel towards `processes`.
+int ipcCreateSocket(ipcHandle*& handle, const char* name, const std::vector<Process>& processes);
+
+// Receiver side: allocates `handle` and creates this process' own endpoint.
+int ipcOpenSocket(ipcHandle*& handle);
+
+// Closes the endpoint(s) and frees `handle`.
+int ipcCloseSocket(ipcHandle* handle);
+
+// Receives shareableHandles.size() handles, filling the vector in order.
+int ipcRecvShareableHandles(ipcHandle* handle, std::vector<ShareableHandle>& shareableHandles);
+
+// Sends every handle in `shareableHandles` to every process in `processes`.
+int ipcSendShareableHandles(ipcHandle* handle, const std::vector<ShareableHandle>& shareableHandles,
+                            const std::vector<Process>& processes);
+
+// Closes a handle obtained through ipcRecvShareableHandles.
+int ipcCloseShareableHandle(ShareableHandle shHandle);
+
+#endif  // HELPER_MULTIPROCESS_H
diff --git a/catch/unit/stream_ordered/hipMemPoolExportImport.cc b/catch/unit/stream_ordered/hipMemPoolExportImport.cc
new file mode 100644
index 0000000000..58ada79b7c
--- /dev/null
+++ b/catch/unit/stream_ordered/hipMemPoolExportImport.cc
@@ -0,0 +1,491 @@
+/*
+ Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+ */ +#include +#include +#include +#include + +/** + * @addtogroup hipMemPoolExportToShareableHandle hipMemPoolExportToShareableHandle + * @{ + * @ingroup StreamOTest + * `hipMemPoolExportToShareableHandle(void* shared_handle, hipMemPool_t mem_pool, + * hipMemAllocationHandleType handle_type, unsigned int flags)` - Exports a memory pool to the + * requested handle type. + */ + +/** + * Test Description + * ------------------------ + * - Basic test to verify exporting/importing a shareable handle on a single device in a single + * process. + * Test source + * ------------------------ + * - /unit/memory/hipMemPoolExportImport.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipMemPoolExportImport_Functional") { + int mem_pool_support = 0; + HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); + if (!mem_pool_support) { + SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); + return; + } + + int shareable_handle; + hipMemPoolPtrExportData export_ptr; + void* ptr; + + hipMemAllocationHandleType handle_type = hipMemHandleTypePosixFileDescriptor; + HIP_CHECK(hipSetDevice(0)); + StreamGuard stream(Streams::withFlags, hipStreamNonBlocking); + + hipMemPool_t mempool; + hipMemPoolProps pool_props; + memset(&pool_props, 0, sizeof(hipMemPoolProps)); + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.handleTypes = hipMemHandleTypePosixFileDescriptor; + pool_props.location.type = hipMemLocationTypeDevice; + pool_props.location.id = 0; + + HIP_CHECK(hipMemPoolCreate(&mempool, &pool_props)); + + // Allocate memory in a stream from the pool just created + HIP_CHECK(hipMallocFromPoolAsync(&ptr, kPageSize, mempool, stream.stream())); + + HIP_CHECK(hipMemPoolExportToShareableHandle(&shareable_handle, mempool, handle_type, 0)); + + memset((void*)&export_ptr, 0, sizeof(hipMemPoolPtrExportData)); + HIP_CHECK(hipMemPoolExportPointer(&export_ptr, 
reinterpret_cast(ptr))); + + LinearAllocGuard host_ptr(LinearAllocs::hipHostMalloc, kPageSize); + + hipMemPool_t shared_mempool; + int* shared_ptr; + + HIP_CHECK(hipMemPoolImportFromShareableHandle( + &shared_mempool, reinterpret_cast(shareable_handle), handle_type, 0)); + + hipMemAccessFlags access_flags; + hipMemLocation location; + location.type = hipMemLocationTypeDevice; + location.id = 0; + HIP_CHECK(hipMemPoolGetAccess(&access_flags, shared_mempool, &location)); + if (access_flags != hipMemAccessFlagsProtReadWrite) { + hipMemAccessDesc desc; + memset(&desc, 0, sizeof(hipMemAccessDesc)); + desc.location.type = hipMemLocationTypeDevice; + desc.location.id = 0; + desc.flags = hipMemAccessFlagsProtReadWrite; + HIP_CHECK(hipMemPoolSetAccess(shared_mempool, &desc, 1)); + } + + HIP_CHECK( + hipMemPoolImportPointer(reinterpret_cast(&shared_ptr), shared_mempool, &export_ptr)); + + const auto element_count = kPageSize / sizeof(int); + constexpr auto thread_count = 1024; + const auto block_count = element_count / thread_count + 1; + int expected_value = 12; + VectorSet<<>>(shared_ptr, expected_value, + element_count); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipStreamSynchronize(stream.stream())); + + // Copy the buffer locally + HIP_CHECK(hipMemcpyAsync(host_ptr.host_ptr(), shared_ptr, kPageSize, hipMemcpyDeviceToHost, + stream.stream())); + HIP_CHECK(hipStreamSynchronize(stream.stream())); + + // Check if the content is as expected + ArrayFindIfNot(host_ptr.host_ptr(), expected_value, element_count); + + // Free the memory before the exporter process frees it + HIP_CHECK(hipFreeAsync(shared_ptr, stream.stream())); + + // And wait for all the queued up work to complete + HIP_CHECK(hipStreamSynchronize(stream.stream())); + + HIP_CHECK(hipFreeAsync(ptr, stream.stream())); + HIP_CHECK(hipStreamSynchronize(stream.stream())); + HIP_CHECK(hipMemPoolDestroy(mempool)); +} + +/** + * Test Description + * ------------------------ + * - Test to verify 
hipMemPoolExportToShareableHandle behavior with invalid arguments: + * -# Invalid shareable handle + * -# Invalid Memory Pool + * -# Invalid flag + * -# Invalid Memory Pool properties + * -# Invalid Memory Handle type + * + * Test source + * ------------------------ + * - /unit/memory/hipMemPoolExportImport.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipMemPoolExportToShareableHandle_Negative_Parameters") { + int mem_pool_support = 0; + HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); + if (!mem_pool_support) { + SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); + return; + } + + int device_id = 0; + HIP_CHECK(hipSetDevice(device_id)); + + hipMemPool_t mempool; + hipMemPoolProps pool_props; + memset(&pool_props, 0, sizeof(hipMemPoolProps)); + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.handleTypes = hipMemHandleTypePosixFileDescriptor; + pool_props.location.type = hipMemLocationTypeDevice; + pool_props.location.id = 0; + HIP_CHECK(hipMemPoolCreate(&mempool, &pool_props)); + + SECTION("Invalid shareable handle") { + HIP_CHECK_ERROR( + hipMemPoolExportToShareableHandle(nullptr, mempool, hipMemHandleTypePosixFileDescriptor, 0), + hipErrorInvalidValue); + } + + SECTION("Invalid Memory Pool") { + int share_handle; + + HIP_CHECK_ERROR(hipMemPoolExportToShareableHandle(&share_handle, nullptr, + hipMemHandleTypePosixFileDescriptor, 0), + hipErrorInvalidValue); + } + + SECTION("Invalid flag") { + int share_handle; + + HIP_CHECK_ERROR(hipMemPoolExportToShareableHandle(&share_handle, mempool, + hipMemHandleTypePosixFileDescriptor, 1), + hipErrorInvalidValue); + } + + SECTION("Invalid Memory Pool properties") { + int share_handle; + pool_props.handleTypes = hipMemHandleTypeNone; + hipMemPool_t mempool_none; + HIP_CHECK(hipMemPoolCreate(&mempool_none, &pool_props)); + + HIP_CHECK_ERROR(hipMemPoolExportToShareableHandle(&share_handle, 
mempool_none, + hipMemHandleTypePosixFileDescriptor, 0), + hipErrorInvalidValue); + pool_props.handleTypes = hipMemHandleTypePosixFileDescriptor; + HIP_CHECK(hipMemPoolDestroy(mempool_none)); + } + + SECTION("Invalid Memory Handle type") { + int share_handle; + + HIP_CHECK_ERROR( + hipMemPoolExportToShareableHandle(&share_handle, mempool, hipMemHandleTypeNone, 0), + hipErrorInvalidValue); + } + + HIP_CHECK(hipMemPoolDestroy(mempool)); +} + +/** + * End doxygen group hipMemPoolExportToShareableHandle. + * @} + */ + + +/** + * @addtogroup hipMemPoolImportFromShareableHandle hipMemPoolImportFromShareableHandle + * @{ + * @ingroup StreamOTest + * `hipMemPoolImportFromShareableHandle(hipMemPool_t* mem_pool, void* shared_handle, + * hipMemAllocationHandleType handle_type,unsigned int flags)` - Imports a memory pool from a shared + * handle. + * ________________________ + * Test cases from other APIs: + * - @ref Unit_hipMemPoolExportImport_Functional + * - @ref Unit_hipMemPoolExportImport_IPC_Functional + * - @ref Unit_hipMemPoolExportImport_MultipleDevices_IPC_Functional + */ + +/** + * Test Description + * ------------------------ + * - Test to verify hipMemPoolImportFromShareableHandle behavior with invalid arguments: + * -# Invalid shareable handle + * -# Invalid Memory Pool + * -# Invalid flag + * -# Invalid Memory Handle type + * + * Test source + * ------------------------ + * - /unit/memory/hipMemPoolExportImport.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipMemPoolImportFromShareableHandle_Negative_Parameters") { + int mem_pool_support = 0; + HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); + if (!mem_pool_support) { + SUCCEED("Runtime doesn't support Memory Pool. 
Skip the test case."); + return; + } + + int device_id = 0; + HIP_CHECK(hipSetDevice(device_id)); + + int share_handle; + hipMemPool_t mempool; + hipMemPoolProps pool_props; + memset(&pool_props, 0, sizeof(hipMemPoolProps)); + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.handleTypes = hipMemHandleTypePosixFileDescriptor; + pool_props.location.type = hipMemLocationTypeDevice; + pool_props.location.id = 0; + HIP_CHECK(hipMemPoolCreate(&mempool, &pool_props)); + HIP_CHECK(hipMemPoolExportToShareableHandle(&share_handle, mempool, + hipMemHandleTypePosixFileDescriptor, 0)); + + hipMemPool_t shared_mempool = nullptr; + + SECTION("Invalid shareable handle") { + HIP_CHECK_ERROR(hipMemPoolImportFromShareableHandle(&shared_mempool, nullptr, + hipMemHandleTypePosixFileDescriptor, 0), + hipErrorInvalidValue); + } + + SECTION("Invalid Memory Pool") { + HIP_CHECK_ERROR(hipMemPoolImportFromShareableHandle(nullptr, &share_handle, + hipMemHandleTypePosixFileDescriptor, 0), + hipErrorInvalidValue); + } + + SECTION("Invalid flag") { + HIP_CHECK_ERROR(hipMemPoolImportFromShareableHandle(&shared_mempool, &share_handle, + hipMemHandleTypePosixFileDescriptor, 1), + hipErrorInvalidValue); + } + + SECTION("Invalid Memory Handle type") { + HIP_CHECK_ERROR(hipMemPoolImportFromShareableHandle(&shared_mempool, &share_handle, + hipMemHandleTypeNone, 0), + hipErrorInvalidValue); + } +} + +/** + * End doxygen group hipMemPoolImportFromShareableHandle. + * @} + */ + + +/** + * @addtogroup hipMemPoolExportPointer hipMemPoolExportPointer + * @{ + * @ingroup StreamOTest + * `hipMemPoolExportPointer(hipMemPoolPtrExportData* export_data, void* dev_ptr)` - Export data to share a memory pool allocation between processes. 
+ * ________________________ + * Test cases from other APIs: + * - @ref Unit_hipMemPoolExportImport_Functional + * - @ref Unit_hipMemPoolExportImport_IPC_Functional + * - @ref Unit_hipMemPoolExportImport_MultipleDevices_IPC_Functional + */ + +/** + * Test Description + * ------------------------ + * - Test to verify hipMemPoolExportPointer behavior with invalid arguments: + * -# Invalid exported data + * -# Invalid device pointer + * + * Test source + * ------------------------ + * - /unit/memory/hipMemPoolExportImport.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipMemPoolExportPointer_Negative_Parameters") { + int mem_pool_support = 0; + HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); + if (!mem_pool_support) { + SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); + return; + } + + int device_id = 0; + HIP_CHECK(hipSetDevice(device_id)); + + void* ptr; + hipMemPoolPtrExportData export_ptr; + hipMemAllocationHandleType handle_type = hipMemHandleTypePosixFileDescriptor; + + StreamGuard stream(Streams::withFlags, hipStreamNonBlocking); + int share_handle; + hipMemPool_t mempool; + hipMemPoolProps pool_props; + memset(&pool_props, 0, sizeof(hipMemPoolProps)); + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.handleTypes = handle_type; + pool_props.location.type = hipMemLocationTypeDevice; + pool_props.location.id = 0; + HIP_CHECK(hipMemPoolCreate(&mempool, &pool_props)); + + HIP_CHECK(hipMallocFromPoolAsync(&ptr, kPageSize, mempool, stream.stream())); + + HIP_CHECK(hipMemPoolExportToShareableHandle(&share_handle, mempool, handle_type, 0)); + + memset(&export_ptr, 0, sizeof(hipMemPoolPtrExportData)); + + SECTION("Invalid exported data") { + HIP_CHECK_ERROR(hipMemPoolExportPointer(nullptr, reinterpret_cast(ptr)), + hipErrorInvalidValue); + } + + SECTION("Invalid device pointer") { + HIP_CHECK_ERROR(hipMemPoolExportPointer(&export_ptr, 
nullptr), hipErrorInvalidValue); + } + + HIP_CHECK(hipFreeAsync(ptr, stream.stream())); + HIP_CHECK(hipStreamSynchronize(stream.stream())); + HIP_CHECK(hipMemPoolDestroy(mempool)); +} + +/** + * End doxygen group hipMemPoolExportPointer. + * @} + */ + + +/** + * @addtogroup hipMemPoolImportPointer hipMemPoolImportPointer + * @{ + * @ingroup StreamOTest + * `hipMemPoolImportPointer(void** dev_ptr, hipMemPool_t mem_pool, hipMemPoolPtrExportData* export_data)` - Import a memory pool allocation from another process. + * ________________________ + * Test cases from other APIs: + * - @ref Unit_hipMemPoolExportImport_Functional + * - @ref Unit_hipMemPoolExportImport_IPC_Functional + * - @ref Unit_hipMemPoolExportImport_MultipleDevices_IPC_Functional + */ + +/** + * Test Description + * ------------------------ + * - Test to verify hipMemPoolImportPointer behavior with invalid arguments: + * -# Invalid device ptr + * -# Invalid Memory Pool + * -# Invalid exported data + * + * Test source + * ------------------------ + * - /unit/memory/hipMemPoolExportImport.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipMemPoolImportPointer_Negative_Parameters") { + int mem_pool_support = 0; + HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); + if (!mem_pool_support) { + SUCCEED("Runtime doesn't support Memory Pool. 
Skip the test case."); + return; + } + + int device_id = 0; + HIP_CHECK(hipSetDevice(device_id)); + + void* ptr; + hipMemPoolPtrExportData export_ptr; + hipMemAllocationHandleType handle_type = hipMemHandleTypePosixFileDescriptor; + + StreamGuard stream(Streams::withFlags, hipStreamNonBlocking); + int share_handle; + hipMemPool_t mempool; + hipMemPoolProps pool_props; + memset(&pool_props, 0, sizeof(hipMemPoolProps)); + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.handleTypes = handle_type; + pool_props.location.type = hipMemLocationTypeDevice; + pool_props.location.id = 0; + HIP_CHECK(hipMemPoolCreate(&mempool, &pool_props)); + + // Allocate memory in a stream from the pool just created + HIP_CHECK(hipMallocFromPoolAsync(&ptr, kPageSize, mempool, stream.stream())); + + HIP_CHECK(hipMemPoolExportToShareableHandle(&share_handle, mempool, handle_type, 0)); + + memset((void*)&export_ptr, 0, sizeof(hipMemPoolPtrExportData)); + HIP_CHECK(hipMemPoolExportPointer(&export_ptr, reinterpret_cast(ptr))); + + hipMemPool_t shared_mempool; + int* shared_ptr = nullptr; + + HIP_CHECK(hipMemPoolImportFromShareableHandle( + &shared_mempool, reinterpret_cast(share_handle), handle_type, 0)); + + hipMemAccessFlags access_flags; + hipMemLocation location; + location.type = hipMemLocationTypeDevice; + location.id = 0; + HIP_CHECK(hipMemPoolGetAccess(&access_flags, shared_mempool, &location)); + if (access_flags != hipMemAccessFlagsProtReadWrite) { + hipMemAccessDesc desc; + memset(&desc, 0, sizeof(hipMemAccessDesc)); + desc.location.type = hipMemLocationTypeDevice; + desc.location.id = 0; + desc.flags = hipMemAccessFlagsProtReadWrite; + HIP_CHECK(hipMemPoolSetAccess(shared_mempool, &desc, 1)); + } + + SECTION("Invalid device ptr") { + HIP_CHECK_ERROR(hipMemPoolImportPointer(nullptr, shared_mempool, &export_ptr), + hipErrorInvalidValue); + } + + SECTION("Invalid Memory Pool") { + HIP_CHECK_ERROR( + hipMemPoolImportPointer(reinterpret_cast(&shared_ptr), nullptr, 
&export_ptr), + hipErrorInvalidValue); + } + + SECTION("Invalid exported data") { + HIP_CHECK_ERROR( + hipMemPoolImportPointer(reinterpret_cast(&shared_ptr), shared_mempool, nullptr), + hipErrorInvalidValue); + } + + HIP_CHECK(hipFreeAsync(ptr, stream.stream())); + HIP_CHECK(hipStreamSynchronize(stream.stream())); + HIP_CHECK(hipMemPoolDestroy(mempool)); +} + +/** + * End doxygen group hipMemPoolImportPointer. + * @} + */ diff --git a/catch/unit/stream_ordered/hipMemPoolExportImportIPC.cc b/catch/unit/stream_ordered/hipMemPoolExportImportIPC.cc new file mode 100644 index 0000000000..9635f93b63 --- /dev/null +++ b/catch/unit/stream_ordered/hipMemPoolExportImportIPC.cc @@ -0,0 +1,419 @@ +/* + Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+ */ + +#include "helper_multiprocess.hh" + +#include +#include +#include +#include + +/** + * @addtogroup hipMemPoolExportToShareableHandle hipMemPoolExportToShareableHandle + * @{ + * @ingroup StreamOTest + * `hipMemPoolExportToShareableHandle(void* shared_handle, hipMemPool_t mem_pool, + * hipMemAllocationHandleType handle_type, unsigned int flags)` - Exports a memory pool to the + * requested handle type. + */ + +#ifdef __linux__ + +static const char shm_name[] = "mempool_test_shm"; +static const char ipc_name[] = "mempool_test_pipe"; + +static constexpr int kMaxDevices = 8; + +typedef struct shmStruct_st { + Process processes[kMaxDevices]; + hipMemPoolPtrExportData exportPtrData[kMaxDevices]; +} shmStruct; + +typedef struct ipcBarrier { + int count; + bool sense; + bool allExit; +} ipcBarrier_t; + +typedef struct ipcDevices { + int count; + int ordinals[kMaxDevices]; +} ipcDevices_t; + +static ipcBarrier_t* g_Barrier{}; +static bool g_procSense; +static int g_processCnt; + +/* + Get device with P2P access to device 0. +*/ +static void get_devices(ipcDevices_t* devices) { + pid_t pid = fork(); + + if (!pid) { + // HIP APIs are called in child process, + // to avoid HIP initialization in main process. 
+ int i, device_count; + HIP_CHECK(hipGetDeviceCount(&device_count)); + + int mem_pool_support = 0; + HIP_CHECK(hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, 0)); + if (!mem_pool_support) { + devices->count = 0; + exit(EXIT_SUCCESS); + } + + // Device 0 + devices->ordinals[0] = 0; + devices->count = 1; + + if (device_count < 2) { + exit(EXIT_SUCCESS); + } + + int can_peer_access_0i, can_peer_access_i0; + for (i = 1; i < device_count; i++) { + HIP_CHECK(hipDeviceCanAccessPeer(&can_peer_access_0i, 0, i)); + HIP_CHECK(hipDeviceCanAccessPeer(&can_peer_access_i0, i, 0)); + HIP_CHECK( + hipDeviceGetAttribute(&mem_pool_support, hipDeviceAttributeMemoryPoolsSupported, i)); + + if (can_peer_access_0i * can_peer_access_i0 * mem_pool_support) { + devices->ordinals[i] = i; + INFO("Two-way peer access is available between GPU" << devices->ordinals[0] << " and GPU" + << devices->ordinals[i]); + devices->count += 1; + if (devices->count >= kMaxDevices) break; + } else { + break; + } + } + + exit(EXIT_SUCCESS); + } else { + int status; + waitpid(pid, &status, 0); + HIP_ASSERT(!status); + } +} + +/* + Calling process waits for other processes to signal/complete. 
+*/ +static void process_barrier() { + int newCount = __sync_add_and_fetch(&g_Barrier->count, 1); + + if (newCount == g_processCnt) { + g_Barrier->count = 0; + g_Barrier->sense = !g_procSense; + + } else { + while (g_Barrier->sense == g_procSense) { + if (!g_Barrier->allExit) { + sched_yield(); + } else { + exit(EXIT_FAILURE); + } + } + } + + g_procSense = !g_procSense; +} + +/* Child process(es) import shared memory pool and check if allocated memory can be accessed and + * used*/ +static void child_process(int id) { + volatile shmStruct* shm = NULL; + hipStream_t stream; + sharedMemoryInfo info; + void* ptr; + + LinearAllocGuard host_ptr(LinearAllocs::hipHostMalloc, kPageSize); + + ipcHandle* ipc_child_handle = NULL; + checkIpcErrors(ipcOpenSocket(ipc_child_handle)); + + // wait for parent process to create shared memory + process_barrier(); + + if (sharedMemoryOpen(shm_name, sizeof(shmStruct), &info) != 0) { + INFO("Failed to create shared memory slab\n"); + exit(EXIT_FAILURE); + } + shm = reinterpret_cast(info.addr); + shm->processes[id] = getpid(); + + // wait for parent process to send shareable handle + process_barrier(); + + // Receive allocation handle shared by parent. + std::vector sh_handle(1); + checkIpcErrors(ipcRecvShareableHandles(ipc_child_handle, sh_handle)); + + HIP_CHECK(hipSetDevice(0)); + HIP_CHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking)); + + hipMemPool_t pool; + + hipMemAllocationHandleType handle_type = hipMemHandleTypePosixFileDescriptor; + + // Import mem pool from all the devices created in the master + // process using shareable handles received via socket + // and import the pointer to the allocated buffer using + // exportData filled in shared memory by the master process. 
+ HIP_CHECK(hipMemPoolImportFromShareableHandle(&pool, reinterpret_cast(sh_handle[0]), + handle_type, 0)); + + hipMemAccessFlags access_flags; + hipMemLocation location; + location.type = hipMemLocationTypeDevice; + location.id = 0; + HIP_CHECK(hipMemPoolGetAccess(&access_flags, pool, &location)); + if (access_flags != hipMemAccessFlagsProtReadWrite) { + hipMemAccessDesc desc; + memset(&desc, 0, sizeof(hipMemAccessDesc)); + desc.location.type = hipMemLocationTypeDevice; + desc.location.id = 0; + desc.flags = hipMemAccessFlagsProtReadWrite; + HIP_CHECK(hipMemPoolSetAccess(pool, &desc, 1)); + } + + // Import the allocation from memory pool using the opaque export data retrieved through + // the shared memory + HIP_CHECK(hipMemPoolImportPointer(&ptr, pool, + const_cast(&shm->exportPtrData[id]))); + + // Since we have imported allocations shared by the parent with us, we can + // close this ShareableHandle. + checkIpcErrors(ipcCloseShareableHandle(sh_handle[0])); + + // Since we have imported allocations shared by the parent with us, we can + // close the socket + checkIpcErrors(ipcCloseSocket(ipc_child_handle)); + + // Child processed accesses imported buffer + const auto element_count = kPageSize / sizeof(int); + constexpr auto thread_count = 1024; + const auto block_count = element_count / thread_count + 1; + int expected_value = 12 + id; + VectorSet<<>>((int*)ptr, expected_value, element_count); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipStreamSynchronize(stream)); + + // Copy the buffer locally + HIP_CHECK(hipMemcpyAsync(host_ptr.host_ptr(), ptr, kPageSize, hipMemcpyDeviceToHost, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + INFO("Process " << id << "verifying...\n"); + + // Check if the content is as expected + ArrayFindIfNot(host_ptr.host_ptr(), expected_value, element_count); + + // Free the memory before the exporter process frees it + HIP_CHECK(hipFreeAsync(ptr, stream)); + + // And wait for all the queued up work to complete + 
HIP_CHECK(hipStreamSynchronize(stream)); + HIP_CHECK(hipStreamDestroy(stream)); +} + +static void parent_process(int dev_count) { + sharedMemoryInfo info; + int i; + volatile shmStruct* shm = NULL; + std::vector ptrs; + std::vector child_processes; + + if (sharedMemoryCreate(shm_name, sizeof(*shm), &info) != 0) { + INFO("Failed to create shared memory slab\n"); + exit(EXIT_FAILURE); + } + shm = (volatile shmStruct*)info.addr; + memset((void*)shm, 0, sizeof(*shm)); + + // wait for child processes to insert their pids into shared memory + process_barrier(); + + std::vector shareable_handles(dev_count); + std::vector streams(dev_count); + std::vector pools(dev_count); + + // Now allocate memory for each process and fill the shared + // memory buffer with the export data and get mempool handles to communicate + for (i = 0; i < dev_count; i++) { + void* ptr; + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipStreamCreateWithFlags(&streams[i], hipStreamNonBlocking)); + // Allocate an explicit pool with IPC capabilities + hipMemPoolProps pool_props; + memset(&pool_props, 0, sizeof(hipMemPoolProps)); + pool_props.allocType = hipMemAllocationTypePinned; + pool_props.handleTypes = hipMemHandleTypePosixFileDescriptor; + + pool_props.location.type = hipMemLocationTypeDevice; + pool_props.location.id = i; + + HIP_CHECK(hipMemPoolCreate(&pools[i], &pool_props)); + + // Query the shareable handle for the pool + hipMemAllocationHandleType handle_type = hipMemHandleTypePosixFileDescriptor; + // Allocate memory in a stream from the pool just created + HIP_CHECK(hipMallocFromPoolAsync(&ptr, kPageSize, pools[i], streams[i])); + + HIP_CHECK(hipMemPoolExportToShareableHandle(&shareable_handles[i], pools[i], handle_type, 0)); + + // Memset handle to 0 to make sure call to hipMemPoolImportPointer in + // child process will fail if the following call to hipMemPoolExportPointer fails. 
+ memset((void*)&shm->exportPtrData[i], 0, sizeof(hipMemPoolPtrExportData)); + HIP_CHECK( + hipMemPoolExportPointer(const_cast(&shm->exportPtrData[i]), ptr)); + ptrs.push_back(ptr); + child_processes.push_back(static_cast(shm->processes[i])); + } + + ipcHandle* ipc_parent_handle; + checkIpcErrors(ipcCreateSocket(ipc_parent_handle, ipc_name, child_processes)); + + for (i = 0; i < dev_count; i++) { + std::vector current_handle(1, shareable_handles[i]); + std::vector current_process(1, child_processes[i]); + checkIpcErrors(ipcSendShareableHandles(ipc_parent_handle, current_handle, current_process)); + } + + // Close the shareable handles as they are not needed anymore. + for (int i = 0; i < dev_count; i++) { + checkIpcErrors(ipcCloseShareableHandle(shareable_handles[i])); + } + + checkIpcErrors(ipcCloseSocket(ipc_parent_handle)); + + process_barrier(); + + // And wait for them to finish + for (i = 0; i < child_processes.size(); i++) { + if (waitProcess(&child_processes[i]) != EXIT_SUCCESS) { + INFO("Process " << i << " failed!\n"); + exit(EXIT_FAILURE); + } + } + + // Clean up! + for (i = 0; i < dev_count; i++) { + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipFreeAsync(ptrs[i], streams[i])); + HIP_CHECK(hipStreamSynchronize(streams[i])); + HIP_CHECK(hipMemPoolDestroy(pools[i])); + HIP_CHECK(hipStreamDestroy(streams[i])); + } + + sharedMemoryClose(&info); +} + +/** + * Test Description + * ------------------------ + * - Test to verify exporting/importing a shareable handle on a single device between parent and + * child process using IPC mechanisms - shared memory and sockets. 
+ * Test source + * ------------------------ + * - /unit/memory/hipMemPoolExportImportIPC.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipMemPoolExportImport_IPC_Functional") { + ipcDevices_t* shm_devices; + shm_devices = reinterpret_cast( + mmap(NULL, sizeof(*shm_devices), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0)); + REQUIRE(MAP_FAILED != shm_devices); + // Barrier is used to synchronize created processes + g_Barrier = reinterpret_cast( + mmap(NULL, sizeof(*g_Barrier), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0)); + memset(g_Barrier, 0, sizeof(*g_Barrier)); + + // set local barrier sense flag + g_procSense = 0; + + get_devices(shm_devices); + if (!shm_devices->count) { + SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); + return; + } + // Set device count to 1 + shm_devices->count = 1; + g_processCnt = shm_devices->count + 1; + int index = 0; + + Process process = fork(); + if (process != 0) { + parent_process(shm_devices->count); + } else { + child_process(index); + } +} + +/** + * Test Description + * ------------------------ + * - Test to verify exporting/importing a shareable handle on multiple devices between parent and + * child processes using IPC mechanisms - shared memory and sockets. + * Test source + * ------------------------ + * - /unit/memory/hipMemPoolExportImportIPC.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipMemPoolExportImport_MultipleDevices_IPC_Functional") { + ipcDevices_t* shm_devices; + shm_devices = reinterpret_cast( + mmap(NULL, sizeof(*shm_devices), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0)); + REQUIRE(MAP_FAILED != shm_devices); + // Barrier is used to synchronize processes created. 
+ g_Barrier = reinterpret_cast( + mmap(NULL, sizeof(*g_Barrier), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0)); + memset(g_Barrier, 0, sizeof(*g_Barrier)); + + // set local barrier sense flag + g_procSense = 0; + + get_devices(shm_devices); + if (!shm_devices->count) { + SUCCEED("Runtime doesn't support Memory Pool. Skip the test case."); + return; + } + g_processCnt = shm_devices->count + 1; + + int index = 0; + + for (int i = 1; i < g_processCnt; i++) { + Process process = fork(); + if (!process) { + index = i; + break; + } + } + + if (index == 0) { + parent_process(shm_devices->count); + } else { + child_process(index - 1); + } +} +#endif From 6f9f5c07fe6db3122dd01b02baee583a06abfc8a Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Fri, 22 Dec 2023 15:47:34 +0000 Subject: [PATCH 51/71] EXSWHTEC-370 - Implement tests for the hipDrvGraph*MemcpyNode APIs #446 Change-Id: I956dc06157324e9d6971348a70b600c4a9105538 --- catch/include/memcpy3d_tests_common.hh | 96 +++++- catch/unit/graph/CMakeLists.txt | 2 + catch/unit/graph/hipDrvGraphAddMemcpyNode.cc | 295 ++++++++++++++++ .../graph/hipDrvGraphMemcpyNodeGetParams.cc | 91 +++++ .../graph/hipDrvGraphMemcpyNodeSetParams.cc | 314 ++++++++++++++++++ 5 files changed, 788 insertions(+), 10 deletions(-) create mode 100644 catch/unit/graph/hipDrvGraphMemcpyNodeGetParams.cc create mode 100644 catch/unit/graph/hipDrvGraphMemcpyNodeSetParams.cc diff --git a/catch/include/memcpy3d_tests_common.hh b/catch/include/memcpy3d_tests_common.hh index 84d0fc517b..acb549c94a 100644 --- a/catch/include/memcpy3d_tests_common.hh +++ b/catch/include/memcpy3d_tests_common.hh @@ -23,7 +23,7 @@ THE SOFTWARE. 
#pragma once #pragma clang diagnostic ignored "-Wmissing-field-initializers" #pragma clang diagnostic ignored "-Wunused-lambda-capture" - +#pragma clang diagnostic ignored "-Wunused-parameter" #include #include @@ -44,8 +44,9 @@ static inline hipMemcpyKind ReverseMemcpyDirection(const hipMemcpyKind direction } }; -static hipMemcpy3DParms GetMemcpy3DParms(PtrVariant dst_ptr, hipPos dst_pos, PtrVariant src_ptr, - hipPos src_pos, hipExtent extent, hipMemcpyKind kind) { +static inline hipMemcpy3DParms GetMemcpy3DParms(PtrVariant dst_ptr, hipPos dst_pos, + PtrVariant src_ptr, hipPos src_pos, + hipExtent extent, hipMemcpyKind kind) { hipMemcpy3DParms parms = {0}; if (std::holds_alternative(dst_ptr)) { parms.dstArray = std::get(dst_ptr); @@ -185,7 +186,7 @@ void Memcpy3DDeviceToDeviceShell(F memcpy_func, hipStream_t kernel_stream = null HIP_CHECK(hipDeviceCanAccessPeer(&can_access_peer, src_device, dst_device)); if (!can_access_peer) { std::string msg = "Skipped as peer access cannot be enabled between devices " + - std::to_string(src_device) + " " + std::to_string(dst_device); + std::to_string(src_device) + " " + std::to_string(dst_device); HipTest::HIP_SKIP_TEST(msg.c_str()); return; } @@ -205,7 +206,8 @@ void Memcpy3DDeviceToDeviceShell(F memcpy_func, hipStream_t kernel_stream = null // Using dst_alloc width and height to set only the elements that will be copied over to // dst_alloc Iota<<>>(src_alloc.ptr(), src_alloc.pitch(), - dst_alloc.width_logical(),dst_alloc.height(), dst_alloc.depth()); + dst_alloc.width_logical(), + dst_alloc.height(), dst_alloc.depth()); HIP_CHECK(hipGetLastError()); HIP_CHECK(memcpy_func(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(), @@ -626,15 +628,14 @@ constexpr auto MemTypeUnified() { using DrvPtrVariant = std::variant; -template -hipError_t DrvMemcpy3DWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVariant src_ptr, - hipPos src_pos, hipExtent extent, hipMemcpyKind kind, - hipStream_t stream = nullptr) { 
+static inline HIP_MEMCPY3D GetDrvMemcpy3DParms(DrvPtrVariant dst_ptr, hipPos dst_pos, + DrvPtrVariant src_ptr, hipPos src_pos, + hipExtent extent, hipMemcpyKind kind) { HIP_MEMCPY3D parms = {0}; if (std::holds_alternative(dst_ptr)) { parms.dstMemoryType = hipMemoryTypeArray; - parms.dstArray = std::get(dst_ptr); + parms.dstArray = std::get(dst_ptr); } else { auto ptr = std::get(dst_ptr); parms.dstPitch = ptr.pitch; @@ -694,6 +695,81 @@ hipError_t DrvMemcpy3DWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVaria parms.dstY = dst_pos.y; parms.dstZ = dst_pos.z; + return parms; +} + +static inline bool operator==(const HIP_MEMCPY3D& lhs, const HIP_MEMCPY3D& rhs) { + bool pos_eq = lhs.dstXInBytes == rhs.dstXInBytes && lhs.dstY == rhs.dstY && + lhs.dstZ == rhs.dstZ && lhs.srcXInBytes == rhs.srcXInBytes && lhs.srcY == rhs.srcY && + lhs.srcZ == rhs.srcZ; + bool extent_eq = + lhs.WidthInBytes == rhs.WidthInBytes && lhs.Height == rhs.Height && lhs.Depth == rhs.Depth; + bool mem_eq = true;  // accumulated over the dst and src checks below + if (lhs.dstArray) { + mem_eq = lhs.dstArray == rhs.dstArray && lhs.dstMemoryType == rhs.dstMemoryType; + } else { + mem_eq = lhs.dstPitch == rhs.dstPitch && lhs.dstMemoryType == rhs.dstMemoryType; + } + if (lhs.srcArray) { + mem_eq = mem_eq && lhs.srcArray == rhs.srcArray && lhs.srcMemoryType == rhs.srcMemoryType; + } else { + mem_eq = mem_eq && lhs.srcPitch == rhs.srcPitch && lhs.srcMemoryType == rhs.srcMemoryType; + } + if (lhs.dstDevice) { + mem_eq = mem_eq && (lhs.dstDevice == rhs.dstDevice); + } + if (lhs.dstHost) { + mem_eq = mem_eq && (lhs.dstHost == rhs.dstHost); + } + if (lhs.srcDevice) { + mem_eq = mem_eq && (lhs.srcDevice == rhs.srcDevice); + } + if (lhs.srcHost) { + mem_eq = mem_eq && (lhs.srcHost == rhs.srcHost); + } + + return pos_eq && extent_eq && mem_eq; +} + +template +hipError_t DrvMemcpy3DGraphWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVariant src_ptr, + hipPos src_pos, hipExtent extent, hipMemcpyKind kind, + hipCtx_t context, hipStream_t stream = nullptr) { + auto 
parms = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind); + + hipGraph_t g = nullptr; + HIP_CHECK(hipGraphCreate(&g, 0)); + hipGraphNode_t node = nullptr; + if constexpr (set_params) { + auto reversed_parms = GetDrvMemcpy3DParms(src_ptr, src_pos, dst_ptr, dst_pos, extent, + ReverseMemcpyDirection(kind)); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&node, g, nullptr, 0, &reversed_parms, context)); + HIP_CHECK(hipDrvGraphMemcpyNodeSetParams(node, &parms)); + } else { + HIP_CHECK(hipDrvGraphAddMemcpyNode(&node, g, nullptr, 0, &parms, context)); + } + + HIP_MEMCPY3D retrieved_params = {0}; + HIP_CHECK(hipDrvGraphMemcpyNodeGetParams(node, &retrieved_params)); + REQUIRE(parms == retrieved_params); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, g, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(g)); + + return hipSuccess; +} + +template +hipError_t DrvMemcpy3DWrapper(DrvPtrVariant dst_ptr, hipPos dst_pos, DrvPtrVariant src_ptr, + hipPos src_pos, hipExtent extent, hipMemcpyKind kind, + hipStream_t stream = nullptr) { + auto parms = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind); + if constexpr (async) { return hipDrvMemcpy3DAsync(&parms, stream); } else { diff --git a/catch/unit/graph/CMakeLists.txt b/catch/unit/graph/CMakeLists.txt index cef5d2f5b7..82525991c7 100644 --- a/catch/unit/graph/CMakeLists.txt +++ b/catch/unit/graph/CMakeLists.txt @@ -151,6 +151,8 @@ set(TEST_SRC hipDrvGraphAddMemcpyNode.cc hipGraphAddMemAllocNode.cc hipGraphAddMemFreeNode.cc + hipDrvGraphMemcpyNodeGetParams.cc + hipDrvGraphMemcpyNodeSetParams.cc ) add_custom_target(add_Kernel.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} ${CMAKE_CURRENT_SOURCE_DIR}/add_Kernel.cpp -o ${CMAKE_CURRENT_BINARY_DIR}/../graph/add_Kernel.code 
-I${HIP_PATH}/include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH}) diff --git a/catch/unit/graph/hipDrvGraphAddMemcpyNode.cc b/catch/unit/graph/hipDrvGraphAddMemcpyNode.cc index c3bbf553db..75c20be754 100644 --- a/catch/unit/graph/hipDrvGraphAddMemcpyNode.cc +++ b/catch/unit/graph/hipDrvGraphAddMemcpyNode.cc @@ -17,11 +17,28 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include + #include +#include #include +#include + #include "numeric" +#include "graph_tests_common.hh" + #define XSIZE 32 +/** + * @addtogroup hipDrvGraphAddMemcpyNode hipDrvGraphAddMemcpyNode + * @{ + * @ingroup GraphTest + * `hipDrvGraphAddMemcpyNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, const + * hipGraphNode_t *pDependencies, size_t numDependencies, const HIP_MEMCPY3D* copyParams, hipCtx_t + ctx)` + - Creates a memcpy node and adds it to a graph + */ + /** * Test Description * ------------------------ @@ -362,3 +379,281 @@ TEST_CASE("Unit_hipDrvGraphAddMemcpyNode_MulitDevice") { } } #endif + +/** + * Test Description + * ------------------------ + * - Verify basic API behavior. A Memcpy node is created with parameters set according to the + * test run, after which the graph is run and the memcpy results are verified. + * The test is run for all possible memcpy directions, with both the corresponding memcpy + * kind and hipMemcpyDefault, as well as half page and full page allocation sizes. 
+ * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemcpyNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEST_CASE("Unit_hipDrvGraphAddMemcpyNode_Positive_Basic") { + using namespace std::placeholders; + + constexpr bool async = false; + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + SECTION("Device to host") { + Memcpy3DDeviceToHostShell( + std::bind(DrvMemcpy3DGraphWrapper<>, _1, _2, _3, _4, _5, _6, context, _7)); + } + + SECTION("Host to device") { + Memcpy3DHostToDeviceShell( + std::bind(DrvMemcpy3DGraphWrapper<>, _1, _2, _3, _4, _5, _6, context, _7)); + } + + SECTION("Host to host") { + Memcpy3DHostToHostShell( + std::bind(DrvMemcpy3DGraphWrapper<>, _1, _2, _3, _4, _5, _6, context, _7)); + } + + SECTION("Device to device") { + SECTION("Peer access enabled") { + Memcpy3DDeviceToDeviceShell( + std::bind(DrvMemcpy3DGraphWrapper<>, _1, _2, _3, _4, _5, _6, context, _7)); + } + SECTION("Peer access disabled") { + Memcpy3DDeviceToDeviceShell( + std::bind(DrvMemcpy3DGraphWrapper<>, _1, _2, _3, _4, _5, _6, context, _7)); + } + } + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +TEST_CASE("Unit_hipDrvGraphAddMemcpyNode_Positive_Array") { + CHECK_IMAGE_SUPPORT + + using namespace std::placeholders; + + constexpr bool async = false; + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + SECTION("Array from/to Host") { + DrvMemcpy3DArrayHostShell( + std::bind(DrvMemcpy3DGraphWrapper<>, _1, _2, _3, _4, _5, _6, context, _7)); + } + SECTION("Array from/to Device") { + DrvMemcpy3DArrayDeviceShell( + std::bind(DrvMemcpy3DGraphWrapper<>, _1, _2, _3, _4, _5, _6, context, _7)); + } + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +/** 
+ * Test Description + * ------------------------ + * - Verify API behaviour with invalid arguments: + * -# node is nullptr + * -# graph is nullptr + * -# pDependencies is nullptr when numDependencies is not zero + * -# A node in pDependencies originates from a different graph + * -# numDependencies is invalid + * -# A node is duplicated in pDependencies + * -# dst is nullptr + * -# src is nullptr + * -# dstPitch < width + * -# srcPitch < width + * -# dstPitch > max pitch + * -# srcPitch > max pitch + * -# WidthInBytes + dstXInBytes > dstPitch + * -# WidthInBytes + srcXInBytes > srcPitch + * -# dstY out of bounds + * -# srcY out of bounds + * -# dstZ out of bounds + * -# srcZ out of bounds + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemcpyNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphAddMemcpyNode_Negative_Parameters") { + using namespace std::placeholders; + + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + constexpr hipExtent extent{128 * sizeof(int), 128, 8}; + + constexpr auto NegativeTests = [](hipPitchedPtr dst_ptr, hipPos dst_pos, hipPitchedPtr src_ptr, + hipPos src_pos, hipExtent extent, hipMemcpyKind kind, + hipCtx_t context) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + + auto params = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind); + GraphAddNodeCommonNegativeTests( + std::bind(hipDrvGraphAddMemcpyNode, _1, _2, _3, _4, &params, context), graph); + + SECTION("dst_ptr.ptr == nullptr") { + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.ptr = nullptr; + auto params = GetDrvMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params, context), + hipErrorInvalidValue); + } + + 
SECTION("src_ptr.ptr == nullptr") { + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.ptr = nullptr; + auto params = GetDrvMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params, context), + hipErrorInvalidValue); + } + + SECTION("dstPitch < width") { + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.pitch = extent.width - 1; + auto params = GetDrvMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params, context), + hipErrorInvalidPitchValue); + } + + SECTION("srcPitch < width") { + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.pitch = extent.width - 1; + auto params = GetDrvMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params, context), + hipErrorInvalidPitchValue); + } + + SECTION("dstPitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.pitch = attr; + auto params = GetDrvMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params, context), + hipErrorInvalidValue); + } + + SECTION("srcPitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.pitch = attr; + auto params = GetDrvMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params, context), + hipErrorInvalidValue); + } + + SECTION("WidthInBytes + dstXInBytes > dstPitch") { + hipPos invalid_pos = dst_pos; + invalid_pos.x = dst_ptr.pitch - extent.width + 1; + auto params = GetDrvMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind); + 
HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params, context), + hipErrorInvalidValue); + } + + SECTION("WidthInBytes + srcXInBytes > srcPitch") { + hipPos invalid_pos = src_pos; + invalid_pos.x = src_ptr.pitch - extent.width + 1; + auto params = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params, context), + hipErrorInvalidValue); + } + + SECTION("dstY out of bounds") { + hipPos invalid_pos = dst_pos; + invalid_pos.y = 1; + auto params = GetDrvMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params, context), + hipErrorInvalidValue); + } + + SECTION("srcY out of bounds") { + hipPos invalid_pos = src_pos; + invalid_pos.y = 1; + auto params = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params, context), + hipErrorInvalidValue); + } + + SECTION("dstZ out of bounds") { + hipPos invalid_pos = dst_pos; + invalid_pos.z = 1; + auto params = GetDrvMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params, context), + hipErrorInvalidValue); + } + + SECTION("srcZ out of bounds") { + hipPos invalid_pos = src_pos; + invalid_pos.z = 1; + auto params = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params, context), + hipErrorInvalidValue); + } + + HIP_CHECK(hipGraphDestroy(graph)); + }; + + SECTION("Host to Device") { + LinearAllocGuard3D device_alloc(extent); + LinearAllocGuard host_alloc( + LinearAllocs::hipHostMalloc, + device_alloc.pitch() * device_alloc.height() * device_alloc.depth()); + NegativeTests(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), + make_hipPitchedPtr(host_alloc.ptr(), 
device_alloc.pitch(), device_alloc.width(), + device_alloc.height()), + make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice, context); + } + + SECTION("Device to Host") { + LinearAllocGuard3D device_alloc(extent); + LinearAllocGuard host_alloc( + LinearAllocs::hipHostMalloc, + device_alloc.pitch() * device_alloc.height() * device_alloc.depth()); + NegativeTests(make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(), + device_alloc.height()), + make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), extent, + hipMemcpyDeviceToHost, context); + } + + SECTION("Host to Host") { + LinearAllocGuard src_alloc(LinearAllocs::hipHostMalloc, + extent.width * extent.height * extent.depth); + LinearAllocGuard dst_alloc(LinearAllocs::hipHostMalloc, + extent.width * extent.height * extent.depth); + NegativeTests(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height), + make_hipPos(0, 0, 0), + make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height), + make_hipPos(0, 0, 0), extent, hipMemcpyHostToHost, context); + } + + SECTION("Device to Device") { + LinearAllocGuard3D src_alloc(extent); + LinearAllocGuard3D dst_alloc(extent); + NegativeTests(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(), + make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, context); + } + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} \ No newline at end of file diff --git a/catch/unit/graph/hipDrvGraphMemcpyNodeGetParams.cc b/catch/unit/graph/hipDrvGraphMemcpyNodeGetParams.cc new file mode 100644 index 0000000000..26062bbf13 --- /dev/null +++ b/catch/unit/graph/hipDrvGraphMemcpyNodeGetParams.cc @@ -0,0 +1,91 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +#include + +/** + * @addtogroup hipDrvGraphMemcpyNodeGetParams hipDrvGraphMemcpyNodeGetParams + * @{ + * @ingroup GraphTest + * `hipDrvGraphMemcpyNodeGetParams(hipGraphNode_t hNode, HIP_MEMCPY3D* nodeParams)` - + * Gets a memcpy node's parameters + * ________________________ + * Test cases from other APIs: + * - @ref Unit_hipDrvGraphMemcpyNodeSetParams_Positive_Basic + */ + +/** + * Test Description + * ------------------------ + * - Verify API behaviour with invalid arguments: + * -# node is nullptr + * -# pNodeParams is nullptr + * -# node is destroyed + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphMemcpyNodeGetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphMemcpyNodeGetParams_Negative_Parameters") { + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + constexpr hipExtent extent{128 * sizeof(int), 128, 8}; + + LinearAllocGuard3D src_alloc(extent); + LinearAllocGuard3D dst_alloc(extent); + + auto params = + GetDrvMemcpy3DParms(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(), + make_hipPos(0, 0, 0), dst_alloc.extent(), hipMemcpyDeviceToDevice); + + hipGraph_t graph = nullptr; + hipGraphNode_t node = nullptr; + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeGetParams(nullptr, &params), hipErrorInvalidValue); + } + + SECTION("pNodeParams == nullptr") { + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params, context)); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeGetParams(node, nullptr), hipErrorInvalidValue); + HIP_CHECK(hipGraphDestroy(graph)); + } + + SECTION("Node is destroyed") { + HIP_CHECK(hipGraphCreate(&graph, 0)); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params, context)); + HIP_CHECK(hipGraphDestroy(graph)); + 
HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeGetParams(node, &params), hipErrorInvalidValue); + } + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} diff --git a/catch/unit/graph/hipDrvGraphMemcpyNodeSetParams.cc b/catch/unit/graph/hipDrvGraphMemcpyNodeSetParams.cc new file mode 100644 index 0000000000..b1325206e7 --- /dev/null +++ b/catch/unit/graph/hipDrvGraphMemcpyNodeSetParams.cc @@ -0,0 +1,314 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include + +#include +#include +#include + +/** + * @addtogroup hipDrvGraphMemcpyNodeSetParams hipDrvGraphMemcpyNodeSetParams + * @{ + * @ingroup GraphTest + * `hipDrvGraphMemcpyNodeSetParams(hipGraphNode_t hNode, const HIP_MEMCPY3D* nodeParams)` - Sets a + * memcpy node's parameters + */ + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values after which the graph is + * executed and the results of the memcpy verified. + * The test is run for all possible memcpy directions, with both the corresponding memcpy + * kind and hipMemcpyDefault, as well as half page and full page allocation sizes. + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphMemcpyNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphMemcpyNodeSetParams_Positive_Basic") { + using namespace std::placeholders; + + constexpr bool async = false; + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + SECTION("Device to host") { + Memcpy3DDeviceToHostShell( + std::bind(DrvMemcpy3DGraphWrapper, _1, _2, _3, _4, _5, _6, context, _7)); + } + + SECTION("Host to device") { + Memcpy3DHostToDeviceShell( + std::bind(DrvMemcpy3DGraphWrapper, _1, _2, _3, _4, _5, _6, context, _7)); + } + + SECTION("Host to host") { + Memcpy3DHostToHostShell( + std::bind(DrvMemcpy3DGraphWrapper, _1, _2, _3, _4, _5, _6, context, _7)); + } + + SECTION("Device to device") { + SECTION("Peer access enabled") { + Memcpy3DDeviceToDeviceShell( + std::bind(DrvMemcpy3DGraphWrapper, _1, _2, _3, _4, _5, _6, context, _7)); + } + SECTION("Peer access disabled") { + Memcpy3DDeviceToDeviceShell( + std::bind(DrvMemcpy3DGraphWrapper, _1, _2, _3, _4, _5, _6, context, _7)); + } + } + + 
HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +TEST_CASE("Unit_hipDrvGraphMemcpyNodeSetParams_Positive_Array") { + CHECK_IMAGE_SUPPORT + + using namespace std::placeholders; + + constexpr bool async = false; + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + SECTION("Array from/to Host") { + DrvMemcpy3DArrayHostShell( + std::bind(DrvMemcpy3DGraphWrapper, _1, _2, _3, _4, _5, _6, context, _7)); + } + SECTION("Array from/to Device") { + DrvMemcpy3DArrayDeviceShell( + std::bind(DrvMemcpy3DGraphWrapper, _1, _2, _3, _4, _5, _6, context, _7)); + } + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + + +/** + * Test Description + * ------------------------ + * - Verify API behaviour with invalid arguments: + * -# node is nullptr + * -# dst is nullptr + * -# src is nullptr + * -# dstPitch < width + * -# srcPitch < width + * -# dstPitch > max pitch + * -# srcPitch > max pitch + * -# WidthInBytes + dstXInBytes > dstPitch + * -# WidthInBytes + srcXInBytes > srcPitch + * -# dstY out of bounds + * -# srcY out of bounds + * -# dstZ out of bounds + * -# srcZ out of bounds + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphMemcpyNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphMemcpyNodeSetParams_Negative_Parameters") { + using namespace std::placeholders; + + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + constexpr hipExtent extent{128 * sizeof(int), 128, 8}; + + constexpr auto NegativeTests = [](hipPitchedPtr dst_ptr, hipPos dst_pos, hipPitchedPtr src_ptr, + hipPos src_pos, hipExtent extent, hipMemcpyKind kind, + hipCtx_t context) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + 
hipGraphNode_t node = nullptr; + + auto params = GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&node, graph, nullptr, 0, &params, context)); + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(nullptr, &params), hipErrorInvalidValue); + } + + SECTION("dst_ptr.ptr == nullptr") { + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.ptr = nullptr; + auto invalid_params = + GetDrvMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + SECTION("src_ptr.ptr == nullptr") { + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.ptr = nullptr; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + SECTION("dstPitch < width") { + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.pitch = extent.width - 1; + auto invalid_params = + GetDrvMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), + hipErrorInvalidPitchValue); + } + + SECTION("srcPitch < width") { + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.pitch = extent.width - 1; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), + hipErrorInvalidPitchValue); + } + + SECTION("dstPitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + hipPitchedPtr invalid_ptr = dst_ptr; + invalid_ptr.pitch = attr; + auto invalid_params = + GetDrvMemcpy3DParms(invalid_ptr, dst_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + 
SECTION("srcPitch > max pitch") { + int attr = 0; + HIP_CHECK(hipDeviceGetAttribute(&attr, hipDeviceAttributeMaxPitch, 0)); + hipPitchedPtr invalid_ptr = src_ptr; + invalid_ptr.pitch = attr; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, dst_pos, invalid_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + SECTION("WidthInBytes + dstXInBytes > dstPitch") { + hipPos invalid_pos = dst_pos; + invalid_pos.x = dst_ptr.pitch - extent.width + 1; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + SECTION("WidthInBytes + srcXInBytes > srcPitch") { + hipPos invalid_pos = src_pos; + invalid_pos.x = src_ptr.pitch - extent.width + 1; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + SECTION("dstY out of bounds") { + hipPos invalid_pos = dst_pos; + invalid_pos.y = 1; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + SECTION("srcY out of bounds") { + hipPos invalid_pos = src_pos; + invalid_pos.y = 1; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + SECTION("dstZ out of bounds") { + hipPos invalid_pos = dst_pos; + invalid_pos.z = 1; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, invalid_pos, src_ptr, src_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + SECTION("srcZ out of bounds") { + hipPos invalid_pos 
= src_pos; + invalid_pos.z = 1; + auto invalid_params = + GetDrvMemcpy3DParms(dst_ptr, dst_pos, src_ptr, invalid_pos, extent, kind); + HIP_CHECK_ERROR(hipDrvGraphMemcpyNodeSetParams(node, &invalid_params), hipErrorInvalidValue); + } + + HIP_CHECK(hipGraphDestroy(graph)); + }; + + SECTION("Host to Device") { + LinearAllocGuard3D device_alloc(extent); + LinearAllocGuard host_alloc( + LinearAllocs::hipHostMalloc, + device_alloc.pitch() * device_alloc.height() * device_alloc.depth()); + NegativeTests(device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), + make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(), + device_alloc.height()), + make_hipPos(0, 0, 0), extent, hipMemcpyHostToDevice, context); + } + + SECTION("Device to Host") { + LinearAllocGuard3D device_alloc(extent); + LinearAllocGuard host_alloc( + LinearAllocs::hipHostMalloc, + device_alloc.pitch() * device_alloc.height() * device_alloc.depth()); + NegativeTests(make_hipPitchedPtr(host_alloc.ptr(), device_alloc.pitch(), device_alloc.width(), + device_alloc.height()), + make_hipPos(0, 0, 0), device_alloc.pitched_ptr(), make_hipPos(0, 0, 0), extent, + hipMemcpyDeviceToHost, context); + } + + SECTION("Host to Host") { + LinearAllocGuard src_alloc(LinearAllocs::hipHostMalloc, + extent.width * extent.height * extent.depth); + LinearAllocGuard dst_alloc(LinearAllocs::hipHostMalloc, + extent.width * extent.height * extent.depth); + NegativeTests(make_hipPitchedPtr(dst_alloc.ptr(), extent.width, extent.width, extent.height), + make_hipPos(0, 0, 0), + make_hipPitchedPtr(src_alloc.ptr(), extent.width, extent.width, extent.height), + make_hipPos(0, 0, 0), extent, hipMemcpyHostToHost, context); + } + + SECTION("Device to Device") { + LinearAllocGuard3D src_alloc(extent); + LinearAllocGuard3D dst_alloc(extent); + NegativeTests(dst_alloc.pitched_ptr(), make_hipPos(0, 0, 0), src_alloc.pitched_ptr(), + make_hipPos(0, 0, 0), extent, hipMemcpyDeviceToDevice, context); + } + + 
HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} From 63c93f95a89423d8fd8cfd20922c7bec55ffc5f5 Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Mon, 25 Dec 2023 17:57:36 +0000 Subject: [PATCH 52/71] EXSWHTEC-371 - Implement tests for the hipDrvGraphAddMemsetNode API #447 Change-Id: I981e14f34b008054d46e61ebae0099792df446b1 --- catch/include/memcpy3d_tests_common.hh | 2 +- catch/unit/graph/CMakeLists.txt | 1 + .../graph/graph_memset_node_test_common.hh | 6 +- catch/unit/graph/hipDrvGraphAddMemsetNode.cc | 670 ++++++++++++++++++ catch/unit/graph/hipGraphAddMemsetNode.cc | 2 +- .../unit/graph/hipGraphMemsetNodeSetParams.cc | 2 +- 6 files changed, 677 insertions(+), 6 deletions(-) create mode 100644 catch/unit/graph/hipDrvGraphAddMemsetNode.cc diff --git a/catch/include/memcpy3d_tests_common.hh b/catch/include/memcpy3d_tests_common.hh index acb549c94a..68e388da19 100644 --- a/catch/include/memcpy3d_tests_common.hh +++ b/catch/include/memcpy3d_tests_common.hh @@ -881,4 +881,4 @@ void DrvMemcpy3DArrayDeviceShell(F memcpy_func, const hipStream_t kernel_stream }; PitchedMemoryVerify(host_alloc.ptr(), extent.width, extent.width / sizeof(int), extent.height, extent.depth, f); -} +} \ No newline at end of file diff --git a/catch/unit/graph/CMakeLists.txt b/catch/unit/graph/CMakeLists.txt index 82525991c7..c7b4036c49 100644 --- a/catch/unit/graph/CMakeLists.txt +++ b/catch/unit/graph/CMakeLists.txt @@ -153,6 +153,7 @@ set(TEST_SRC hipGraphAddMemFreeNode.cc hipDrvGraphMemcpyNodeGetParams.cc hipDrvGraphMemcpyNodeSetParams.cc + hipDrvGraphAddMemsetNode.cc ) add_custom_target(add_Kernel.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} ${CMAKE_CURRENT_SOURCE_DIR}/add_Kernel.cpp -o ${CMAKE_CURRENT_BINARY_DIR}/../graph/add_Kernel.code -I${HIP_PATH}/include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH}) diff --git a/catch/unit/graph/graph_memset_node_test_common.hh b/catch/unit/graph/graph_memset_node_test_common.hh 
index f4b957283e..b23a169339 100644 --- a/catch/unit/graph/graph_memset_node_test_common.hh +++ b/catch/unit/graph/graph_memset_node_test_common.hh @@ -26,14 +26,14 @@ THE SOFTWARE. #include #include -template void GraphMemsetNodeCommonPositive(F f) { +template void GraphMemsetNodeCommonPositive(F f) { const size_t width = GENERATE(1, 64, kPageSize / sizeof(T) + 1); const size_t height = GENERATE(1, 2, 1024); DYNAMIC_SECTION("Width: " << width << " Height: " << height) { LinearAllocGuard2D alloc(width, height); constexpr T set_value = 42; - hipMemsetParams params = {}; + Tp params = {}; params.dst = alloc.ptr(); params.elementSize = sizeof(T); params.width = width; @@ -50,7 +50,7 @@ template void GraphMemsetNodeCommonPositive(F f) { } } -template void MemsetCommonNegative(F f, hipMemsetParams params) { +template void MemsetCommonNegative(F f, T params) { SECTION("pMemsetParams == nullptr") { HIP_CHECK_ERROR(f(nullptr), hipErrorInvalidValue); } SECTION("pMemsetParams.dst == nullptr") { diff --git a/catch/unit/graph/hipDrvGraphAddMemsetNode.cc b/catch/unit/graph/hipDrvGraphAddMemsetNode.cc new file mode 100644 index 0000000000..a96988701d --- /dev/null +++ b/catch/unit/graph/hipDrvGraphAddMemsetNode.cc @@ -0,0 +1,670 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include + +#include +#include +#include +#include +#include + +#include "graph_memset_node_test_common.hh" +#include "graph_tests_common.hh" + + +/** + * @addtogroup hipDrvGraphAddMemsetNode hipDrvGraphAddMemsetNode + * @{ + * @ingroup GraphTest + * `hipDrvGraphAddMemsetNode(hipGraphNode_t* phGraphNode, hipGraph_t hGraph, const hipGraphNode_t* + * dependencies, size_t numDependencies, const HIP_MEMSET_NODE_PARAMS* memsetParams, hipCtx_t ctx)` + * - Creates a memset node and adds it to a graph + */ + +/** + * Test Description + * ------------------------ + * - Verify that all elements of destination memory are set to the correct value. + * The test is repeated for all valid element sizes(1, 2, 4), and several allocations of different + * height and width, both on host and device. 
+ * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemsetNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEMPLATE_TEST_CASE("Unit_hipDrvGraphAddMemsetNode_Positive_Basic", "", uint8_t, uint16_t, + uint32_t) { + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + CHECK_IMAGE_SUPPORT + + const auto f = [&context](HIP_MEMSET_NODE_PARAMS* params) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + hipGraphNode_t node = nullptr; + HIP_CHECK(hipDrvGraphAddMemsetNode(&node, graph, nullptr, 0, params, context)); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + + HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread)); + HIP_CHECK(hipStreamSynchronize(hipStreamPerThread)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); + + return hipSuccess; + }; + + GraphMemsetNodeCommonPositive(f); + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +/** + * Test Description + * ------------------------ + * - Verify API behaviour with invalid arguments: + * -# pGraphNode is nullptr + * -# graph is nullptr + * -# pDependencies is nullptr when numDependencies is not zero + * -# A node in pDependencies originates from a different graph + * -# numDependencies is invalid + * -# A node is duplicated in pDependencies + * -# pMemsetParams is nullptr + * -# pMemsetParams::dst is nullptr + * -# pMemsetParams::elementSize is different from 1, 2, and 4 + * -# pMemsetParams::width is zero + * -# pMemsetParams::width is larger than the allocated memory region + * -# pMemsetParams::height is zero + * -# pMemsetParams::pitch is less than width when height is more than 1 + * -# pMemsetParams::pitch * pMemsetParams::height is larger than the allocated memory region + * Test 
source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemsetNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphAddMemsetNode_Negative_Parameters") { + using namespace std::placeholders; + + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + LinearAllocGuard alloc(LinearAllocs::hipMalloc, 4 * sizeof(int)); + HIP_MEMSET_NODE_PARAMS params = {}; + params.dst = alloc.ptr(); + params.elementSize = sizeof(*alloc.ptr()); + params.width = 1; + params.height = 1; + params.value = 42; + + GraphAddNodeCommonNegativeTests( + std::bind(hipDrvGraphAddMemsetNode, _1, _2, _3, _4, ¶ms, context), graph); + + hipGraphNode_t node = nullptr; + MemsetCommonNegative(std::bind(hipDrvGraphAddMemsetNode, &node, graph, nullptr, 0, _1, context), + params); + + HIP_CHECK(hipGraphDestroy(graph)); + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +/** + * Test Description + * ------------------------ + * - Allocate a 2D array using hipMallocPitch. Initialize the allocated memory using + * hipDrvGraphAddMemsetNode. Copy the values in device memory to host using + * hipDrvGraphAddMemcpyNode. Verify the results. + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemsetNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphAddMemsetNode_hipMallocPitch_2D") { + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + CHECK_IMAGE_SUPPORT + + size_t width = SIZE * sizeof(char), numW{SIZE}, numH{SIZE}, pitch_A; + char* A_d; + + hipGraph_t graph; + std::vector nodeDependencies; + // Host memory. 
+ char* A_h = new char[numW * numH]; + for (size_t i = 0; i < numW; i++) { + for (size_t j = 0; j < numH; j++) { + *(A_h + i * numH + j) = ' '; + } + } + // 2D Memory allocation hipMallocPitch + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, numH)); + // Create Graph + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t memsetNode, memcpyNode; + // Add MemSet Node + HIP_MEMSET_NODE_PARAMS memsetParams{}; + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d); + memsetParams.value = memSetVal; + memsetParams.pitch = pitch_A; + memsetParams.elementSize = sizeof(char); + memsetParams.width = numW; + memsetParams.height = numH; + HIP_CHECK(hipDrvGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams, context)); + nodeDependencies.push_back(memsetNode); + + // Add MemCpy Node + auto srcPos = make_hipPos(0, 0, 0); + auto dstPos = make_hipPos(0, 0, 0); + auto srcPtr = make_hipPitchedPtr(A_d, pitch_A, numW, numH); + auto dstPtr = make_hipPitchedPtr(A_h, width, numW, numH); + auto extent = make_hipExtent(width, numH, 1); + hipMemcpyKind kind = hipMemcpyDeviceToHost; + + HIP_MEMCPY3D myparms = GetDrvMemcpy3DParms(dstPtr, dstPos, srcPtr, srcPos, extent, kind); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), + nodeDependencies.size(), &myparms, context)); + nodeDependencies.clear(); + // Create executable graph + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Verification + for (size_t i = 0; i < numW; i++) { + for (size_t j = 0; j < numH; j++) { + REQUIRE(*(A_h + i * numH + j) == memSetVal); + } + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + 
delete[] A_h; + HIP_CHECK(hipFree(A_d)); + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +/** + * Test Description + * ------------------------ + * - Allocate a 1D array using hipMallocPitch. Initialize the allocated memory using + * hipDrvGraphAddMemsetNode. Copy the values in device memory to host using + * hipDrvGraphAddMemcpyNode. Verify the results. + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemsetNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphAddMemsetNode_hipMallocPitch_1D") { + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + CHECK_IMAGE_SUPPORT + + size_t width = SIZE * sizeof(char), numW{SIZE}, pitch_A; + char* A_d; + + // Initialize the host memory + std::vector A_h(numW, ' '); + + hipGraph_t graph; + std::vector nodeDependencies; + // 1D Memory allocation hipMallocPitch + HIP_CHECK(hipMallocPitch(reinterpret_cast(&A_d), &pitch_A, width, 1)); + // Create Graph + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t memsetNode, memcpyNode; + // Add MemSet Node + HIP_MEMSET_NODE_PARAMS memsetParams{}; + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d); + memsetParams.value = memSetVal; + memsetParams.pitch = pitch_A; + memsetParams.elementSize = sizeof(char); + memsetParams.width = numW; + memsetParams.height = 1; + HIP_CHECK(hipDrvGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams, context)); + nodeDependencies.push_back(memsetNode); + + // Add MemCpy Node + auto srcPos = make_hipPos(0, 0, 0); + auto dstPos = make_hipPos(0, 0, 0); + auto srcPtr = make_hipPitchedPtr(A_d, pitch_A, numW, 1); + auto dstPtr = make_hipPitchedPtr(A_h.data(), width, numW, 1); + auto extent = make_hipExtent(width, 1, 1); + hipMemcpyKind kind = hipMemcpyDeviceToHost; + + HIP_MEMCPY3D 
myparms = GetDrvMemcpy3DParms(dstPtr, dstPos, srcPtr, srcPos, extent, kind); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), + nodeDependencies.size(), &myparms, context)); + nodeDependencies.clear(); + + // Create executable graph + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Verification + for (size_t i = 0; i < numW; i++) { + REQUIRE(A_h[i] == memSetVal); + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipFree(A_d)); + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +/** + * Test Description + * ------------------------ + * - Allocate a 2D array using hipMalloc3D. Initialize the allocated memory using + * hipDrvGraphAddMemsetNode. Copy the values in device memory to host using + * hipDrvGraphAddMemcpyNode. Verify the results. + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemsetNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_2D") { + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + CHECK_IMAGE_SUPPORT + + size_t width = SIZE * sizeof(char); + size_t numW = SIZE, numH = SIZE; + + // Host Memory + char* A_h = new char[numW * numH]; + for (size_t i = 0; i < numW; i++) { + for (size_t j = 0; j < numH; j++) { + *(A_h + i * numH + j) = ' '; + } + } + hipGraph_t graph; + std::vector nodeDependencies; + + hipPitchedPtr A_d; + hipExtent extent3D = make_hipExtent(width, numH, 1); + + // Allocate 3D memory. 
+ HIP_CHECK(hipMalloc3D(&A_d, extent3D)); + + // Create Graph + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t memsetNode, memcpyNode; + + // Add MemSet Node + HIP_MEMSET_NODE_PARAMS memsetParams{}; + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d.ptr); + memsetParams.value = memSetVal; + memsetParams.pitch = A_d.pitch; + memsetParams.elementSize = sizeof(char); + memsetParams.width = numW; + memsetParams.height = numH; + HIP_CHECK(hipDrvGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams, context)); + nodeDependencies.push_back(memsetNode); + + // Add MemCpy Node + auto srcPos = make_hipPos(0, 0, 0); + auto dstPos = make_hipPos(0, 0, 0); + auto srcPtr = A_d; + auto dstPtr = make_hipPitchedPtr(A_h, width, numW, numH); + auto extent = make_hipExtent(width, numH, 1); + hipMemcpyKind kind = hipMemcpyDeviceToHost; + + HIP_MEMCPY3D myparms = GetDrvMemcpy3DParms(dstPtr, dstPos, srcPtr, srcPos, extent, kind); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), + nodeDependencies.size(), &myparms, context)); + nodeDependencies.clear(); + + // Create executable graph + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Verification + for (size_t i = 0; i < numW; i++) { + for (size_t j = 0; j < numH; j++) { + REQUIRE(*(A_h + i * numH + j) == memSetVal); + } + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + delete[] A_h; + HIP_CHECK(hipFree(A_d.ptr)); + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +/** + * Test Description + * ------------------------ + * - Allocate a 1D array using hipMalloc3D. 
Initialize the allocated memory using + * hipDrvGraphAddMemsetNode. Copy the values in device memory to host using + * hipDrvGraphAddMemcpyNode. Verify the results. + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemsetNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphAddMemsetNode_hipMalloc3D_1D") { + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + CHECK_IMAGE_SUPPORT + + size_t width = SIZE * sizeof(char); + size_t numW = SIZE; + + // Initialize the host memory + std::vector A_h(numW, ' '); + + hipGraph_t graph; + std::vector nodeDependencies; + + hipPitchedPtr A_d; + hipExtent extent1D = make_hipExtent(width, 1, 1); + + // Allocate 3D memory. + HIP_CHECK(hipMalloc3D(&A_d, extent1D)); + + // Create Graph + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t memsetNode, memcpyNode; + + // Add MemSet Node + HIP_MEMSET_NODE_PARAMS memsetParams{}; + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d.ptr); + memsetParams.value = memSetVal; + memsetParams.pitch = A_d.pitch; + memsetParams.elementSize = sizeof(char); + memsetParams.width = numW; + memsetParams.height = 1; + HIP_CHECK(hipDrvGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams, context)); + nodeDependencies.push_back(memsetNode); + + // Add MemCpy Node + auto srcPos = make_hipPos(0, 0, 0); + auto dstPos = make_hipPos(0, 0, 0); + auto srcPtr = A_d; + auto dstPtr = make_hipPitchedPtr(A_h.data(), width, numW, 1); + auto extent = make_hipExtent(width, 1, 1); + hipMemcpyKind kind = hipMemcpyDeviceToHost; + + HIP_MEMCPY3D myparms = GetDrvMemcpy3DParms(dstPtr, dstPos, srcPtr, srcPos, extent, kind); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), + nodeDependencies.size(), &myparms, context)); + nodeDependencies.clear(); + + // 
Create executable graph + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Verification + for (size_t i = 0; i < numW; i++) { + REQUIRE(A_h[i] == memSetVal); + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipFree(A_d.ptr)); + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +/** + * Test Description + * ------------------------ + * - Allocate a 1D array using hipMalloc. Initialize the allocated memory using + * hipDrvGraphAddMemsetNode. Copy the values in device memory to host using + * hipDrvGraphAddMemcpyNode. Verify the results. + * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemsetNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDrvGraphAddMemsetNode_hipMalloc_1D") { + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + char* A_d; + size_t NumW = SIZE; + size_t Nbytes1D = SIZE * sizeof(char); + + // Initialize the host memory + std::vector A_h(NumW, ' '); + + // Allocate memory to Device pointer + HIP_CHECK(hipMalloc(reinterpret_cast(&A_d), Nbytes1D)); + + // Create the graph + hipGraph_t graph; + std::vector nodeDependencies; + hipGraphNode_t memsetNode, memcpyNode; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + // Add Memset node + HIP_MEMSET_NODE_PARAMS memsetParams{}; + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d); + memsetParams.value = memSetVal; + memsetParams.pitch = Nbytes1D; + memsetParams.elementSize = sizeof(char); + memsetParams.width = NumW; + 
memsetParams.height = 1; + HIP_CHECK(hipDrvGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams, context)); + nodeDependencies.push_back(memsetNode); + + // Add MemCpy Node + hipPitchedPtr devPitchedPtr{A_d, Nbytes1D, NumW, 0}; + hipPitchedPtr hostPitchedPtr{A_h.data(), Nbytes1D, NumW, 0}; + auto srcPos = make_hipPos(0, 0, 0); + auto dstPos = make_hipPos(0, 0, 0); + auto srcPtr = devPitchedPtr; + auto dstPtr = hostPitchedPtr; + auto extent = make_hipExtent(Nbytes1D, 1, 1); + hipMemcpyKind kind = hipMemcpyDeviceToHost; + + HIP_MEMCPY3D myparms = GetDrvMemcpy3DParms(dstPtr, dstPos, srcPtr, srcPos, extent, kind); + HIP_CHECK(hipDrvGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), + nodeDependencies.size(), &myparms, context)); + nodeDependencies.clear(); + // Create executable graph + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Verification + for (size_t i = 0; i < NumW; i++) { + REQUIRE(A_h[i] == memSetVal); + } + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipFree(A_d)); + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} + +/** + * Test Description + * ------------------------ + * - Allocate a 1D array using hipMallocManaged. Initialize the allocated memory using + * hipDrvGraphAddMemsetNode. Copy the values in device memory to host using + * hipDrvGraphAddMemcpyNode. Verify the results. 
+ * Test source + * ------------------------ + * - unit/graph/hipDrvGraphAddMemsetNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphAddMemsetNode_hipMallocManaged") { + HIP_CHECK(hipInit(0)); + hipDevice_t device; + hipCtx_t context; + HIP_CHECK(hipDeviceGet(&device, 0)); + HIP_CHECK(hipCtxCreate(&context, 0, device)); + + int managed = 0; + HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, 0)); + INFO("hipDeviceAttributeManagedMemory: " << managed); + if (managed != 1) { + WARN( + "GPU 0 doesn't support hipDeviceAttributeManagedMemory attribute " + "so defaulting to system memory."); + } + size_t Nbytes1D = SIZE * sizeof(char); + char* A_d; + // Initialize the host memory + std::vector A_h(SIZE, ' '); + // Device Memory + HIP_CHECK(hipMallocManaged(&A_d, SIZE * sizeof(char))); + // Create the graph + hipGraph_t graph; + std::vector nodeDependencies; + hipGraphNode_t memsetNode, memcpyNode; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + // Add Memset node + HIP_MEMSET_NODE_PARAMS memsetParams{}; + memset(&memsetParams, 0, sizeof(memsetParams)); + memsetParams.dst = reinterpret_cast(A_d); + memsetParams.value = memSetVal; + memsetParams.pitch = Nbytes1D; + memsetParams.elementSize = sizeof(char); + memsetParams.width = SIZE; + memsetParams.height = 1; + HIP_CHECK(hipDrvGraphAddMemsetNode(&memsetNode, graph, nullptr, 0, &memsetParams, context)); + nodeDependencies.push_back(memsetNode); + + // Add MemCpy Node + hipPitchedPtr devPitchedPtr{A_d, Nbytes1D, SIZE, 1}; + hipPitchedPtr hostPitchedPtr{A_h.data(), Nbytes1D, SIZE, 1}; + + auto srcPos = make_hipPos(0, 0, 0); + auto dstPos = make_hipPos(0, 0, 0); + auto srcPtr = devPitchedPtr; + auto dstPtr = hostPitchedPtr; + auto extent = make_hipExtent(Nbytes1D, 1, 1); + hipMemcpyKind kind = hipMemcpyDeviceToHost; + + HIP_MEMCPY3D myparms = GetDrvMemcpy3DParms(dstPtr, dstPos, srcPtr, srcPos, extent, kind); + 
HIP_CHECK(hipDrvGraphAddMemcpyNode(&memcpyNode, graph, nodeDependencies.data(), + nodeDependencies.size(), &myparms, context)); + nodeDependencies.clear(); + + // Create executable graph + hipStream_t streamForGraph; + hipGraphExec_t graphExec; + HIP_CHECK(hipStreamCreate(&streamForGraph)); + HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0)); + HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph)); + HIP_CHECK(hipStreamSynchronize(streamForGraph)); + + // Verification + for (size_t i = 0; i < SIZE; i++) { + REQUIRE(A_h[i] == memSetVal); + } + + HIP_CHECK(hipGraphExecDestroy(graphExec)); + HIP_CHECK(hipGraphDestroy(graph)); + HIP_CHECK(hipStreamDestroy(streamForGraph)); + HIP_CHECK(hipFree(A_d)); + + HIP_CHECK(hipCtxPopCurrent(&context)); + HIP_CHECK(hipCtxDestroy(context)); +} diff --git a/catch/unit/graph/hipGraphAddMemsetNode.cc b/catch/unit/graph/hipGraphAddMemsetNode.cc index af502ab07a..e11d08a4b3 100644 --- a/catch/unit/graph/hipGraphAddMemsetNode.cc +++ b/catch/unit/graph/hipGraphAddMemsetNode.cc @@ -76,7 +76,7 @@ TEMPLATE_TEST_CASE("Unit_hipGraphAddMemsetNode_Positive_Basic", "", uint8_t, uin return hipSuccess; }; - GraphMemsetNodeCommonPositive(f); + GraphMemsetNodeCommonPositive(f); } /** diff --git a/catch/unit/graph/hipGraphMemsetNodeSetParams.cc b/catch/unit/graph/hipGraphMemsetNodeSetParams.cc index af8e6d50da..d816e8b88e 100644 --- a/catch/unit/graph/hipGraphMemsetNodeSetParams.cc +++ b/catch/unit/graph/hipGraphMemsetNodeSetParams.cc @@ -99,7 +99,7 @@ TEMPLATE_TEST_CASE("Unit_hipGraphMemsetNodeSetParams_Positive_Basic", "", uint8_ return hipSuccess; }; - GraphMemsetNodeCommonPositive(f); + GraphMemsetNodeCommonPositive(f); } /** From 00433d4f8787db9e774d71771283c232390f1636 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Fri, 2 Feb 2024 12:29:18 +0530 Subject: [PATCH 53/71] EXSWHTEC-380 - Implement tests for Virtual Memory Management API functions #448 Change-Id: Ic766be69fddd0309f7ad4093465494cc14c7c70b --- 
.../config/config_nvidia_linux.json | 4 +- .../config/config_nvidia_windows.json | 3 +- .../hipMallocManagedCommon.hh | 0 catch/include/hip_test_defgroups.hh | 190 +------- catch/unit/CMakeLists.txt | 1 + catch/unit/memory/CMakeLists.txt | 3 +- catch/unit/memory/hip_vmm_common.hh | 51 -- .../virtualMemoryManagement/CMakeLists.txt | 55 +++ .../hipMemAddressFree.cc | 18 +- .../hipMemAddressReserve.cc | 40 +- .../hipMemCreate.cc | 116 ++--- .../hipMemExportToShareableHandle.cc | 145 ++++++ .../hipMemGetAllocationGranularity.cc | 71 +-- ...hipMemGetAllocationPropertiesFromHandle.cc | 42 +- .../hipMemImportFromShareableHandle.cc | 210 ++++++++ .../hipMemMap.cc | 126 +++-- .../hipMemMapArrayAsync.cc | 110 +++++ .../hipMemRelease.cc | 8 +- .../hipMemRetainAllocationHandle.cc | 51 +- .../hipMemSetGetAccess.cc | 456 ++++++++---------- .../hipMemUnmap.cc | 26 +- .../hipMemVmm_old.cc} | 55 ++- .../virtualMemoryManagement/hip_vmm_common.hh | 49 ++ 23 files changed, 1043 insertions(+), 787 deletions(-) rename catch/{unit/memory => include}/hipMallocManagedCommon.hh (100%) delete mode 100644 catch/unit/memory/hip_vmm_common.hh create mode 100644 catch/unit/virtualMemoryManagement/CMakeLists.txt rename catch/unit/{memory => virtualMemoryManagement}/hipMemAddressFree.cc (84%) rename catch/unit/{memory => virtualMemoryManagement}/hipMemAddressReserve.cc (83%) rename catch/unit/{memory => virtualMemoryManagement}/hipMemCreate.cc (84%) create mode 100644 catch/unit/virtualMemoryManagement/hipMemExportToShareableHandle.cc rename catch/unit/{memory => virtualMemoryManagement}/hipMemGetAllocationGranularity.cc (73%) rename catch/unit/{memory => virtualMemoryManagement}/hipMemGetAllocationPropertiesFromHandle.cc (84%) create mode 100644 catch/unit/virtualMemoryManagement/hipMemImportFromShareableHandle.cc rename catch/unit/{memory => virtualMemoryManagement}/hipMemMap.cc (86%) create mode 100644 catch/unit/virtualMemoryManagement/hipMemMapArrayAsync.cc rename catch/unit/{memory => 
virtualMemoryManagement}/hipMemRelease.cc (89%) rename catch/unit/{memory => virtualMemoryManagement}/hipMemRetainAllocationHandle.cc (76%) rename catch/unit/{memory => virtualMemoryManagement}/hipMemSetGetAccess.cc (83%) rename catch/unit/{memory => virtualMemoryManagement}/hipMemUnmap.cc (83%) rename catch/unit/{memory/hipMemVmm.cc => virtualMemoryManagement/hipMemVmm_old.cc} (58%) create mode 100644 catch/unit/virtualMemoryManagement/hip_vmm_common.hh diff --git a/catch/hipTestMain/config/config_nvidia_linux.json b/catch/hipTestMain/config/config_nvidia_linux.json index ada918a267..453c984199 100644 --- a/catch/hipTestMain/config/config_nvidia_linux.json +++ b/catch/hipTestMain/config/config_nvidia_linux.json @@ -92,6 +92,8 @@ "Unit_hipMemcpy_Positive_Synchronization_Behavior", "=== Below tests fail in external CI for PR https://github.com/ROCm-Developer-Tools/hip-tests/pull/210 ===", "Unit_StaticAssert_Positive_Basic", - "Unit_StaticAssert_Negative_Basic" + "Unit_StaticAssert_Negative_Basic", + "Unit_hipMemImportFromShareableHandle_Positive_MultiProc", + "Unit_hipMemMapArrayAsync_Positive_Basic" ] } diff --git a/catch/hipTestMain/config/config_nvidia_windows.json b/catch/hipTestMain/config/config_nvidia_windows.json index 3e7785a3e8..5d118b16e6 100644 --- a/catch/hipTestMain/config/config_nvidia_windows.json +++ b/catch/hipTestMain/config/config_nvidia_windows.json @@ -44,6 +44,7 @@ "Performance_hipMemsetD32", "Performance_hipMemsetD32Async", "Unit_hipMemcpyParam2D_Positive_Synchronization_Behavior", - "Unit_hipMemcpy_Positive_Synchronization_Behavior" + "Unit_hipMemcpy_Positive_Synchronization_Behavior", + "Unit_hipMemMapArrayAsync_Positive_Basic" ] } diff --git a/catch/unit/memory/hipMallocManagedCommon.hh b/catch/include/hipMallocManagedCommon.hh similarity index 100% rename from catch/unit/memory/hipMallocManagedCommon.hh rename to catch/include/hipMallocManagedCommon.hh diff --git a/catch/include/hip_test_defgroups.hh b/catch/include/hip_test_defgroups.hh 
index 8191c8b96f..2a8413d79f 100644 --- a/catch/include/hip_test_defgroups.hh +++ b/catch/include/hip_test_defgroups.hh @@ -103,16 +103,9 @@ THE SOFTWARE. */ /** -* @defgroup KernelTest Kernel Functions Management -* @{ -* This section describes the various kernel functions invocation. -* @} -*/ - -/** - * @defgroup AtomicsTest Device Atomics + * @defgroup KernelTest Kernel Functions Management * @{ - * This section describes tests for the Device Atomic APIs. + * This section describes the various kernel functions invocation. * @} */ @@ -140,7 +133,8 @@ THE SOFTWARE. /** * @defgroup PeerToPeerTest PeerToPeer Device Memory Access * @{ - * This section describes tests for the PeerToPeer device memory access functions of HIP runtime API. + * This section describes tests for the PeerToPeer device memory access functions of HIP runtime + * API. * @warning PeerToPeer support is experimental. * @} */ @@ -200,124 +194,6 @@ THE SOFTWARE. * @} */ -/** - * @defgroup AtomicsTest Device Atomics - * @{ - * This section describes tests for the Device Atomic APIs. - */ - -/** - * @addtogroup atomicAdd atomicAdd - * @{ - * @ingroup AtomicsTest - */ - -/** - * Test Description - * ------------------------ - * - Compiles atomicAdd with invalid parameters. - * - Compiles the source with specialized Python tool. - * -# Utilizes sub-process to invoke compilation of faulty source. - * -# Performs post-processing of compiler output and counts errors. - * Test source - * ------------------------ - * - unit/atomics/CMakeLists.txt - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.2 - */ -TEST_CASE("Unit_atomicAdd_Negative_Parameters") {} -/** - * End doxygen group atomicAdd. - * @} - */ - -/** - * @addtogroup atomicSub atomicSub - * @{ - * @ingroup AtomicsTest - */ - -/** - * Test Description - * ------------------------ - * - Compiles atomicSub with invalid parameters. - * - Compiles the source with specialized Python tool. 
- * -# Utilizes sub-process to invoke compilation of faulty source. - * -# Performs post-processing of compiler output and counts errors. - * Test source - * ------------------------ - * - unit/atomics/CMakeLists.txt - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.2 - */ -TEST_CASE("Unit_atomicSub_Negative_Parameters") {} -/** - * End doxygen group atomicSub. - * @} - */ - -/** - * @addtogroup atomicInc atomicInc - * @{ - * @ingroup AtomicsTest - */ - -/** - * Test Description - * ------------------------ - * - Compiles atomicInc with invalid parameters. - * - Compiles the source with specialized Python tool. - * -# Utilizes sub-process to invoke compilation of faulty source. - * -# Performs post-processing of compiler output and counts errors. - * Test source - * ------------------------ - * - unit/atomics/CMakeLists.txt - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.2 - */ -TEST_CASE("Unit_atomicInc_Negative_Parameters") {} -/** - * End doxygen group atomicInc. - * @} - */ - -/** - * @addtogroup atomicDec atomicDec - * @{ - * @ingroup AtomicsTest - */ - -/** - * Test Description - * ------------------------ - * - Compiles atomicDec with invalid parameters. - * - Compiles the source with specialized Python tool. - * -# Utilizes sub-process to invoke compilation of faulty source. - * -# Performs post-processing of compiler output and counts errors. - * Test source - * ------------------------ - * - unit/atomics/CMakeLists.txt - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.2 - */ -TEST_CASE("Unit_atomicDec_Negative_Parameters") {} -/** - * End doxygen group atomicDec. - * @} - */ - -/** - * End doxygen group AtomicsTest. - * @defgroup MathTest Math Device Functions - * @{ - * This section describes tests for device math functions of HIP runtime API. 
- * @} - */ - /** * @defgroup MathTest Math Device Functions * @{ @@ -347,62 +223,8 @@ TEST_CASE("Unit_atomicDec_Negative_Parameters") {} */ /** - * @defgroup DeviceLanguageTest Device Language + * @defgroup VirtualMemoryManagementTest Virtual Memory Management APIs * @{ - * This section describes tests for the Device Language API. - */ - -/** - * @addtogroup launch_bounds launch_bounds - * @{ - * @ingroup DeviceLanguageTest - */ - -/** - * Test Description - * ------------------------ - * - Validates handling of invalid arguments: - * -# Compiles kernels that are not created appropriately: - * - Maximum number of threads is 0 - * - Maximum number of threads is not integer value - * - Mimimum number of warps is not integer value - * -# Expected output: compiler error - * Test source - * ------------------------ - * - unit/launch_bounds/CMakeLists.txt - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.2 - */ -TEST_CASE("Unit_Kernel_Launch_bounds_Negative_Parameters_CompilerError") {} - -/** - * Test Description - * ------------------------ - * - Validates handling of invalid arguments: - * -# Compiles kernels that are not created appropriately: - * - Maximum number of threads is negative - * - Mimimum number of warps is negative - * - Validates handling of invalid arguments: - * -# Expected output: parse error - * Test source - * ------------------------ - * - unit/launch_bounds/CMakeLists.txt - * Test requirements - * ------------------------ - * - HIP_VERSION >= 5.2 - */ -TEST_CASE("Unit_Kernel_Launch_bounds_Negative_Parameters_ParseError") {} - -/** - * End doxygen group launch_bounds. + * This section describes the virtual memory management types & functions of HIP runtime API. * @} */ - -/** - * End doxygen group DeviceLanguageTest. - * @} - * @defgroup VectorTypeTest Vector types - * @{ - * This section describes tests for the Vector type functions and operators. 
- */ diff --git a/catch/unit/CMakeLists.txt b/catch/unit/CMakeLists.txt index 59f8f6ad47..6b63292c91 100644 --- a/catch/unit/CMakeLists.txt +++ b/catch/unit/CMakeLists.txt @@ -52,6 +52,7 @@ add_subdirectory(p2p) add_subdirectory(gcc) add_subdirectory(syncthreads) add_subdirectory(threadfence) +add_subdirectory(virtualMemoryManagement) if(HIP_PLATFORM STREQUAL "amd") add_subdirectory(callback) diff --git a/catch/unit/memory/CMakeLists.txt b/catch/unit/memory/CMakeLists.txt index 4ef22c8013..47ae8aa9ed 100644 --- a/catch/unit/memory/CMakeLists.txt +++ b/catch/unit/memory/CMakeLists.txt @@ -94,7 +94,8 @@ if(HIP_PLATFORM MATCHES "amd") hipMemAddressFree.cc hipMemAddressReserve.cc hipMemRelease.cc - hipMemGetAllocationPropertiesFromHandle.cc) + hipMemGetAllocationPropertiesFromHandle.cc + hipArray.cc) else() set(TEST_SRC ${TEST_SRC} hipGetSymbolSizeAddress.cc) endif() diff --git a/catch/unit/memory/hip_vmm_common.hh b/catch/unit/memory/hip_vmm_common.hh deleted file mode 100644 index 24ecb6408a..0000000000 --- a/catch/unit/memory/hip_vmm_common.hh +++ /dev/null @@ -1,51 +0,0 @@ -/* -Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once - -#include "hip_test_context.hh" - -#define checkVMMSupported(device) {\ - int value = 0;\ - hipDeviceAttribute_t\ - attr = hipDeviceAttributeVirtualMemoryManagementSupported;\ - HIP_CHECK(\ - hipDeviceGetAttribute(&value, attr, device));\ - if (value == 0) {\ - HipTest::HIP_SKIP_TEST("Machine does not support VMM. Skipping Test..");\ - return;\ - }\ -} - -#define checkVMMSupportedRetVal(device) {\ - int value = 0;\ - hipDeviceAttribute_t\ - attr = hipDeviceAttributeVirtualMemoryManagementSupported;\ - HIP_CHECK(\ - hipDeviceGetAttribute(&value, attr, device));\ - if (value == 0) {\ - HipTest::HIP_SKIP_TEST("Machine does not support VMM. Skipping Test..");\ - return true;\ - }\ -} - -constexpr int threadsPerBlk = 64; \ No newline at end of file diff --git a/catch/unit/virtualMemoryManagement/CMakeLists.txt b/catch/unit/virtualMemoryManagement/CMakeLists.txt new file mode 100644 index 0000000000..f540fdd25b --- /dev/null +++ b/catch/unit/virtualMemoryManagement/CMakeLists.txt @@ -0,0 +1,55 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +set(TEST_SRC + hipMemGetAllocationGranularity.cc + hipMemRetainAllocationHandle.cc) + +if(HIP_PLATFORM MATCHES "nvidia") +set(TEST_SRC + ${TEST_SRC} + hipMemMapArrayAsync.cc) + +if(UNIX) # Disabled on AMD due to defect EXSWHTEC-375 + set(TEST_SRC + ${TEST_SRC} + hipMemExportToShareableHandle.cc + hipMemImportFromShareableHandle.cc) +endif() + +endif() + +if(HIP_PLATFORM MATCHES "amd") + set(TEST_SRC + ${TEST_SRC} + hipMemAddressFree.cc + hipMemAddressReserve.cc + hipMemCreate.cc + hipMemSetGetAccess.cc + hipMemGetAllocationPropertiesFromHandle.cc + hipMemMap.cc + hipMemRelease.cc + hipMemUnmap.cc + hipMemVmm_old.cc) +endif() + +hip_add_exe_to_target(NAME VirtualMemoryManagementTest + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests COMMON_SHARED_SRC ${COMMON_SHARED_SRC}) \ No newline at end of file diff --git a/catch/unit/memory/hipMemAddressFree.cc b/catch/unit/virtualMemoryManagement/hipMemAddressFree.cc similarity index 84% rename from catch/unit/memory/hipMemAddressFree.cc rename to catch/unit/virtualMemoryManagement/hipMemAddressFree.cc index 6890a26d8c..6171f830ac 100644 --- a/catch/unit/memory/hipMemAddressFree.cc +++ b/catch/unit/virtualMemoryManagement/hipMemAddressFree.cc @@ -23,12 +23,13 @@ THE SOFTWARE. /** * @addtogroup hipMemAddressFree hipMemAddressFree * @{ - * @ingroup MemoryTest + * @ingroup VirtualMemoryManagementTest * `hipError_t hipMemAddressFree (void* devPtr, size_t size)` - * Frees an address range reservation made via hipMemAddressReserve. 
*/ #include + #include "hip_vmm_common.hh" #define DATA_SIZE (1 << 13) @@ -38,7 +39,7 @@ THE SOFTWARE. * ------------------------ * - Negative Tests * ------------------------ - * - catch\unit\memory\hipMemAddressFree.cc + * - unit/virtualMemoryManagement/hipMemAddressFree.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -50,16 +51,15 @@ TEST_CASE("Unit_hipMemAddressFree_negative") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; // Allocate virtual address range hipDeviceptr_t ptrA; HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); @@ -68,9 +68,7 @@ TEST_CASE("Unit_hipMemAddressFree_negative") { REQUIRE(hipMemAddressFree(nullptr, size_mem) == hipErrorInvalidValue); } - SECTION("pass zero to size") { - REQUIRE(hipMemAddressFree(ptrA, 0) == hipErrorInvalidValue); - } + SECTION("pass zero to size") { REQUIRE(hipMemAddressFree(ptrA, 0) == hipErrorInvalidValue); } HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); } diff --git a/catch/unit/memory/hipMemAddressReserve.cc b/catch/unit/virtualMemoryManagement/hipMemAddressReserve.cc similarity index 83% rename from catch/unit/memory/hipMemAddressReserve.cc rename to catch/unit/virtualMemoryManagement/hipMemAddressReserve.cc index c6a76fc56c..c19f8ada43 100644 --- a/catch/unit/memory/hipMemAddressReserve.cc +++ 
b/catch/unit/virtualMemoryManagement/hipMemAddressReserve.cc @@ -23,7 +23,7 @@ THE SOFTWARE. /** * @addtogroup hipMemAddressReserve hipMemAddressReserve * @{ - * @ingroup MemoryTest + * @ingroup VirtualMemoryManagementTest * `hipError_t hipMemAddressReserve (void** ptr, * size_t size, * size_t alignment, @@ -33,6 +33,7 @@ THE SOFTWARE. */ #include + #include "hip_vmm_common.hh" #define DATA_SIZE (1 << 13) @@ -43,7 +44,7 @@ THE SOFTWARE. * - Verify if reserved address returned by hipMemAddressReserve * for different alignment values are correctly aligned. * ------------------------ - * - catch\unit\memory\hipMemAddressReserve.cc + * - unit/virtualMemoryManagement/hipMemAddressReserve.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -56,16 +57,15 @@ TEST_CASE("Unit_hipMemAddressReserve_AlignmentTest") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; // Allocate virtual address range hipDeviceptr_t ptrA; size_t alignmnt = 1; @@ -106,7 +106,7 @@ TEST_CASE("Unit_hipMemAddressReserve_AlignmentTest") { * ------------------------ * - Negative Tests * ------------------------ - * - catch\unit\memory\hipMemAddressReserve.cc + * - unit/virtualMemoryManagement/hipMemAddressReserve.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -118,35 +118,33 @@ 
TEST_CASE("Unit_hipMemAddressReserve_Negative") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; // Allocate virtual address range hipDeviceptr_t ptrA; + SECTION("Nullptr to ptr") { - REQUIRE(hipMemAddressReserve(nullptr, size_mem, 0, 0, 0) == - hipErrorInvalidValue); + REQUIRE(hipMemAddressReserve(nullptr, size_mem, 0, 0, 0) == hipErrorInvalidValue); } SECTION("pass size as 0") { - REQUIRE(hipMemAddressReserve(&ptrA, 0, 0, 0, 0) == - hipErrorMemoryAllocation); + REQUIRE(hipMemAddressReserve(&ptrA, 0, 0, 0, 0) == hipErrorMemoryAllocation); } + #if HT_NVIDIA SECTION("pass non power of two for alignment") { - REQUIRE(hipMemAddressReserve(&ptrA, size_mem, 3, 0, 0) == - hipErrorMemoryAllocation); + REQUIRE(hipMemAddressReserve(&ptrA, size_mem, 3, 0, 0) == hipErrorMemoryAllocation); } #endif + SECTION("pass size as non multiple of host page size") { - REQUIRE(hipMemAddressReserve(&ptrA, (size_mem - 1), 0, 0, 0) == - hipErrorMemoryAllocation); + REQUIRE(hipMemAddressReserve(&ptrA, (size_mem - 1), 0, 0, 0) == hipErrorMemoryAllocation); } } diff --git a/catch/unit/memory/hipMemCreate.cc b/catch/unit/virtualMemoryManagement/hipMemCreate.cc similarity index 84% rename from catch/unit/memory/hipMemCreate.cc rename to catch/unit/virtualMemoryManagement/hipMemCreate.cc index 83f6ff7c01..4cc52786ad 100644 --- 
a/catch/unit/memory/hipMemCreate.cc +++ b/catch/unit/virtualMemoryManagement/hipMemCreate.cc @@ -19,20 +19,23 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + /** * @addtogroup hipMemCreate hipMemCreate * @{ - * @ingroup MemoryTest + * @ingroup VirtualMemoryManagementTest * `hipError_t hipMemCreate (hipMemGenericAllocationHandle_t* handle, * size_t size, * const hipMemAllocationProp* prop, * unsigned long long flags)` - * Creates a memory allocation described by the properties and size. */ -#include "hip_vmm_common.hh" + #include #include +#include "hip_vmm_common.hh" + #define THREADS_PER_BLOCK 512 #define NUM_OF_BUFFERS 3 #define DATA_SIZE (1 << 13) @@ -52,7 +55,7 @@ static __global__ void square_kernel(int* Buff) { * - Allocate physical memories for different multiples of * granularity and deallocate them. * ------------------------ - * - catch\unit\memory\hipMemCreate.cc + * - unit/virtualMemoryManagement/hipMemCreate.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -62,19 +65,18 @@ TEST_CASE("Unit_hipMemCreate_BasicAllocateDeAlloc_MultGranularity") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices HIP_CHECK( - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); hipMemGenericAllocationHandle_t handle; // Allocate physical memory for (int mul = 1; mul < 64; mul++) { - HIP_CHECK(hipMemCreate(&handle, granularity*mul, &prop, 0)); + HIP_CHECK(hipMemCreate(&handle, granularity * mul, &prop, 0)); 
HIP_CHECK(hipMemRelease(handle)); } } @@ -87,7 +89,7 @@ TEST_CASE("Unit_hipMemCreate_BasicAllocateDeAlloc_MultGranularity") { * and back to host. Verify the result. Release handle at end after * unmapping VMM range. * ------------------------ - * - catch\unit\memory\hipMemCreate.cc + * - unit/virtualMemoryManagement/hipMemCreate.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -99,17 +101,15 @@ TEST_CASE("Unit_hipMemCreate_ChkDev2HstMemcpy_ReleaseHdlPostUnmap") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices HIP_CHECK( - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate physical memory HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); @@ -146,7 +146,7 @@ TEST_CASE("Unit_hipMemCreate_ChkDev2HstMemcpy_ReleaseHdlPostUnmap") { * to VMM memory and back to host. Verify the result. Release * handle before the VMM range is used. 
* ------------------------ - * - catch\unit\memory\hipMemCreate.cc + * - unit/virtualMemoryManagement/hipMemCreate.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -158,17 +158,15 @@ TEST_CASE("Unit_hipMemCreate_ChkDev2HstMemcpy_ReleaseHdlPreUse") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices HIP_CHECK( - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate physical memory HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); @@ -205,12 +203,11 @@ TEST_CASE("Unit_hipMemCreate_ChkDev2HstMemcpy_ReleaseHdlPreUse") { * to device, launch kernel to square the data, copy data back * to host. Verify the result. 
* ------------------------ - * - catch\unit\memory\hipMemCreate.cc + * - unit/virtualMemoryManagement/hipMemCreate.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 */ - TEST_CASE("Unit_hipMemCreate_ChkWithKerLaunch") { size_t granularity = 0; constexpr int N = DATA_SIZE; @@ -218,17 +215,15 @@ TEST_CASE("Unit_hipMemCreate_ChkWithKerLaunch") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices HIP_CHECK( - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate physical memory HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); @@ -248,12 +243,12 @@ TEST_CASE("Unit_hipMemCreate_ChkWithKerLaunch") { // Initialize with data for (size_t idx = 0; idx < N; idx++) { A_h[idx] = idx; - C_h[idx] = idx*idx; + C_h[idx] = idx * idx; } HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); // Invoke kernel - hipLaunchKernelGGL(square_kernel, dim3(N / THREADS_PER_BLOCK), - dim3(THREADS_PER_BLOCK), 0, 0, static_cast(ptrA)); + hipLaunchKernelGGL(square_kernel, dim3(N / THREADS_PER_BLOCK), dim3(THREADS_PER_BLOCK), 0, 0, + static_cast(ptrA)); HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); HIP_CHECK(hipDeviceSynchronize()); REQUIRE(true == std::equal(B_h.begin(), B_h.end(), C_h.data())); @@ -269,7 +264,7 @@ TEST_CASE("Unit_hipMemCreate_ChkWithKerLaunch") { * device permission, copy data from host to device, launch kernel * to square the 
data, copy data back to host. Verify the result. * ------------------------ - * - catch\unit\memory\hipMemCreate.cc + * - unit/virtualMemoryManagement/hipMemCreate.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -282,17 +277,15 @@ TEST_CASE("Unit_hipMemCreate_MapNonContiguousChunks") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices HIP_CHECK( - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle[NUM_OF_BUFFERS]; // Allocate 3 physical memory chunks for (int count = 0; count < numOfBuffers; count++) { @@ -304,8 +297,7 @@ TEST_CASE("Unit_hipMemCreate_MapNonContiguousChunks") { for (int idx = 0; idx < numOfBuffers; idx++) { uint64_t uiptr = reinterpret_cast(ptrA); uiptr = uiptr + idx * size_mem; - HIP_CHECK(hipMemMap(reinterpret_cast(uiptr), size_mem, 0, - handle[idx], 0)); + HIP_CHECK(hipMemMap(reinterpret_cast(uiptr), size_mem, 0, handle[idx], 0)); HIP_CHECK(hipMemRelease(handle[idx])); } hipMemAccessDesc accessDesc = {}; @@ -315,16 +307,16 @@ TEST_CASE("Unit_hipMemCreate_MapNonContiguousChunks") { // Make the address accessible to GPU 0 HIP_CHECK(hipMemSetAccess(ptrA, (numOfBuffers * size_mem), &accessDesc, 1)); std::vector A_h(numOfBuffers * size_mem), B_h(numOfBuffers * size_mem), - C_h(numOfBuffers * size_mem); + C_h(numOfBuffers * size_mem); // Fill Data for (size_t idx = 0; idx < (numOfBuffers * N); idx++) { A_h[idx] = idx; - 
C_h[idx] = idx*idx; + C_h[idx] = idx * idx; } HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), numOfBuffers * buffer_size)); // Launch square kernel - hipLaunchKernelGGL(square_kernel, dim3(N / THREADS_PER_BLOCK), - dim3(THREADS_PER_BLOCK), 0, 0, static_cast(ptrA)); + hipLaunchKernelGGL(square_kernel, dim3(N / THREADS_PER_BLOCK), dim3(THREADS_PER_BLOCK), 0, 0, + static_cast(ptrA)); HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, numOfBuffers * buffer_size)); HIP_CHECK(hipDeviceSynchronize()); // Validate Results @@ -344,7 +336,7 @@ TEST_CASE("Unit_hipMemCreate_MapNonContiguousChunks") { * to the VMM address range. Memset the VMM address range with initial * value. Validate. * ------------------------ - * - catch\unit\memory\hipMemCreate.cc + * - unit/virtualMemoryManagement/hipMemCreate.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -357,17 +349,15 @@ TEST_CASE("Unit_hipMemCreate_ChkWithMemset") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices HIP_CHECK( - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate physical memory HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); @@ -398,7 +388,7 @@ TEST_CASE("Unit_hipMemCreate_ChkWithMemset") { * ------------------------ * - Negative Tests * ------------------------ - * - catch\unit\memory\hipMemCreate.cc + * - unit/virtualMemoryManagement/hipMemCreate.cc * Test requirements * 
------------------------ * - HIP_VERSION >= 6.1 @@ -408,58 +398,48 @@ TEST_CASE("Unit_hipMemCreate_Negative") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemGenericAllocationHandle_t handle; hipMemAllocationProp prop = {}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Device HIP_CHECK( - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); SECTION("Nullptr to handle") { - REQUIRE(hipMemCreate(nullptr, granularity, &prop, 0) == - hipErrorInvalidValue); + REQUIRE(hipMemCreate(nullptr, granularity, &prop, 0) == hipErrorInvalidValue); } SECTION("Nullptr to prop") { - REQUIRE(hipMemCreate(&handle, granularity, nullptr, 0) == - hipErrorInvalidValue); + REQUIRE(hipMemCreate(&handle, granularity, nullptr, 0) == hipErrorInvalidValue); } - SECTION("pass size as 0") { - REQUIRE(hipMemCreate(&handle, 0, &prop, 0) == hipErrorInvalidValue); - } + SECTION("pass size as 0") { REQUIRE(hipMemCreate(&handle, 0, &prop, 0) == hipErrorInvalidValue); } SECTION("Pass prop type as invalid") { prop.type = hipMemAllocationTypeInvalid; - REQUIRE(hipMemCreate(&handle, granularity, &prop, 0) == - hipErrorInvalidValue); + REQUIRE(hipMemCreate(&handle, granularity, &prop, 0) == hipErrorInvalidValue); } SECTION("pass location as invalid") { prop.location.type = hipMemLocationTypeInvalid; - REQUIRE(hipMemCreate(&handle, granularity, &prop, 0) == - hipErrorInvalidValue); + REQUIRE(hipMemCreate(&handle, granularity, &prop, 0) == hipErrorInvalidValue); } SECTION("non multiple of granularity") { - REQUIRE(hipMemCreate(&handle, (granularity - 1), &prop, 0) == - hipErrorInvalidValue); + REQUIRE(hipMemCreate(&handle, (granularity - 1), &prop, 0) == hipErrorInvalidValue); } SECTION("pass location id as 
-1") { prop.location.id = -1; // set to non existing device - REQUIRE(hipMemCreate(&handle, granularity, &prop, 0) == - hipErrorInvalidValue); + REQUIRE(hipMemCreate(&handle, granularity, &prop, 0) == hipErrorInvalidValue); } SECTION("pass location id as > highest device number") { int numDevices = 0; HIP_CHECK(hipGetDeviceCount(&numDevices)); prop.location.id = numDevices; // set to non existing device - REQUIRE(hipMemCreate(&handle, granularity, &prop, 0) == - hipErrorInvalidValue); + REQUIRE(hipMemCreate(&handle, granularity, &prop, 0) == hipErrorInvalidValue); } } diff --git a/catch/unit/virtualMemoryManagement/hipMemExportToShareableHandle.cc b/catch/unit/virtualMemoryManagement/hipMemExportToShareableHandle.cc new file mode 100644 index 0000000000..d5c4b5394e --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hipMemExportToShareableHandle.cc @@ -0,0 +1,145 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/** + * @addtogroup hipMemExportToShareableHandle hipMemExportToShareableHandle + * @{ + * @ingroup VirtualMemoryManagementTest + * `hipError_t hipMemExportToShareableHandle(void *shareableHandle, + * hipMemGenericAllocationHandle_t handle, + * hipMemAllocationHandleType handleType, + * unsigned long long flags)` - + * Exports an allocation to a requested shareable handle type. + */ + +#include + +#include "hip_vmm_common.hh" + +/** + * Test Description + * ------------------------ + * - Basic sanity test. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemExportToShareableHandle.cc + * Test requirements + * ------------------------ + * - Host specific (LINUX) + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemExportToShareableHandle_Positive_Basic") { + HIP_CHECK(hipFree(0)); + + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + checkVMMSupported(device); + + hipMemAllocationProp prop = {}; + prop.type = hipMemAllocationTypePinned; + prop.requestedHandleTypes = hipMemHandleTypePosixFileDescriptor; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; + + size_t granularity; + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + + hipMemGenericAllocationHandle_t handle; + HIP_CHECK(hipMemCreate(&handle, granularity * 2, &prop, 0)); + + void* shareable_handle = nullptr; + HIP_CHECK(hipMemExportToShareableHandle(&shareable_handle, handle, + hipMemHandleTypePosixFileDescriptor, 0)); + REQUIRE(shareable_handle != nullptr); + + HIP_CHECK(hipMemRelease(handle)); +} + +/** + * Test Description + * ------------------------ + * - Negative parameters test. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemExportToShareableHandle.cc + * Test requirements + * ------------------------ + * - Host specific (LINUX) + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemExportToShareableHandle_Negative_Parameters") { + HIP_CHECK(hipFree(0)); + + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + checkVMMSupported(device); + + hipMemAllocationProp prop = {}; + prop.type = hipMemAllocationTypePinned; + prop.requestedHandleTypes = hipMemHandleTypePosixFileDescriptor; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; + + size_t granularity; + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + + hipMemGenericAllocationHandle_t handle; + HIP_CHECK(hipMemCreate(&handle, granularity * 2, &prop, 0)); + + void* shareable_handle = nullptr; + + SECTION("shareableHandle == nullptr") { + HIP_CHECK_ERROR( + hipMemExportToShareableHandle(nullptr, handle, hipMemHandleTypePosixFileDescriptor, 0), + hipErrorInvalidValue); + } + +#if HT_AMD + SECTION("handle == nullptr") { + HIP_CHECK_ERROR(hipMemExportToShareableHandle(&shareable_handle, nullptr, + hipMemHandleTypePosixFileDescriptor, 0), + hipErrorInvalidValue); + } +#endif + + SECTION("invalid handleType") { + HIP_CHECK_ERROR( + hipMemExportToShareableHandle(&shareable_handle, handle, hipMemHandleTypeWin32, 0), + hipErrorInvalidValue); + } + + SECTION("non-zero flags") { + HIP_CHECK_ERROR(hipMemExportToShareableHandle(&shareable_handle, handle, + hipMemHandleTypePosixFileDescriptor, 1), + hipErrorInvalidValue); + } + + HIP_CHECK(hipMemRelease(handle)); + +#if HT_AMD // segfaults on NVIDIA + SECTION("released handle") { + HIP_CHECK_ERROR(hipMemExportToShareableHandle(&shareable_handle, handle, + hipMemHandleTypePosixFileDescriptor, 0), + hipErrorInvalidValue); + } +#endif +} \ No newline at end of file diff --git a/catch/unit/memory/hipMemGetAllocationGranularity.cc 
b/catch/unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc similarity index 73% rename from catch/unit/memory/hipMemGetAllocationGranularity.cc rename to catch/unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc index 5f5821de48..0f45a53810 100644 --- a/catch/unit/memory/hipMemGetAllocationGranularity.cc +++ b/catch/unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc @@ -23,7 +23,7 @@ THE SOFTWARE. /** * @addtogroup hipMemGetAllocationGranularity hipMemGetAllocationGranularity * @{ - * @ingroup MemoryTest + * @ingroup VirtualMemoryManagementTest * `hipError_t hipMemGetAllocationGranularity (size_t* granularity, * const hipMemAllocationProp* prop, * hipMemAllocationGranularity_flags option)` - @@ -33,14 +33,13 @@ THE SOFTWARE. #include #include #include + #include "hip_vmm_common.hh" /** local function to invoke hipMemGetAllocationGranularity. */ -void getGranularity(size_t *granularity, - hipMemAllocationGranularity_flags option, - int device) { +void getGranularity(size_t* granularity, hipMemAllocationGranularity_flags option, int device) { hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; @@ -54,16 +53,17 @@ void getGranularity(size_t *granularity, * - Functional Test to get granularity size for * hipMemAllocationGranularityMinimum option. 
* ------------------------ - * - catch\unit\memory\hipMemGetAllocationGranularity.cc + * - unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 */ TEST_CASE("Unit_hipMemGetAllocationGranularity_MinGranularity") { + HIP_CHECK(hipFree(0)); size_t granularity = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, 0)); - checkVMMSupported(device) + checkVMMSupported(device); getGranularity(&granularity, hipMemAllocationGranularityMinimum, 0); REQUIRE(granularity > 0); } @@ -74,16 +74,17 @@ TEST_CASE("Unit_hipMemGetAllocationGranularity_MinGranularity") { * - Functional Test to get granularity size for * hipMemAllocationGranularityRecommended option. * ------------------------ - * - catch\unit\memory\hipMemGetAllocationGranularity.cc + * - unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 */ TEST_CASE("Unit_hipMemGetAllocationGranularity_RecommendedGranularity") { + HIP_CHECK(hipFree(0)); size_t granularity = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, 0)); - checkVMMSupported(device) + checkVMMSupported(device); getGranularity(&granularity, hipMemAllocationGranularityRecommended, 0); REQUIRE(granularity > 0); } @@ -94,21 +95,21 @@ TEST_CASE("Unit_hipMemGetAllocationGranularity_RecommendedGranularity") { * - Functional Test to get granularity size for * hipMemAllocationGranularityMinimum option for all GPUs. 
* ------------------------ - * - catch\unit\memory\hipMemGetAllocationGranularity.cc + * - unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 */ TEST_CASE("Unit_hipMemGetAllocationGranularity_AllGPUs") { + HIP_CHECK(hipFree(0)); int numDevices = 0; HIP_CHECK(hipGetDeviceCount(&numDevices)); for (int dev = 0; dev < numDevices; dev++) { size_t granularity = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, dev)); - checkVMMSupported(device) - getGranularity(&granularity, hipMemAllocationGranularityRecommended, - dev); + checkVMMSupported(device); + getGranularity(&granularity, hipMemAllocationGranularityRecommended, dev); REQUIRE(granularity > 0); } } @@ -118,16 +119,17 @@ TEST_CASE("Unit_hipMemGetAllocationGranularity_AllGPUs") { * ------------------------ * - Negative Tests * ------------------------ - * - catch\unit\memory\hipMemGetAllocationGranularity.cc + * - unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 */ TEST_CASE("Unit_hipMemGetAllocationGranularity_NegativeTests") { + HIP_CHECK(hipFree(0)); size_t granularity = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, 0)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; @@ -135,45 +137,48 @@ TEST_CASE("Unit_hipMemGetAllocationGranularity_NegativeTests") { SECTION("Granularity is nullptr") { REQUIRE(hipErrorInvalidValue == - hipMemGetAllocationGranularity(nullptr, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(nullptr, &prop, hipMemAllocationGranularityMinimum)); } +#if HT_AMD // segfaults on NVIDIA SECTION("Prop is nullptr") { - REQUIRE(hipErrorInvalidValue == - hipMemGetAllocationGranularity(&granularity, nullptr, - hipMemAllocationGranularityMinimum)); + REQUIRE( + 
hipErrorInvalidValue == + hipMemGetAllocationGranularity(&granularity, nullptr, hipMemAllocationGranularityMinimum)); } +#endif #if HT_NVIDIA SECTION("flag is invalid") { REQUIRE(hipErrorInvalidValue == - hipMemGetAllocationGranularity(&granularity, &prop, - (hipMemAllocationGranularity_flags)0xff)); + hipMemGetAllocationGranularity(&granularity, &prop, + (hipMemAllocationGranularity_flags)0xff)); } #endif +#if HT_AMD // succeeds on NVIDIA SECTION("device id > highest device id") { int numDevices = 0; HIP_CHECK(hipGetDeviceCount(&numDevices)); prop.location.id = numDevices; // set to non existing device - REQUIRE(hipErrorInvalidValue == - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + REQUIRE( + hipErrorInvalidValue == + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); } SECTION("device id < lowest device id") { prop.location.id = -1; // set to non existing device - REQUIRE(hipErrorInvalidValue == - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + REQUIRE( + hipErrorInvalidValue == + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); } SECTION("allocation type as invalid") { prop.type = hipMemAllocationTypeInvalid; - REQUIRE(hipErrorInvalidValue == - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + REQUIRE( + hipErrorInvalidValue == + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); } SECTION("location type as invalid") { prop.location.type = hipMemLocationTypeInvalid; - REQUIRE(hipErrorInvalidValue == - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + REQUIRE( + hipErrorInvalidValue == + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); } +#endif } diff --git a/catch/unit/memory/hipMemGetAllocationPropertiesFromHandle.cc 
b/catch/unit/virtualMemoryManagement/hipMemGetAllocationPropertiesFromHandle.cc similarity index 84% rename from catch/unit/memory/hipMemGetAllocationPropertiesFromHandle.cc rename to catch/unit/virtualMemoryManagement/hipMemGetAllocationPropertiesFromHandle.cc index 5c348a85f8..cc3a8dc519 100644 --- a/catch/unit/memory/hipMemGetAllocationPropertiesFromHandle.cc +++ b/catch/unit/virtualMemoryManagement/hipMemGetAllocationPropertiesFromHandle.cc @@ -20,26 +20,27 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include -#include "hip_vmm_common.hh" - -#define DATA_SIZE (1 << 13) - /** * @addtogroup hipMemGetAllocationPropertiesFromHandle hipMemGetAllocationPropertiesFromHandle * @{ - * @ingroup MemoryTest + * @ingroup VirtualMemoryManagementTest * `hipError_t hipMemGetAllocationPropertiesFromHandle(hipMemAllocationProp* prop, * hipMemGenericAllocationHandle_t handle)` - * Retrieve the property structure of the given handle. */ +#include + +#include "hip_vmm_common.hh" + +#define DATA_SIZE (1 << 13) + /** * Test Description * ------------------------ * - Functional test to verify the values of hipMemAllocationProp properties. * ------------------------ - * - catch\unit\memory\hipMemGetAllocationPropertiesFromHandle.cc + * - unit/virtualMemoryManagement/hipMemGetAllocationPropertiesFromHandle.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -47,7 +48,7 @@ THE SOFTWARE. 
TEST_CASE("Unit_hipMemGetAllocationPropertiesFromHandle_functional") { hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, 0)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemGenericAllocationHandle_t handle; hipMemAllocationProp prop = {}; prop.type = hipMemAllocationTypePinned; @@ -58,11 +59,10 @@ TEST_CASE("Unit_hipMemGetAllocationPropertiesFromHandle_functional") { size_t granularity = 0; int N = DATA_SIZE; size_t buffer_size = N * sizeof(int); - HIP_CHECK(hipMemGetAllocationGranularity - (&granularity, &prop, hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t mem_size = ((granularity + buffer_size - 1) / granularity) - * granularity; + size_t mem_size = ((granularity + buffer_size - 1) / granularity) * granularity; // Allocate physical memory HIP_CHECK(hipMemCreate(&handle, mem_size, &prop, 0)); // verify properties has been retrived from handle @@ -78,7 +78,7 @@ TEST_CASE("Unit_hipMemGetAllocationPropertiesFromHandle_functional") { * ------------------------ * - Negative Tests. 
* ------------------------ - * - catch\unit\memory\hipMemGetAllocationPropertiesFromHandle.cc + * - unit/virtualMemoryManagement/hipMemGetAllocationPropertiesFromHandle.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -86,7 +86,7 @@ TEST_CASE("Unit_hipMemGetAllocationPropertiesFromHandle_functional") { TEST_CASE("Unit_hipMemGetAllocationPropertiesFromHandle_Negative") { hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, 0)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemGenericAllocationHandle_t handle; hipMemAllocationProp prop = {}; prop.type = hipMemAllocationTypePinned; @@ -97,23 +97,21 @@ TEST_CASE("Unit_hipMemGetAllocationPropertiesFromHandle_Negative") { size_t granularity = 0; int N = DATA_SIZE; size_t buffer_size = N * sizeof(int); - HIP_CHECK(hipMemGetAllocationGranularity - (&granularity, &prop, hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t mem_size = ((granularity + buffer_size - 1) / granularity) - * granularity; + size_t mem_size = ((granularity + buffer_size - 1) / granularity) * granularity; // Allocate physical memory HIP_CHECK(hipMemCreate(&handle, mem_size, &prop, 0)); SECTION("Nullptr as prop") { - REQUIRE(hipMemGetAllocationPropertiesFromHandle(nullptr, handle) - == hipErrorInvalidValue); + REQUIRE(hipMemGetAllocationPropertiesFromHandle(nullptr, handle) == hipErrorInvalidValue); } SECTION("null handle") { prop.location.type = hipMemLocationTypeInvalid; - REQUIRE(hipMemGetAllocationPropertiesFromHandle(&prop_temp, nullptr) - == hipErrorInvalidValue); + REQUIRE(hipMemGetAllocationPropertiesFromHandle(&prop_temp, nullptr) == hipErrorInvalidValue); } + HIP_CHECK(hipMemRelease(handle)); } diff --git a/catch/unit/virtualMemoryManagement/hipMemImportFromShareableHandle.cc b/catch/unit/virtualMemoryManagement/hipMemImportFromShareableHandle.cc new file mode 100644 index 
0000000000..f362e4f049 --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hipMemImportFromShareableHandle.cc @@ -0,0 +1,210 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipMemImportFromShareableHandle hipMemImportFromShareableHandle + * @{ + * @ingroup VirtualMemoryManagementTest + * `hipError_t hipMemImportFromShareableHandle(hipMemGenericAllocationHandle_t *handle, + * void *osHandle, + * hipMemAllocationHandleType shHandleType)` - + * Imports an allocation from a requested shareable handle type. + */ + +#include +#include +#include + +#include + +#include "hip_vmm_common.hh" + +/** + * Test Description + * ------------------------ + * - Basic sanity test. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemImportFromShareableHandle.cc + * Test requirements + * ------------------------ + * - Host specific (LINUX) + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemImportFromShareableHandle_Positive_Basic") { + HIP_CHECK(hipFree(0)); + + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + checkVMMSupported(device); + + hipMemAllocationProp prop = {}; + prop.type = hipMemAllocationTypePinned; + prop.requestedHandleTypes = hipMemHandleTypePosixFileDescriptor; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; + + size_t granularity; + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + + hipMemGenericAllocationHandle_t handle; + HIP_CHECK(hipMemCreate(&handle, granularity * 2, &prop, 0)); + + void* shareable_handle = nullptr; + HIP_CHECK(hipMemExportToShareableHandle(&shareable_handle, handle, + hipMemHandleTypePosixFileDescriptor, 0)); + + hipMemGenericAllocationHandle_t imported_handle; + HIP_CHECK(hipMemImportFromShareableHandle(&imported_handle, shareable_handle, + hipMemHandleTypePosixFileDescriptor)); + + HIP_CHECK(hipMemRelease(handle)); +} + +/** + * Test Description + * ------------------------ + * - Basic multiprocess sanity test. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemImportFromShareableHandle.cc + * Test requirements + * ------------------------ + * - Host specific (LINUX) + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemImportFromShareableHandle_Positive_MultiProc") { + int fd[2]; + REQUIRE(pipe(fd) == 0); + + auto pid = fork(); + REQUIRE(pid >= 0); + + if (pid == 0) { // child + REQUIRE(close(fd[1]) == 0); + + void* shareable_handle = nullptr; + REQUIRE(read(fd[0], &shareable_handle, sizeof(shareable_handle)) >= 0); + REQUIRE(close(fd[0]) == 0); + + REQUIRE(shareable_handle != nullptr); + + HIP_CHECK(hipFree(0)); + + hipMemGenericAllocationHandle_t imported_handle; + HIP_CHECK(hipMemImportFromShareableHandle(&imported_handle, shareable_handle, + hipMemHandleTypePosixFileDescriptor)); + + exit(0); + } else { // parent + REQUIRE(close(fd[0]) == 0); + + HIP_CHECK(hipFree(0)); + + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + checkVMMSupported(device); + + hipMemAllocationProp prop = {}; + prop.type = hipMemAllocationTypePinned; + prop.requestedHandleTypes = hipMemHandleTypePosixFileDescriptor; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; + + size_t granularity; + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + + hipMemGenericAllocationHandle_t handle; + HIP_CHECK(hipMemCreate(&handle, granularity * 2, &prop, 0)); + + void* shareable_handle = nullptr; + HIP_CHECK(hipMemExportToShareableHandle(&shareable_handle, handle, + hipMemHandleTypePosixFileDescriptor, 0)); + + REQUIRE(write(fd[1], &shareable_handle, sizeof(shareable_handle)) >= 0); + REQUIRE(close(fd[1]) == 0); + + REQUIRE(wait(NULL) >= 0); + + HIP_CHECK(hipMemRelease(handle)); + } +} + +/** + * Test Description + * ------------------------ + * - Negative parameters test. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemImportFromShareableHandle.cc + * Test requirements + * ------------------------ + * - Host specific (LINUX) + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemImportFromShareableHandle_Negative_Parameters") { + HIP_CHECK(hipFree(0)); + + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + checkVMMSupported(device); + + hipMemAllocationProp prop = {}; + prop.type = hipMemAllocationTypePinned; + prop.requestedHandleTypes = hipMemHandleTypePosixFileDescriptor; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; + + size_t granularity; + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); + + hipMemGenericAllocationHandle_t handle; + HIP_CHECK(hipMemCreate(&handle, granularity * 2, &prop, 0)); + + void* shareable_handle = nullptr; + HIP_CHECK(hipMemExportToShareableHandle(&shareable_handle, handle, + hipMemHandleTypePosixFileDescriptor, 0)); + + hipMemGenericAllocationHandle_t imported_handle; + +#if HT_AMD + SECTION("handle == nullptr") { + HIP_CHECK_ERROR(hipMemImportFromShareableHandle(nullptr, shareable_handle, + hipMemHandleTypePosixFileDescriptor), + hipErrorInvalidValue); + } +#endif + + SECTION("shareableHandle == nullptr") { + HIP_CHECK_ERROR(hipMemImportFromShareableHandle(&imported_handle, nullptr, + hipMemHandleTypePosixFileDescriptor), + hipErrorInvalidValue); + } + + SECTION("invalid handleType") { + HIP_CHECK_ERROR( + hipMemImportFromShareableHandle(&imported_handle, shareable_handle, hipMemHandleTypeWin32), + hipErrorNotSupported); + } + + HIP_CHECK(hipMemRelease(handle)); +} \ No newline at end of file diff --git a/catch/unit/memory/hipMemMap.cc b/catch/unit/virtualMemoryManagement/hipMemMap.cc similarity index 86% rename from catch/unit/memory/hipMemMap.cc rename to catch/unit/virtualMemoryManagement/hipMemMap.cc index d9b2d13a26..7a46f0f3cb 100644 --- a/catch/unit/memory/hipMemMap.cc +++ 
b/catch/unit/virtualMemoryManagement/hipMemMap.cc @@ -19,10 +19,11 @@ LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + /** * @addtogroup hipMemMap hipMemMap * @{ - * @ingroup MemoryTest + * @ingroup VirtualMemoryManagementTest * `hipError_t hipMemMap (void* ptr, * size_t size, * size_t offset, @@ -32,6 +33,7 @@ THE SOFTWARE. */ #include + #include "hip_vmm_common.hh" constexpr int N = (1 << 13); @@ -45,7 +47,7 @@ constexpr int initializer = 0; * vmm address range repeatedly. This test validates physical memory * euse using same vmm range. * ------------------------ - * - catch\unit\memory\hipMemMap.cc + * - unit/virtualMemoryManagement/hipMemMap.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -57,24 +59,22 @@ TEST_CASE("Unit_hipMemMap_SameMemoryReuse") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices HIP_CHECK( - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate host memory and intialize data std::vector A_h(N), B_h(N), C_h(N); // Initialize with data for (size_t idx = 0; idx < N; idx++) { A_h[idx] = idx; - C_h[idx] = idx*idx; + C_h[idx] = idx * idx; } // Allocate a physical memory chunk HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); @@ -94,8 +94,7 @@ 
TEST_CASE("Unit_hipMemMap_SameMemoryReuse") { HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); #if HT_NVIDIA - square_kernel <<>>( - static_cast(ptrA)); + square_kernel<<>>(static_cast(ptrA)); HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); HIP_CHECK(hipStreamSynchronize(0)); REQUIRE(true == std::equal(B_h.begin(), B_h.end(), C_h.data())); @@ -114,7 +113,7 @@ TEST_CASE("Unit_hipMemMap_SameMemoryReuse") { * vmm addresses. This test validates physical memory reuse using * different vmm ranges. * ------------------------ - * - catch\unit\memory\hipMemMap.cc + * - unit/virtualMemoryManagement/hipMemMap.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -125,24 +124,22 @@ TEST_CASE("Unit_hipMemMap_PhysicalMemoryReuse_SingleGPU") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices HIP_CHECK( - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate host memory and intialize data std::vector A_h(N), B_h(N), C_h(N); // Initialize with data for (size_t idx = 0; idx < N; idx++) { A_h[idx] = idx; - C_h[idx] = idx*idx; + C_h[idx] = idx * idx; } // Allocate a physical memory chunk HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); @@ -164,8 +161,8 @@ TEST_CASE("Unit_hipMemMap_PhysicalMemoryReuse_SingleGPU") { HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA[buf], 
buffer_size)); REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); #if HT_NVIDIA - square_kernel <<>>( - static_cast(ptrA[buf])); + square_kernel<<>>( + static_cast(ptrA[buf])); HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA[buf], buffer_size)); HIP_CHECK(hipStreamSynchronize(0)); REQUIRE(true == std::equal(B_h.begin(), B_h.end(), C_h.data())); @@ -186,7 +183,7 @@ TEST_CASE("Unit_hipMemMap_PhysicalMemoryReuse_SingleGPU") { * vmm addresses at the same time and check data values integrity * between different VMMs. * ------------------------ - * - catch\unit\memory\hipMemMap.cc + * - unit/virtualMemoryManagement/hipMemMap.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -197,17 +194,15 @@ TEST_CASE("Unit_hipMemMap_PhysicalMemory_Map2MultVMMs") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices HIP_CHECK( - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate host memory and intialize data std::vector A_h(N), B_h(N); @@ -253,7 +248,7 @@ TEST_CASE("Unit_hipMemMap_PhysicalMemory_Map2MultVMMs") { * multiple vmm addresses. This test validates physical memory * reuse using different vmm ranges on multiple devices. 
* ------------------------ - * - catch\unit\memory\hipMemMap.cc + * - unit/virtualMemoryManagement/hipMemMap.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -270,17 +265,15 @@ TEST_CASE("Unit_hipMemMap_PhysicalMemoryReuse_MultiDev") { for (int devX = 0; devX < devicecount; devX++) { hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, devX)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices HIP_CHECK( - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate host memory and intialize data std::vector A_h(N), B_h(N); @@ -328,7 +321,7 @@ TEST_CASE("Unit_hipMemMap_PhysicalMemoryReuse_MultiDev") { * for single vmm address. This test validates VMM memory reuse * using different physical ranges. 
* ------------------------ - * - catch\unit\memory\hipMemMap.cc + * - unit/virtualMemoryManagement/hipMemMap.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -339,24 +332,22 @@ TEST_CASE("Unit_hipMemMap_VMMMemoryReuse_SingleGPU") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices HIP_CHECK( - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle[num_buf]; // Allocate host memory and intialize data std::vector A_h(N), B_h(N), C_h(N); // Initialize with data for (size_t idx = 0; idx < N; idx++) { A_h[idx] = idx; - C_h[idx] = idx*idx; + C_h[idx] = idx * idx; } // Allocate a physical memory chunk for (int buf = 0; buf < num_buf; buf++) { @@ -379,8 +370,7 @@ TEST_CASE("Unit_hipMemMap_VMMMemoryReuse_SingleGPU") { HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); #if HT_NVIDIA - square_kernel <<>>( - static_cast(ptrA)); + square_kernel<<>>(static_cast(ptrA)); HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); HIP_CHECK(hipStreamSynchronize(0)); REQUIRE(true == std::equal(B_h.begin(), B_h.end(), C_h.data())); @@ -401,7 +391,7 @@ TEST_CASE("Unit_hipMemMap_VMMMemoryReuse_SingleGPU") { * can be mapped/unmapped to single vmm address. This test validates VMM * memory reuse using different physical ranges. 
* ------------------------ - * - catch\unit\memory\hipMemMap.cc + * - unit/virtualMemoryManagement/hipMemMap.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -417,17 +407,15 @@ TEST_CASE("Unit_hipMemMap_VMMMemoryReuse_MultiGPU") { size_t buffer_size = N * sizeof(int); hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices HIP_CHECK( - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; std::vector handle(devicecount); // Allocate host memory and intialize data std::vector A_h(N), B_h(N); @@ -493,7 +481,7 @@ TEST_CASE("Unit_hipMemMap_VMMMemoryReuse_MultiGPU") { * - Check if a partial part of a physical chunk can be mapped/unmapped * to a smaller vmm address. 
* ------------------------ - * - catch\unit\memory\hipMemMap.cc + * - unit/virtualMemoryManagement/hipMemMap.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -504,17 +492,15 @@ TEST_CASE("Unit_hipMemMap_MapPartialPhysicalMem") { size_t buffer_size = N * sizeof(int); hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices HIP_CHECK( - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate host memory and intialize data std::vector A_h(N), B_h(N); @@ -523,7 +509,7 @@ TEST_CASE("Unit_hipMemMap_MapPartialPhysicalMem") { A_h[idx] = idx; } // Allocate a bigger physical memory chunk of twice size_mem - HIP_CHECK(hipMemCreate(&handle, 2*size_mem, &prop, 0)); + HIP_CHECK(hipMemCreate(&handle, 2 * size_mem, &prop, 0)); // Allocate virtual address range of size size_mem hipDeviceptr_t ptrA; HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); @@ -549,7 +535,7 @@ TEST_CASE("Unit_hipMemMap_MapPartialPhysicalMem") { * - Check if a partial part of a VMM range can be mapped/unmapped * to a physical address. 
* ------------------------ - * - catch\unit\memory\hipMemMap.cc + * - unit/virtualMemoryManagement/hipMemMap.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -560,17 +546,15 @@ TEST_CASE("Unit_hipMemMap_MapPartialVMMMem") { size_t buffer_size = N * sizeof(int); hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices HIP_CHECK( - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate host memory and intialize data std::vector A_h(N), B_h(N); @@ -582,13 +566,13 @@ TEST_CASE("Unit_hipMemMap_MapPartialVMMMem") { HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); // Allocate virtual address range of size twice size_mem hipDeviceptr_t ptrA; - HIP_CHECK(hipMemAddressReserve(&ptrA, 2*size_mem, 0, 0, 0)); + HIP_CHECK(hipMemAddressReserve(&ptrA, 2 * size_mem, 0, 0, 0)); hipMemAccessDesc accessDesc = {}; accessDesc.location.type = hipMemLocationTypeDevice; accessDesc.location.id = device; accessDesc.flags = hipMemAccessFlagsProtReadWrite; std::fill(B_h.begin(), B_h.end(), initializer); - HIP_CHECK(hipMemMap(ptrA , size_mem, 0, handle, 0)); + HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); @@ -596,7 +580,7 @@ TEST_CASE("Unit_hipMemMap_MapPartialVMMMem") { HIP_CHECK(hipMemUnmap(ptrA, 
size_mem)); // Release resources HIP_CHECK(hipMemRelease(handle)); - HIP_CHECK(hipMemAddressFree(ptrA, 2*size_mem)); + HIP_CHECK(hipMemAddressFree(ptrA, 2 * size_mem)); } /** @@ -604,7 +588,7 @@ TEST_CASE("Unit_hipMemMap_MapPartialVMMMem") { * ------------------------ * - Negative Argument Tests * ------------------------ - * - catch\unit\memory\hipMemMap.cc + * - unit/virtualMemoryManagement/hipMemMap.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -615,16 +599,15 @@ TEST_CASE("Unit_hipMemMap_negative") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; hipDeviceptr_t ptrA; // Allocate physical memory @@ -633,8 +616,7 @@ TEST_CASE("Unit_hipMemMap_negative") { HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); SECTION("nullptr to ptrA") { - REQUIRE(hipMemMap(nullptr, size_mem, 0, handle, 0) == - hipErrorInvalidValue); + REQUIRE(hipMemMap(nullptr, size_mem, 0, handle, 0) == hipErrorInvalidValue); } SECTION("pass zero to size") { @@ -642,9 +624,9 @@ TEST_CASE("Unit_hipMemMap_negative") { } SECTION("pass negative to offset") { - REQUIRE(hipMemMap(&ptrA, size_mem, -1, handle, 0) == - hipErrorInvalidValue); + REQUIRE(hipMemMap(&ptrA, size_mem, -1, handle, 0) == hipErrorInvalidValue); } + HIP_CHECK(hipMemRelease(handle)); 
HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); } diff --git a/catch/unit/virtualMemoryManagement/hipMemMapArrayAsync.cc b/catch/unit/virtualMemoryManagement/hipMemMapArrayAsync.cc new file mode 100644 index 0000000000..2bc726f82c --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hipMemMapArrayAsync.cc @@ -0,0 +1,110 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup hipMemMapArrayAsync hipMemMapArrayAsync + * @{ + * @ingroup VirtualMemoryManagementTest + * `hipError_t hipMemMapArrayAsync(hipArrayMapInfo *mapInfoList, + * unsigned int count, + * hipStream_t stream)` - + * Maps or unmaps subregions of sparse HIP arrays and sparse HIP mipmapped arrays. + */ + +#include +#include +#include + +#include "hip_vmm_common.hh" + +/** + * Test Description + * ------------------------ + * - Basic sanity test. 
+ * ------------------------ + * - unit/virtualMemoryManagement/hipMemMapArrayAsync.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemMapArrayAsync_Positive_Basic") { + HIP_CHECK(hipFree(0)); + + hipDevice_t device; + HIP_CHECK(hipDeviceGet(&device, 0)); + checkVMMSupported(device); + + CHECK_IMAGE_SUPPORT; + + hipmipmappedArray array; + + HIP_ARRAY3D_DESCRIPTOR desc = {}; + using vec_info = vector_info; + desc.Format = vec_info::format; + desc.NumChannels = vec_info::size; + desc.Width = 1; + desc.Height = 1; + desc.Flags = CUDA_ARRAY3D_SPARSE; + + unsigned int levels = 2; + + HIP_CHECK(hipMipmappedArrayCreate(&array, &desc, levels)); + + hipMemAllocationProp prop = {}; + prop.type = hipMemAllocationTypePinned; + prop.location.type = hipMemLocationTypeDevice; + prop.location.id = device; + prop.allocFlags.usage = CU_MEM_CREATE_USAGE_TILE_POOL; + + size_t granularity; + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityRecommended)); + + hipMemGenericAllocationHandle_t handle; + HIP_CHECK(hipMemCreate(&handle, granularity, &prop, 0)); + + hipArrayMapInfo map_info_list = {}; + map_info_list.resourceType = HIP_RESOURCE_TYPE_MIPMAPPED_ARRAY; + map_info_list.resource.mipmap = array; + map_info_list.subresourceType = hipArraySparseSubresourceTypeSparseLevel; + map_info_list.subresource.sparseLevel.extentWidth = 1; + map_info_list.subresource.sparseLevel.extentHeight = 1; + map_info_list.subresource.sparseLevel.extentDepth = 1; + map_info_list.memOperationType = hipMemOperationTypeMap; + map_info_list.memHandleType = hipMemHandleTypeGeneric; + map_info_list.memHandle.memHandle = handle; + map_info_list.deviceBitMask = 0x1; + + StreamGuard stream(Streams::created); + + HIP_CHECK(hipMemMapArrayAsync(&map_info_list, 1, stream.stream())); + HIP_CHECK(hipStreamSynchronize(stream.stream())); + + map_info_list.memOperationType = hipMemOperationTypeUnmap; + 
map_info_list.memHandle.memHandle = NULL; + HIP_CHECK(hipMemMapArrayAsync(&map_info_list, 1, stream.stream())); + HIP_CHECK(hipStreamSynchronize(stream.stream())); + + HIP_CHECK(hipMemRelease(handle)); + + HIP_CHECK(hipMipmappedArrayDestroy(array)); +} \ No newline at end of file diff --git a/catch/unit/memory/hipMemRelease.cc b/catch/unit/virtualMemoryManagement/hipMemRelease.cc similarity index 89% rename from catch/unit/memory/hipMemRelease.cc rename to catch/unit/virtualMemoryManagement/hipMemRelease.cc index d43647da24..e544710757 100644 --- a/catch/unit/memory/hipMemRelease.cc +++ b/catch/unit/virtualMemoryManagement/hipMemRelease.cc @@ -23,7 +23,7 @@ THE SOFTWARE. /** * @addtogroup hipMemRelease hipMemRelease * @{ - * @ingroup MemoryTest + * @ingroup VirtualMemoryManagementTest * `hipMemRelease(hipMemGenericAllocationHandle_t handle)` - * Release a memory handle representing a memory allocation which was previously * allocated through hipMemCreate. @@ -36,13 +36,11 @@ THE SOFTWARE. * ------------------------ * - Negative Tests * ------------------------ - * - catch\unit\memory\hipMemRelease.cc + * - unit/virtualMemoryManagement/hipMemRelease.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 */ TEST_CASE("Unit_hipMemRelease_negative") { - SECTION("Nullptr to handle") { - REQUIRE(hipMemRelease(nullptr) == hipErrorInvalidValue); - } + SECTION("Nullptr to handle") { REQUIRE(hipMemRelease(nullptr) == hipErrorInvalidValue); } } diff --git a/catch/unit/memory/hipMemRetainAllocationHandle.cc b/catch/unit/virtualMemoryManagement/hipMemRetainAllocationHandle.cc similarity index 76% rename from catch/unit/memory/hipMemRetainAllocationHandle.cc rename to catch/unit/virtualMemoryManagement/hipMemRetainAllocationHandle.cc index 29910c61aa..1b4d00dea3 100644 --- a/catch/unit/memory/hipMemRetainAllocationHandle.cc +++ b/catch/unit/virtualMemoryManagement/hipMemRetainAllocationHandle.cc @@ -23,7 +23,7 @@ THE SOFTWARE. 
/** * @addtogroup hipMemRetainAllocationHandle hipMemRetainAllocationHandle * @{ - * @ingroup MemoryTest + * @ingroup VirtualMemoryManagementTest * `hipError_t hipMemRetainAllocationHandle(hipMemGenericAllocationHandle_t* handle, * void* addr)` - * Returns the allocation handle of the backing memory allocation given the address. @@ -31,6 +31,7 @@ THE SOFTWARE. #include #include + #include "hip_vmm_common.hh" #define DATA_SIZE (1 << 13) @@ -41,41 +42,39 @@ THE SOFTWARE. * - Create a VM mapped to physical memory. Input addr to * hipMemRetainAllocationHandle and validate the handle. * ------------------------ - * - catch\unit\memory\hipMemRetainAllocationHandle.cc + * - unit/virtualMemoryManagement/hipMemRetainAllocationHandle.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 */ TEST_CASE("Unit_hipMemRetainAllocationHandle_SetGet") { + HIP_CHECK(hipFree(0)); size_t granularity = 0; constexpr int N = DATA_SIZE; size_t buffer_size = N * sizeof(int); int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; hipDeviceptr_t ptrA; // Allocate physical memory HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); // Allocate virtual address range - HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, - 0, 0, 0)); + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); 
HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); // Test hipMemRetainAllocationHandle hipMemGenericAllocationHandle_t gethandle; // Check beginning of VMM ptr - HIP_CHECK(hipMemRetainAllocationHandle(&gethandle, - reinterpret_cast(ptrA))); + HIP_CHECK(hipMemRetainAllocationHandle(&gethandle, reinterpret_cast(ptrA))); REQUIRE(gethandle == handle); HIP_CHECK(hipMemRelease(handle)); HIP_CHECK(hipMemUnmap(ptrA, size_mem)); @@ -87,58 +86,56 @@ TEST_CASE("Unit_hipMemRetainAllocationHandle_SetGet") { * ------------------------ * - Negative Tests * ------------------------ - * - catch\unit\memory\hipMemRetainAllocationHandle.cc + * - unit/virtualMemoryManagement/hipMemRetainAllocationHandle.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 */ TEST_CASE("Unit_hipMemRetainAllocationHandle_NegTst") { + HIP_CHECK(hipFree(0)); size_t granularity = 0; constexpr int N = DATA_SIZE; size_t buffer_size = N * sizeof(int); int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; hipDeviceptr_t ptrA; // Allocate physical memory HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); // Allocate virtual address range - HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, - 0, 0, 0)); + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); 
// Test hipMemRetainAllocationHandle hipMemGenericAllocationHandle_t gethandle; SECTION("nullptr handle") { - REQUIRE(hipMemRetainAllocationHandle(nullptr, - reinterpret_cast(ptrA)) == hipErrorInvalidValue); + REQUIRE(hipMemRetainAllocationHandle(nullptr, reinterpret_cast(ptrA)) == + hipErrorInvalidValue); } SECTION("nullptr Vmm ptr") { - REQUIRE(hipMemRetainAllocationHandle(&gethandle, nullptr) == - hipErrorInvalidValue); + REQUIRE(hipMemRetainAllocationHandle(&gethandle, nullptr) == hipErrorInvalidValue); } SECTION("not mapped address") { hipDeviceptr_t ptrB; HIP_CHECK(hipMemAddressReserve(&ptrB, size_mem, 0, 0, 0)); - REQUIRE(hipMemRetainAllocationHandle(&gethandle, - reinterpret_cast(ptrB)) == hipErrorInvalidValue); + REQUIRE(hipMemRetainAllocationHandle(&gethandle, reinterpret_cast(ptrB)) == + hipErrorInvalidValue); HIP_CHECK(hipMemAddressFree(ptrB, size_mem)); } HIP_CHECK(hipMemRelease(handle)); HIP_CHECK(hipMemUnmap(ptrA, size_mem)); SECTION("unmapped address") { - REQUIRE(hipMemRetainAllocationHandle(&gethandle, - reinterpret_cast(ptrA)) == hipErrorInvalidValue); + REQUIRE(hipMemRetainAllocationHandle(&gethandle, reinterpret_cast(ptrA)) == + hipErrorInvalidValue); } HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); } diff --git a/catch/unit/memory/hipMemSetGetAccess.cc b/catch/unit/virtualMemoryManagement/hipMemSetGetAccess.cc similarity index 83% rename from catch/unit/memory/hipMemSetGetAccess.cc rename to catch/unit/virtualMemoryManagement/hipMemSetGetAccess.cc index 20910f9803..dca05c5f1a 100644 --- a/catch/unit/memory/hipMemSetGetAccess.cc +++ b/catch/unit/virtualMemoryManagement/hipMemSetGetAccess.cc @@ -23,7 +23,7 @@ THE SOFTWARE. /** * @addtogroup hipMemSetAccess hipMemSetAccess * @{ - * @ingroup MemoryTest + * @ingroup VirtualMemoryManagementTest * `hipError_t hipMemSetAccess (void* ptr, * size_t size, * const hipMemAccessDesc* desc, @@ -31,18 +31,23 @@ THE SOFTWARE. 
* Set the access flags for each location specified in desc for the given * virtual address range. */ -#include "hipMallocManagedCommon.hh" -#include "hip_vmm_common.hh" -#include -#include + #ifdef __linux__ #include #include #endif + +#include +#include + +#include "hipMallocManagedCommon.hh" +#include "hip_vmm_common.hh" + #define THREADS_PER_BLOCK 512 #define NUM_OF_BUFFERS 3 #define DATA_SIZE (1 << 13) -#define NEW_DATA_SIZE (2*DATA_SIZE) +#define NEW_DATA_SIZE (2 * DATA_SIZE) + constexpr int initializer = 0; /** @@ -63,7 +68,7 @@ static __global__ void square_kernel(int* Buff) { * Validate that flags = hipMemAccessFlagsProtNone is returned by * hipMemGetAccess() when location is set to device 1. * ------------------------ - * - catch\unit\memory\hipMemSetGetAccess.cc + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -75,23 +80,21 @@ TEST_CASE("Unit_hipMemSetAccess_SetGet") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate physical memory HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); // Allocate virtual address range hipDeviceptr_t ptrA; - HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, - 0, 0, 0)); + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); 
HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); HIP_CHECK(hipMemRelease(handle)); // Set access @@ -132,7 +135,7 @@ TEST_CASE("Unit_hipMemSetAccess_SetGet") { * flags = hipMemAccessFlagsProtReadWrite is returned by hipMemGetAccess() * when location is set to device 1. * ------------------------ - * - catch\unit\memory\hipMemSetGetAccess.cc + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -150,18 +153,17 @@ TEST_CASE("Unit_hipMemSetAccess_MultDevSetGet") { } HIP_CHECK(hipDeviceGet(&device0, deviceId)); - checkVMMSupported(device0) + checkVMMSupported(device0); HIP_CHECK(hipDeviceGet(&device1, (deviceId + 1))); - checkVMMSupported(device1) + checkVMMSupported(device1); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device0; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate physical memory HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); @@ -203,7 +205,7 @@ TEST_CASE("Unit_hipMemSetAccess_MultDevSetGet") { * to device 0. Validate that flags = 3 is returned by hipMemGetAccess() * for entire virtual address range when location is set to device 0. 
* ------------------------ - * - catch\unit\memory\hipMemSetGetAccess.cc + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -215,23 +217,21 @@ TEST_CASE("Unit_hipMemSetAccess_EntireVMMRangeSetGet") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; // Allocate physical memory hipMemGenericAllocationHandle_t handle; hipDeviceptr_t ptrA; HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); // Allocate virtual address range - HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, - 0, 0, 0)); + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); HIP_CHECK(hipMemRelease(handle)); // Set access @@ -250,8 +250,7 @@ TEST_CASE("Unit_hipMemSetAccess_EntireVMMRangeSetGet") { REQUIRE(flags == hipMemAccessFlagsProtReadWrite); uint64_t uiptr = reinterpret_cast(ptrA); uiptr += (size_mem - 1); - HIP_CHECK(hipMemGetAccess(&flags, &location, - reinterpret_cast(uiptr))); + HIP_CHECK(hipMemGetAccess(&flags, &location, reinterpret_cast(uiptr))); REQUIRE(flags == hipMemAccessFlagsProtReadWrite); HIP_CHECK(hipMemUnmap(ptrA, size_mem)); HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); @@ -262,7 +261,7 @@ TEST_CASE("Unit_hipMemSetAccess_EntireVMMRangeSetGet") { * ------------------------ * - Negative Tests * 
------------------------ - * - catch\unit\memory\hipMemSetGetAccess.cc + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -274,23 +273,21 @@ TEST_CASE("Unit_hipMemGetAccess_NegTst") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; // Allocate physical memory hipMemGenericAllocationHandle_t handle; hipDeviceptr_t ptrA; HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); // Allocate virtual address range - HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, - 0, 0, 0)); + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); HIP_CHECK(hipMemRelease(handle)); // Set access @@ -326,7 +323,7 @@ TEST_CASE("Unit_hipMemGetAccess_NegTst") { * address range, launch a kernel to perform operation on the data and * validate the result. 
* ------------------------ - * - catch\unit\memory\hipMemSetGetAccess.cc + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -340,16 +337,15 @@ TEST_CASE("Unit_hipMemSetAccess_FuncTstOnMultDev") { HIP_CHECK(hipGetDeviceCount(&devicecount)); for (deviceId = 0; deviceId < devicecount; deviceId++) { HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; // Allocate physical memory hipDeviceptr_t ptrA; hipMemGenericAllocationHandle_t handle; @@ -371,9 +367,8 @@ TEST_CASE("Unit_hipMemSetAccess_FuncTstOnMultDev") { } HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); // Launch square kernel - hipLaunchKernelGGL(square_kernel, dim3(N / THREADS_PER_BLOCK), - dim3(THREADS_PER_BLOCK), 0, 0, - static_cast(ptrA)); + hipLaunchKernelGGL(square_kernel, dim3(N / THREADS_PER_BLOCK), dim3(THREADS_PER_BLOCK), 0, 0, + static_cast(ptrA)); HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, buffer_size)); HIP_CHECK(hipDeviceSynchronize()); REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); @@ -389,7 +384,7 @@ TEST_CASE("Unit_hipMemSetAccess_FuncTstOnMultDev") { * Access (Read/Write) the virtual pointer directly on host. * Ensure this behavior for all devices on host. 
------------------------ - * - catch\unit\memory\hipMemMap.cc + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -407,17 +402,15 @@ TEST_CASE("Unit_hipMemSetAccess_AccessDirectlyFromHost") { for (int dev = 0; dev < devicecount; dev++) { hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, dev)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices HIP_CHECK( - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate a physical memory chunk HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); @@ -453,7 +446,7 @@ TEST_CASE("Unit_hipMemSetAccess_AccessDirectlyFromHost") { * the property of the range to read only. Check if the memory * range can be read.
------------------------ - * - catch\unit\memory\hipMemMap.cc + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -465,17 +458,15 @@ TEST_CASE("Unit_hipMemSetAccess_ChangeAccessProp") { int dev = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, dev)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices HIP_CHECK( - hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate host memory and intialize data std::vector A_h(N), B_h(N); // Initialize with data @@ -526,14 +517,12 @@ TEST_CASE("Unit_hipMemSetAccess_ChangeAccessProp") { SECTION("Check error while writing on Read-Only memory") { accessDesc.flags = hipMemAccessFlagsProtRead; HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); - REQUIRE(hipErrorInvalidValue == - hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + REQUIRE(hipErrorInvalidValue == hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); } SECTION("Check error while writing on inaccessible memory") { accessDesc.flags = hipMemAccessFlagsProtNone; HIP_CHECK(hipMemSetAccess(ptrA, size_mem, &accessDesc, 1)); - REQUIRE(hipErrorInvalidValue == - hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); + REQUIRE(hipErrorInvalidValue == hipMemcpyHtoD(ptrA, A_h.data(), buffer_size)); } #endif HIP_CHECK(hipMemUnmap(ptrA, size_mem)); @@ -541,6 +530,7 @@ TEST_CASE("Unit_hipMemSetAccess_ChangeAccessProp") { HIP_CHECK(hipMemRelease(handle));
HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); } + /** * Test Description * ------------------------ @@ -548,7 +538,7 @@ TEST_CASE("Unit_hipMemSetAccess_ChangeAccessProp") { * a Virtual Memory chunk and a Unified Memory chunk. Test if data can * be exchanged between these chunks. * ------------------------ - * - catch\unit\memory\hipMemSetGetAccess.cc + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -565,23 +555,21 @@ TEST_CASE("Unit_hipMemSetAccess_Vmm2UnifiedMemCpy") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; // Allocate physical memory hipMemGenericAllocationHandle_t handle; hipDeviceptr_t ptrA, ptrB; HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); // Allocate virtual address range - HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, - 0, 0, 0)); + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); HIP_CHECK(hipMemRelease(handle)); // Set access @@ -623,7 +611,7 @@ TEST_CASE("Unit_hipMemSetAccess_Vmm2UnifiedMemCpy") { * Memory chunk and a Device Memory chunk. Test if data can be exchanged * between these chunks. 
* ------------------------ - * - catch\unit\memory\hipMemSetGetAccess.cc + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -635,23 +623,21 @@ TEST_CASE("Unit_hipMemSetAccess_Vmm2DevMemCpy") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; // Allocate physical memory hipMemGenericAllocationHandle_t handle; hipDeviceptr_t ptrA, ptrB; HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); // Allocate virtual address range - HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, - 0, 0, 0)); + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); HIP_CHECK(hipMemRelease(handle)); // Set access @@ -682,7 +668,7 @@ TEST_CASE("Unit_hipMemSetAccess_Vmm2DevMemCpy") { * Peer Device Memory chunk. Test if data can be exchanged between * these chunks using hipMemcpyDtoD. 
* ------------------------ - * - catch\unit\memory\hipMemSetGetAccess.cc + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -694,23 +680,21 @@ TEST_CASE("Unit_hipMemSetAccess_Vmm2PeerDevMemCpy") { int deviceId = 0, value = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; // Allocate physical memory hipMemGenericAllocationHandle_t handle; hipDeviceptr_t ptrA; HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); // Allocate virtual address range - HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, - 0, 0, 0)); + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); HIP_CHECK(hipMemRelease(handle)); // Set access @@ -737,9 +721,8 @@ TEST_CASE("Unit_hipMemSetAccess_Vmm2PeerDevMemCpy") { break; } HIP_CHECK(hipDeviceGet(&device_other, deviceId)); - HIP_CHECK(hipDeviceGetAttribute(&value, - hipDeviceAttributeVirtualMemoryManagementSupported, - device_other)); + HIP_CHECK(hipDeviceGetAttribute(&value, hipDeviceAttributeVirtualMemoryManagementSupported, + device_other)); if (value == 0) { // Virtual Memory Mgmt is not supported WARN("Machine does not support Virtual Memory Management\n"); @@ -764,7 +747,7 @@ TEST_CASE("Unit_hipMemSetAccess_Vmm2PeerDevMemCpy") { * a Peer Device Memory chunk. 
Test if data can be exchanged between * these chunks using hipMemcpyPeer. * ------------------------ - * - catch\unit\memory\hipMemSetGetAccess.cc + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -776,23 +759,21 @@ TEST_CASE("Unit_hipMemSetAccess_Vmm2PeerPeerMemCpy") { int deviceId = 0, value = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; // Allocate physical memory hipMemGenericAllocationHandle_t handle; hipDeviceptr_t ptrA; HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); // Allocate virtual address range - HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, - 0, 0, 0)); + HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); HIP_CHECK(hipMemRelease(handle)); // Set access @@ -820,9 +801,8 @@ TEST_CASE("Unit_hipMemSetAccess_Vmm2PeerPeerMemCpy") { break; } HIP_CHECK(hipDeviceGet(&device_other, deviceId)); - HIP_CHECK(hipDeviceGetAttribute(&value, - hipDeviceAttributeVirtualMemoryManagementSupported, - device_other)); + HIP_CHECK(hipDeviceGetAttribute(&value, hipDeviceAttributeVirtualMemoryManagementSupported, + device_other)); if (value == 0) { // Virtual Memory Mgmt is not supported WARN("Machine does not support Virtual Memory Management\n"); @@ -848,7 +828,7 @@ 
TEST_CASE("Unit_hipMemSetAccess_Vmm2PeerPeerMemCpy") { * address space in device 0(PtrB). Check if data can be copied from * PtrA -> PtrB using hipMemcpy. * ------------------------ - * - catch\unit\memory\hipMemSetGetAccess.cc + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -860,16 +840,15 @@ TEST_CASE("Unit_hipMemSetAccess_Vmm2VMMMemCpy") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; // Allocate physical memory hipMemGenericAllocationHandle_t handle1, handle2; HIP_CHECK(hipMemCreate(&handle1, size_mem, &prop, 0)); @@ -912,7 +891,7 @@ TEST_CASE("Unit_hipMemSetAccess_Vmm2VMMMemCpy") { * address space in device 1(PtrB). Check if data can be copied from * PtrA -> PtrB using hipMemcpyPeer. 
* ------------------------ - * - catch\unit\memory\hipMemSetGetAccess.cc + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -924,16 +903,15 @@ TEST_CASE("Unit_hipMemSetAccess_Vmm2VMMInterDevMemCpy") { int deviceId = 0, value = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate physical memory HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); @@ -966,9 +944,8 @@ TEST_CASE("Unit_hipMemSetAccess_Vmm2VMMInterDevMemCpy") { } std::fill(B_h.begin(), B_h.end(), initializer); HIP_CHECK(hipDeviceGet(&device_other, deviceId)); - HIP_CHECK(hipDeviceGetAttribute(&value, - hipDeviceAttributeVirtualMemoryManagementSupported, - device_other)); + HIP_CHECK(hipDeviceGetAttribute(&value, hipDeviceAttributeVirtualMemoryManagementSupported, + device_other)); if (value == 0) { // Virtual Memory Mgmt is not supported WARN("Machine does not support Virtual Memory Management\n"); @@ -980,9 +957,8 @@ TEST_CASE("Unit_hipMemSetAccess_Vmm2VMMInterDevMemCpy") { prop_loc.location.type = hipMemLocationTypeDevice; prop_loc.location.id = device_other; // Current Devices HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop_loc, - hipMemAllocationGranularityMinimum)); - size_t size_mem_loc = - ((granularity + buffer_size - 1) / 
granularity) * granularity; + hipMemAllocationGranularityMinimum)); + size_t size_mem_loc = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle_loc; // Allocate physical memory HIP_CHECK(hipMemCreate(&handle_loc, size_mem_loc, &prop_loc, 0)); @@ -1015,7 +991,7 @@ TEST_CASE("Unit_hipMemSetAccess_Vmm2VMMInterDevMemCpy") { * chunk of memory and map it to device1. Check if these 2 distinct memory * chunks can be mapped to a single address space. * ------------------------ - * - catch\unit\memory\hipMemSetGetAccess.cc + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -1028,32 +1004,28 @@ TEST_CASE("Unit_hipMemSetAccess_MapPhysChksFromMulDev") { size_t buffer_size = N * sizeof(int), granularity = 0; int deviceId = 0; // Allocate resources for all gpus - hipMemGenericAllocationHandle_t *handle = - static_cast( - malloc(sizeof(hipMemGenericAllocationHandle_t)*numOfBuffers)); + hipMemGenericAllocationHandle_t* handle = static_cast( + malloc(sizeof(hipMemGenericAllocationHandle_t) * numOfBuffers)); REQUIRE(handle != nullptr); - size_t *size_mem = static_cast( - malloc(sizeof(size_t)*numOfBuffers)); + size_t* size_mem = static_cast(malloc(sizeof(size_t) * numOfBuffers)); REQUIRE(size_mem != nullptr); size_t total_mem = 0; // Create memory chunks for (deviceId = 0; deviceId < numOfBuffers; deviceId++) { hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop_loc{}; prop_loc.type = hipMemAllocationTypePinned; prop_loc.location.type = hipMemLocationTypeDevice; prop_loc.location.id = device; // Current Devices HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop_loc, - hipMemAllocationGranularityMinimum)); + hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_mem[deviceId] = - ((granularity + buffer_size - 1) / granularity) * 
granularity; + size_mem[deviceId] = ((granularity + buffer_size - 1) / granularity) * granularity; total_mem = total_mem + size_mem[deviceId]; // Allocate physical memory chunks - HIP_CHECK(hipMemCreate(&handle[deviceId], size_mem[deviceId], - &prop_loc, 0)); + HIP_CHECK(hipMemCreate(&handle[deviceId], size_mem[deviceId], &prop_loc, 0)); } // Allocate virtual address range for all the memory chunks hipDeviceptr_t ptrA; @@ -1063,9 +1035,9 @@ TEST_CASE("Unit_hipMemSetAccess_MapPhysChksFromMulDev") { hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); uint64_t uiptr = reinterpret_cast(ptrA); - uiptr = uiptr + deviceId*size_mem[deviceId]; - HIP_CHECK(hipMemMap(reinterpret_cast(uiptr), - size_mem[deviceId], 0, handle[deviceId], 0)); + uiptr = uiptr + deviceId * size_mem[deviceId]; + HIP_CHECK( + hipMemMap(reinterpret_cast(uiptr), size_mem[deviceId], 0, handle[deviceId], 0)); HIP_CHECK(hipMemRelease(handle[deviceId])); // Set access hipMemAccessDesc accessDesc_loc = {}; @@ -1073,46 +1045,28 @@ TEST_CASE("Unit_hipMemSetAccess_MapPhysChksFromMulDev") { accessDesc_loc.location.id = device; accessDesc_loc.flags = hipMemAccessFlagsProtReadWrite; // Make the address accessible to deviceId - HIP_CHECK(hipMemSetAccess(reinterpret_cast(uiptr), - size_mem[deviceId], &accessDesc_loc, 1)); + HIP_CHECK( + hipMemSetAccess(reinterpret_cast(uiptr), size_mem[deviceId], &accessDesc_loc, 1)); } - std::vector A_h(numOfBuffers*N), - B_h(numOfBuffers*N); + std::vector A_h(numOfBuffers * N), B_h(numOfBuffers * N); // Fill Data - for (int idx = 0; idx < (numOfBuffers*N); idx++) { - A_h[idx] = idx*idx; + for (int idx = 0; idx < (numOfBuffers * N); idx++) { + A_h[idx] = idx * idx; } - HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), numOfBuffers*buffer_size)); - HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, numOfBuffers*buffer_size)); + HIP_CHECK(hipMemcpyHtoD(ptrA, A_h.data(), numOfBuffers * buffer_size)); + HIP_CHECK(hipMemcpyDtoH(B_h.data(), ptrA, numOfBuffers * buffer_size)); // Validate 
Results REQUIRE(true == std::equal(B_h.begin(), B_h.end(), A_h.data())); for (deviceId = 0; deviceId < numOfBuffers; deviceId++) { uint64_t uiptr = reinterpret_cast(ptrA); - uiptr = uiptr + deviceId*size_mem[deviceId]; - HIP_CHECK(hipMemUnmap(reinterpret_cast(uiptr), - size_mem[deviceId])); + uiptr = uiptr + deviceId * size_mem[deviceId]; + HIP_CHECK(hipMemUnmap(reinterpret_cast(uiptr), size_mem[deviceId])); } HIP_CHECK(hipMemAddressFree(ptrA, total_mem)); free(handle); free(size_mem); } -/** - * Test Description - * ------------------------ - * - Testing memory resize: Allocate physical memory and map it to virtual - * address range (PtrA). After setting device permission, copy data from - * host to device. Allocate another chunk of memory of a different size. - * Map the new chunk to offset (PtrA + size of old chunk). - * After setting device permission, copy data from host to device at - * offset (PtrA + size of old chunk). Validate both the old data and new - * data after copying back to host. 
- * ------------------------ - * - catch\unit\memory\hipMemSetGetAccess.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.1 - */ class vmm_resize_class { size_t current_size_tot; size_t current_size_rounded_tot; @@ -1120,18 +1074,16 @@ class vmm_resize_class { std::vector vhandle; std::vector vsize; // allocate initial VMM memory chunk - int allocate_vmm(hipDeviceptr_t *ptr, hipDevice_t device, - size_t size) { + int allocate_vmm(hipDeviceptr_t* ptr, hipDevice_t device, size_t size) { size_t granularity = 0; hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_rounded = - ((granularity + size - 1) / granularity) * granularity; + size_t size_rounded = ((granularity + size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate physical memory HIP_CHECK(hipMemCreate(&handle, size_rounded, &prop, 0)); @@ -1155,20 +1107,20 @@ class vmm_resize_class { } public: - vmm_resize_class(hipDeviceptr_t *ptr, hipDevice_t device, size_t size): - current_size_tot(0), current_size_rounded_tot(0) { + vmm_resize_class(hipDeviceptr_t* ptr, hipDevice_t device, size_t size) + : current_size_tot(0), current_size_rounded_tot(0) { allocate_vmm(ptr, device, size); } // Free all VMM void free_vmm() { - for (hipMemGenericAllocationHandle_t &myhandle : vhandle) { + for (hipMemGenericAllocationHandle_t& myhandle : vhandle) { HIP_CHECK(hipMemRelease(myhandle)); } HIP_CHECK(hipMemUnmap(ptrVmm, current_size_rounded_tot)); HIP_CHECK(hipMemAddressFree(ptrVmm, current_size_rounded_tot)); } // grow memory chunk - int grow_vmm(hipDeviceptr_t *ptr, hipDevice_t device, size_t size) { + int 
grow_vmm(hipDeviceptr_t* ptr, hipDevice_t device, size_t size) { size_t granularity = 0; if (size <= current_size_tot) { return -1; @@ -1177,13 +1129,12 @@ class vmm_resize_class { prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); // diff size size_t grow_size = (size - current_size_tot); - size_t size_rounded = - ((granularity + grow_size - 1) / granularity) * granularity; + size_t size_rounded = ((granularity + grow_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate physical memory HIP_CHECK(hipMemCreate(&handle, size_rounded, &prop, 0)); @@ -1194,17 +1145,15 @@ class vmm_resize_class { // Unmap and Free the old vmm HIP_CHECK(hipMemUnmap(ptrVmm, current_size_rounded_tot)); HIP_CHECK(hipMemAddressFree(ptrVmm, current_size_rounded_tot)); - HIP_CHECK(hipMemAddressReserve(&ptrVmm, - (size_rounded + current_size_rounded_tot), 0, 0, 0)); + HIP_CHECK(hipMemAddressReserve(&ptrVmm, (size_rounded + current_size_rounded_tot), 0, 0, 0)); int idx = 0; - for (hipMemGenericAllocationHandle_t &myhandle : vhandle) { + for (hipMemGenericAllocationHandle_t& myhandle : vhandle) { if (idx == 0) { HIP_CHECK(hipMemMap(ptrVmm, vsize[idx], 0, myhandle, 0)); } else { uint64_t uiptr = reinterpret_cast(ptrVmm); - uiptr = uiptr + vsize[idx-1]; - HIP_CHECK(hipMemMap(reinterpret_cast(uiptr), - vsize[idx], 0, myhandle, 0)); + uiptr = uiptr + vsize[idx - 1]; + HIP_CHECK(hipMemMap(reinterpret_cast(uiptr), vsize[idx], 0, myhandle, 0)); } idx++; } @@ -1214,9 +1163,7 @@ class vmm_resize_class { accessDesc.location.id = device; accessDesc.flags = hipMemAccessFlagsProtReadWrite; // Make the address accessible to GPU 0 - 
HIP_CHECK(hipMemSetAccess(ptrVmm, - (size_rounded + current_size_rounded_tot), - &accessDesc, 1)); + HIP_CHECK(hipMemSetAccess(ptrVmm, (size_rounded + current_size_rounded_tot), &accessDesc, 1)); *ptr = ptrVmm; current_size_tot += size; current_size_rounded_tot += size_rounded; @@ -1224,6 +1171,22 @@ class vmm_resize_class { } }; +/** + * Test Description + * ------------------------ + * - Testing memory resize: Allocate physical memory and map it to virtual + * address range (PtrA). After setting device permission, copy data from + * host to device. Allocate another chunk of memory of a different size. + * Map the new chunk to offset (PtrA + size of old chunk). + * After setting device permission, copy data from host to device at + * offset (PtrA + size of old chunk). Validate both the old data and new + * data after copying back to host. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ TEST_CASE("Unit_hipMemSetAccess_GrowVMM") { hipDeviceptr_t ptr; constexpr int N = DATA_SIZE; @@ -1231,11 +1194,11 @@ TEST_CASE("Unit_hipMemSetAccess_GrowVMM") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); // Create VMM Object of size buffer_size vmm_resize_class resizeobj(&ptr, device, buffer_size); // Inititalize Host Buffer - int *ptrA_h = static_cast(malloc(buffer_size)); + int* ptrA_h = static_cast(malloc(buffer_size)); REQUIRE(ptrA_h != nullptr); for (int idx = 0; idx < N; idx++) { ptrA_h[idx] = idx; @@ -1250,17 +1213,16 @@ TEST_CASE("Unit_hipMemSetAccess_GrowVMM") { return; } free(ptrA_h); - ptrA_h = static_cast(malloc(buffer_size_new - buffer_size)); + ptrA_h = static_cast(malloc(buffer_size_new - buffer_size)); REQUIRE(ptrA_h != nullptr); for (int idx = 0; idx < (Nnew - N); idx++) { ptrA_h[idx] = N + idx; } - int *ptrB_h = static_cast(malloc(buffer_size_new)); + 
int* ptrB_h = static_cast(malloc(buffer_size_new)); REQUIRE(ptrB_h != nullptr); uint64_t uiptr = reinterpret_cast(ptr); uiptr = uiptr + buffer_size; - HIP_CHECK(hipMemcpyHtoD(reinterpret_cast(uiptr), - ptrA_h, (buffer_size_new - buffer_size))); + HIP_CHECK(hipMemcpyHtoD(reinterpret_cast(uiptr), ptrA_h, (buffer_size_new - buffer_size))); HIP_CHECK(hipMemcpyDtoH(ptrB_h, ptr, buffer_size_new)); bool bPassed = true; for (int idx = 0; idx < Nnew; idx++) { @@ -1275,18 +1237,6 @@ TEST_CASE("Unit_hipMemSetAccess_GrowVMM") { resizeobj.free_vmm(); } -/** - * Test Description - * ------------------------ - * - Multithreaded test: Allocate unique virtual memory chunks from - * multiple threads. Transfer data to these chunks from host and execute - * kernel function on these data. Validate the results. - * ------------------------ - * - catch\unit\memory\hipMemSetGetAccess.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.1 - */ std::atomic bTestPassed{1}; #define NUM_THREADS 5 void test_thread(hipDevice_t device) { @@ -1296,14 +1246,14 @@ void test_thread(hipDevice_t device) { // Create VMM Object of size buffer_size vmm_resize_class vmmobj(&ptr, device, buffer_size); // Inititalize Host Buffer - int *ptrA_h = static_cast(malloc(buffer_size)); + int* ptrA_h = static_cast(malloc(buffer_size)); REQUIRE(ptrA_h != nullptr); for (int idx = 0; idx < N; idx++) { ptrA_h[idx] = idx; } // Copy to VMM HIP_CHECK(hipMemcpyHtoD(ptr, ptrA_h, buffer_size)); - int *ptrB_h = static_cast(malloc(buffer_size)); + int* ptrB_h = static_cast(malloc(buffer_size)); REQUIRE(ptrB_h != nullptr); HIP_CHECK(hipMemcpyDtoH(ptrB_h, ptr, buffer_size)); bool bPassed = true; @@ -1323,11 +1273,23 @@ void test_thread(hipDevice_t device) { vmmobj.free_vmm(); } +/** + * Test Description + * ------------------------ + * - Multithreaded test: Allocate unique virtual memory chunks from + * multiple threads. Transfer data to these chunks from host and execute + * kernel function on these data. 
Validate the results. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ TEST_CASE("Unit_hipMemSetAccess_Multithreaded") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); std::thread T[NUM_THREADS]; for (int i = 0; i < NUM_THREADS; i++) { T[i] = std::thread(test_thread, device); @@ -1340,18 +1302,7 @@ TEST_CASE("Unit_hipMemSetAccess_Multithreaded") { } #ifdef __linux__ -/** - * Test Description - * ------------------------ - * - Multiprocess test: Allocate unique virtual memory chunks from - * multiple processes. Transfer data to these chunks from host and - * execute kernel function on these data. Validate the results. - * ------------------------ - * - catch\unit\memory\hipMemSetGetAccess.cc - * Test requirements - * ------------------------ - * - HIP_VERSION >= 6.1 - */ + bool test_mprocess() { int fd[2]; bool testResult = false; @@ -1369,7 +1320,7 @@ bool test_mprocess() { hipDeviceptr_t ptr; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupportedRetVal(device) + checkVMMSupportedRetVal(device); // Create VMM Object of size buffer_size vmm_resize_class vmmobj(&ptr, device, buffer_size); // Inititalize Host Buffer @@ -1399,7 +1350,7 @@ bool test_mprocess() { hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupportedRetVal(device) + checkVMMSupportedRetVal(device); // Create VMM Object of size buffer_size vmm_resize_class vmmobj(&ptr, device, buffer_size); // Inititalize Host Buffer @@ -1424,9 +1375,20 @@ bool test_mprocess() { return testResult; } -TEST_CASE("Unit_hipMemSetAccess_MultiProc") { - REQUIRE(true == test_mprocess()); -} +/** + * Test Description + * ------------------------ + * - Multiprocess test: Allocate unique virtual memory chunks from + * multiple processes. 
Transfer data to these chunks from host and + * execute kernel function on these data. Validate the results. + * ------------------------ + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.1 + */ +TEST_CASE("Unit_hipMemSetAccess_MultiProc") { REQUIRE(true == test_mprocess()); } + #endif /** @@ -1434,7 +1396,7 @@ TEST_CASE("Unit_hipMemSetAccess_MultiProc") { * ------------------------ * - Negative Tests for hipMemSetAccess() * ------------------------ - * - catch\unit\memory\hipMemSetGetAccess.cc + * - unit/virtualMemoryManagement/hipMemSetGetAccess.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -1446,16 +1408,15 @@ TEST_CASE("Unit_hipMemSetAccess_negative") { int deviceId = 0; hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; // Allocate physical memory HIP_CHECK(hipMemCreate(&handle, size_mem, &prop, 0)); @@ -1470,73 +1431,62 @@ TEST_CASE("Unit_hipMemSetAccess_negative") { accessDesc.flags = hipMemAccessFlagsProtReadWrite; SECTION("nullptr to ptrA") { - REQUIRE(hipMemSetAccess(nullptr, size_mem, &accessDesc, 1) == - hipErrorInvalidValue); + REQUIRE(hipMemSetAccess(nullptr, size_mem, &accessDesc, 1) == hipErrorInvalidValue); } SECTION("pass zero to size") { - REQUIRE(hipMemSetAccess(&ptrA, 0, 
&accessDesc, 1) == - hipErrorInvalidValue); + REQUIRE(hipMemSetAccess(&ptrA, 0, &accessDesc, 1) == hipErrorInvalidValue); } SECTION("pass a size greater than reserved size") { - REQUIRE(hipMemSetAccess(&ptrA, size_mem + 1, &accessDesc, 1) == - hipErrorInvalidValue); + REQUIRE(hipMemSetAccess(&ptrA, size_mem + 1, &accessDesc, 1) == hipErrorInvalidValue); } SECTION("pass a size less than reserved size") { - REQUIRE(hipMemSetAccess(&ptrA, size_mem - 1, &accessDesc, 1) == - hipErrorInvalidValue); + REQUIRE(hipMemSetAccess(&ptrA, size_mem - 1, &accessDesc, 1) == hipErrorInvalidValue); } SECTION("invalid location type") { accessDesc.location.type = hipMemLocationTypeInvalid; - REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 1) == - hipErrorInvalidValue); + REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 1) == hipErrorInvalidValue); } SECTION("invalid id") { accessDesc.location.id = -1; - REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 1) == - hipErrorInvalidValue); + REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 1) == hipErrorInvalidValue); } SECTION("pass location id as > highest device number") { int numDevices = 0; HIP_CHECK(hipGetDeviceCount(&numDevices)); accessDesc.location.id = numDevices; // set to non existing device - REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 1) == - hipErrorInvalidValue); + REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 1) == hipErrorInvalidValue); } SECTION("invalid flag") { accessDesc.flags = static_cast(-1); - REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 1) == - hipErrorInvalidValue); + REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 1) == hipErrorInvalidValue); } SECTION(" pass zero to count") { - REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 0) == - hipErrorInvalidValue); + REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 0) == hipErrorInvalidValue); } SECTION("pass desc as nullptr") { - REQUIRE(hipMemSetAccess(&ptrA, size_mem, nullptr, 1) == - hipErrorInvalidValue); 
+ REQUIRE(hipMemSetAccess(&ptrA, size_mem, nullptr, 1) == hipErrorInvalidValue); } SECTION("uninitialized virtual memory") { hipDeviceptr_t ptrB; HIP_CHECK(hipMemAddressReserve(&ptrB, size_mem, 0, 0, 0)); - REQUIRE(hipMemSetAccess(&ptrB, size_mem, &accessDesc, 1) == - hipErrorInvalidValue); + REQUIRE(hipMemSetAccess(&ptrB, size_mem, &accessDesc, 1) == hipErrorInvalidValue); } HIP_CHECK(hipMemUnmap(ptrA, size_mem)); SECTION("unmapped virtual memory") { - REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 1) == - hipErrorInvalidValue); + REQUIRE(hipMemSetAccess(&ptrA, size_mem, &accessDesc, 1) == hipErrorInvalidValue); } + HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); HIP_CHECK(hipMemRelease(handle)); } diff --git a/catch/unit/memory/hipMemUnmap.cc b/catch/unit/virtualMemoryManagement/hipMemUnmap.cc similarity index 83% rename from catch/unit/memory/hipMemUnmap.cc rename to catch/unit/virtualMemoryManagement/hipMemUnmap.cc index 1244b794c6..eeadb83099 100644 --- a/catch/unit/memory/hipMemUnmap.cc +++ b/catch/unit/virtualMemoryManagement/hipMemUnmap.cc @@ -23,22 +23,24 @@ THE SOFTWARE. /** * @addtogroup hipMemUnmap hipMemUnmap * @{ - * @ingroup MemoryTest + * @ingroup VirtualMemoryManagementTest * `hipError_t hipMemUnmap (void* ptr, size_t size)` - * Unmap memory allocation of a given address range. 
*/ #include + #include "hip_vmm_common.hh" constexpr int N = (1 << 13); + /** * Test Description * ------------------------ * - Negative Tests * ------------------------ - * - catch\unit\memory\hipMemUnmap.cc + * - unit/virtualMemoryManagement/hipMemUnmap.cc * Test requirements * ------------------------ * - HIP_VERSION >= 6.1 @@ -50,18 +52,17 @@ TEST_CASE("Unit_hipMemUnmap_negative") { hipDevice_t device; HIP_CHECK(hipDeviceGet(&device, deviceId)); - checkVMMSupported(device) + checkVMMSupported(device); hipMemAllocationProp prop{}; prop.type = hipMemAllocationTypePinned; prop.location.type = hipMemLocationTypeDevice; prop.location.id = device; // Current Devices - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &prop, - hipMemAllocationGranularityMinimum)); + HIP_CHECK( + hipMemGetAllocationGranularity(&granularity, &prop, hipMemAllocationGranularityMinimum)); REQUIRE(granularity > 0); - size_t size_mem = - ((granularity + buffer_size - 1) / granularity) * granularity; + size_t size_mem = ((granularity + buffer_size - 1) / granularity) * granularity; hipMemGenericAllocationHandle_t handle; hipDeviceptr_t ptrA; @@ -70,18 +71,17 @@ TEST_CASE("Unit_hipMemUnmap_negative") { // Allocate virtual address range HIP_CHECK(hipMemAddressReserve(&ptrA, size_mem, 0, 0, 0)); HIP_CHECK(hipMemMap(ptrA, size_mem, 0, handle, 0)); - SECTION("nullptr to ptrA") { - REQUIRE(hipMemUnmap(nullptr, size_mem) == hipErrorInvalidValue); - } - SECTION("pass zero to size") { - REQUIRE(hipMemUnmap(ptrA, 0) == hipErrorInvalidValue); - } + SECTION("nullptr to ptrA") { REQUIRE(hipMemUnmap(nullptr, size_mem) == hipErrorInvalidValue); } + + SECTION("pass zero to size") { REQUIRE(hipMemUnmap(ptrA, 0) == hipErrorInvalidValue); } + #if HT_NVIDIA SECTION("unmap a smaller size") { REQUIRE(hipMemUnmap(ptrA, (size_mem - 1)) == hipErrorInvalidValue); } #endif + HIP_CHECK(hipMemRelease(handle)); HIP_CHECK(hipMemUnmap(ptrA, size_mem)); HIP_CHECK(hipMemAddressFree(ptrA, size_mem)); diff --git 
a/catch/unit/memory/hipMemVmm.cc b/catch/unit/virtualMemoryManagement/hipMemVmm_old.cc similarity index 58% rename from catch/unit/memory/hipMemVmm.cc rename to catch/unit/virtualMemoryManagement/hipMemVmm_old.cc index d9f51f29ac..c2258f057d 100644 --- a/catch/unit/memory/hipMemVmm.cc +++ b/catch/unit/virtualMemoryManagement/hipMemVmm_old.cc @@ -1,30 +1,30 @@ /* - Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR - IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - */ +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ /* Test Case Description: 1) This testcase verifies the basic scenario - supported on all devices */ -#include -#include -#include #include #include #include @@ -32,6 +32,10 @@ #include #include +#include +#include +#include + /* This testcase verifies HIP Mem VMM API basic scenario - supported on all devices */ @@ -42,8 +46,9 @@ TEST_CASE("Unit_hipMemVmm_Basic") { INFO("hipDeviceAttributeVirtualMemoryManagementSupported: " << vmm); if (vmm == 0) { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeVirtualMemoryManagement " - "attribute. Hence skipping the testing with Pass result.\n"); + SUCCEED( + "GPU 0 doesn't support hipDeviceAttributeVirtualMemoryManagement " + "attribute. 
Hence skipping the testing with Pass result.\n"); return; } @@ -54,7 +59,8 @@ TEST_CASE("Unit_hipMemVmm_Basic") { memAllocationProp.location.id = 0; memAllocationProp.location.type = hipMemLocationTypeDevice; - HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &memAllocationProp, hipMemAllocationGranularityRecommended)); + HIP_CHECK(hipMemGetAllocationGranularity(&granularity, &memAllocationProp, + hipMemAllocationGranularityRecommended)); size_t size = 4 * 1024; void* reservedAddress{nullptr}; @@ -78,7 +84,7 @@ TEST_CASE("Unit_hipMemVmm_Basic") { HIP_CHECK(hipMemset(reservedAddress, value, size)); HIP_CHECK(hipMemcpy(&values[0], reservedAddress, size, hipMemcpyDeviceToHost)); - for (size_t i=0; i < size; ++i) { + for (size_t i = 0; i < size; ++i) { REQUIRE(values[i] == value); } @@ -87,4 +93,3 @@ TEST_CASE("Unit_hipMemVmm_Basic") { HIP_CHECK(hipMemRelease(gaHandle)); HIP_CHECK(hipMemAddressFree(reservedAddress, size)); } - diff --git a/catch/unit/virtualMemoryManagement/hip_vmm_common.hh b/catch/unit/virtualMemoryManagement/hip_vmm_common.hh new file mode 100644 index 0000000000..a43af62758 --- /dev/null +++ b/catch/unit/virtualMemoryManagement/hip_vmm_common.hh @@ -0,0 +1,49 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include "hip_test_context.hh" + +#define checkVMMSupported(device) \ + { \ + int value = 0; \ + hipDeviceAttribute_t attr = hipDeviceAttributeVirtualMemoryManagementSupported; \ + HIP_CHECK(hipDeviceGetAttribute(&value, attr, device)); \ + if (value == 0) { \ + HipTest::HIP_SKIP_TEST("Machine does not support VMM. Skipping Test.."); \ + return; \ + } \ + } + +#define checkVMMSupportedRetVal(device) \ + { \ + int value = 0; \ + hipDeviceAttribute_t attr = hipDeviceAttributeVirtualMemoryManagementSupported; \ + HIP_CHECK(hipDeviceGetAttribute(&value, attr, device)); \ + if (value == 0) { \ + HipTest::HIP_SKIP_TEST("Machine does not support VMM. 
Skipping Test.."); \ + return true; \ + } \ + } + +constexpr int threadsPerBlk = 64; \ No newline at end of file From dec1be580ee71a9b95b84bb1baab9550194715aa Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Wed, 27 Dec 2023 18:09:39 +0100 Subject: [PATCH 54/71] EXSWHTEC-372 - Implement tests for the hipGraph*ExternalSemaphoresSignalNode APIs #450 Change-Id: I6122be35eb4cbea8ecde5642ac436b2f3b4c3a24 --- catch/unit/vulkan_interop/CMakeLists.txt | 4 + .../unit/vulkan_interop/graph_tests_common.hh | 76 ++++++ ...hipGraphAddExternalSemaphoresSignalNode.cc | 135 ++++++++++ ...ecExternalSemaphoresSignalNodeSetParams.cc | 193 ++++++++++++++ ...phExternalSemaphoresSignalNodeGetParams.cc | 96 +++++++ ...phExternalSemaphoresSignalNodeSetParams.cc | 137 ++++++++++ .../hipSignalExternalSemaphoresAsync.cc | 140 +---------- .../vulkan_interop/signal_semaphore_common.hh | 236 ++++++++++++++++++ 8 files changed, 881 insertions(+), 136 deletions(-) create mode 100644 catch/unit/vulkan_interop/graph_tests_common.hh create mode 100644 catch/unit/vulkan_interop/hipGraphAddExternalSemaphoresSignalNode.cc create mode 100644 catch/unit/vulkan_interop/hipGraphExecExternalSemaphoresSignalNodeSetParams.cc create mode 100644 catch/unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeGetParams.cc create mode 100644 catch/unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeSetParams.cc create mode 100644 catch/unit/vulkan_interop/signal_semaphore_common.hh diff --git a/catch/unit/vulkan_interop/CMakeLists.txt b/catch/unit/vulkan_interop/CMakeLists.txt index a0c39ebb0b..35212887f5 100644 --- a/catch/unit/vulkan_interop/CMakeLists.txt +++ b/catch/unit/vulkan_interop/CMakeLists.txt @@ -8,6 +8,10 @@ set(TEST_SRC hipSignalExternalSemaphoresAsync.cc hipImportExternalSemaphore.cc hipDestroyExternalSemaphore.cc + hipGraphAddExternalSemaphoresSignalNode.cc + hipGraphExternalSemaphoresSignalNodeSetParams.cc + hipGraphExternalSemaphoresSignalNodeGetParams.cc + 
hipGraphExecExternalSemaphoresSignalNodeSetParams.cc ) if(WIN32) diff --git a/catch/unit/vulkan_interop/graph_tests_common.hh b/catch/unit/vulkan_interop/graph_tests_common.hh new file mode 100644 index 0000000000..bb28ec5ea5 --- /dev/null +++ b/catch/unit/vulkan_interop/graph_tests_common.hh @@ -0,0 +1,76 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include + +#include +#include + +template void GraphAddNodeCommonNegativeTests(F f, hipGraph_t graph) { + hipGraphNode_t node = nullptr; + SECTION("graph == nullptr") { + HIP_CHECK_ERROR(f(&node, nullptr, nullptr, 0), hipErrorInvalidValue); + } + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(f(nullptr, graph, nullptr, 0), hipErrorInvalidValue); + } + + SECTION("dependencies == nullptr with size != 0") { + HIP_CHECK_ERROR(f(&node, graph, nullptr, 1), hipErrorInvalidValue); + } + +// Disabled on AMD due to defect - EXSWHTEC-202 +#if HT_NVIDIA + SECTION("Node in dependency is from different graph") { + hipGraph_t other_graph = nullptr; + HIP_CHECK(hipGraphCreate(&other_graph, 0)); + hipGraphNode_t other_node = nullptr; + HIP_CHECK(hipGraphAddEmptyNode(&other_node, other_graph, nullptr, 0)); + hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddEmptyNode(&node, graph, nullptr, 0)); + HIP_CHECK_ERROR(f(&node, graph, &other_node, 1), hipErrorInvalidValue); + HIP_CHECK(hipGraphDestroy(other_graph)); + } +#endif + + SECTION("Invalid numNodes") { + hipGraphNode_t dep_node = nullptr; + HIP_CHECK(hipGraphAddEmptyNode(&dep_node, graph, nullptr, 0)); + HIP_CHECK_ERROR(f(&node, graph, &dep_node, 2), hipErrorInvalidValue); + } + +// Disabled on AMD due to defect - EXSWHTEC-201 +#if HT_NVIDIA + SECTION("Duplicate node in dependencies") { + hipGraphNode_t dep_node = nullptr; + // Need to create two nodes to avoid overlap with Invalid numNodes case + // First one is left dangling as the graph will be destroyed after the section anyway + HIP_CHECK(hipGraphAddEmptyNode(&dep_node, graph, nullptr, 0)); + HIP_CHECK(hipGraphAddEmptyNode(&dep_node, graph, nullptr, 0)); + hipGraphNode_t deps[] = {dep_node, dep_node}; + HIP_CHECK_ERROR(f(&node, graph, deps, 2), hipErrorInvalidValue); + } +#endif +} \ No newline at end of file diff --git a/catch/unit/vulkan_interop/hipGraphAddExternalSemaphoresSignalNode.cc 
b/catch/unit/vulkan_interop/hipGraphAddExternalSemaphoresSignalNode.cc new file mode 100644 index 0000000000..932f99cb41 --- /dev/null +++ b/catch/unit/vulkan_interop/hipGraphAddExternalSemaphoresSignalNode.cc @@ -0,0 +1,135 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "vulkan_test.hh" +#include "signal_semaphore_common.hh" +#include "graph_tests_common.hh" + +/** + * @addtogroup hipGraphAddExternalSemaphoresSignalNode hipGraphAddExternalSemaphoresSignalNode + * @{ + * @ingroup GraphTest + * `hipGraphAddExternalSemaphoresSignalNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const + * hipGraphNode_t* pDependencies, size_t numDependencies, const + * hipExternalSemaphoreSignalNodeParams* nodeParams);` - Creates a external semaphor signal node and + * adds it to a graph. + */ + +/** + * Test Description + * ------------------------ + * - Creates two host visible Vulkan buffers. 
+ * - Adds a buffer copy command which will copy from one buffer to another. + * - Creates an external Vulkan binary semaphore. + * - Creates a Vulkan fence and signals semaphore asynchronously. + * - Waits for the operation to finish successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphAddExternalSemaphoresSignalNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphAddExternalSemaphoresSignalNode_Positive_Basic") { + SignalExternalSemaphoreCommon(GraphExtSemaphoreSignalWrapper<>); +} + +// Timeline semaphores unsupported on AMD +#if HT_NVIDIA + +/** + * Test Description + * ------------------------ + * - Creates an external Vulkan timeline semaphore. + * - Imports the semaphore and signals. + * - Waits for the operation to finish successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphAddExternalSemaphoresSignalNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphAddExternalSemaphoresSignalNode_Vulkan_Positive_Timeline_Semaphore") { + SignalExternalTimelineSemaphoreCommon(GraphExtSemaphoreSignalWrapper<>); +} + +/** + * Test Description + * ------------------------ + * - Creates two host visible Vulkan buffers. + * - Adds a buffer copy command which will copy from one buffer to another. + * - Creates multiple external Vulkan binary semaphores. + * - Createas a Vulkan fence and signals semaphores. + * - Waits for the operations to finish successfully. 
+ * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphAddExternalSemaphoresSignalNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphAddExternalSemaphoresSignalNode_Vulkan_Positive_Multiple_Semaphores") { + SignalExternalMultipleSemaphoresCommon(GraphExtSemaphoreSignalWrapper<>); +} +#endif + + +/** + * Test Description + * ------------------------ + * - Test to verify hipGraphAddExternalSemaphoresSignalNode behavior with invalid arguments: + * -# Nullptr graph + * -# Nullptr graph node + * -# Invalid numDependencies for null list of dependencies + * -# Node in dependency is from different graph + * -# Invalid numNodes + * -# Duplicate node in dependencies + * Test source + * ------------------------ + * - /unit/vulkan_interop/hipGraphAddExternalSemaphoresSignalNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphAddExternalSemaphoresSignalNode_Vulkan_Negative_Parameters") { + using namespace std::placeholders; + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + VulkanTest vkt(enable_validation); + hipExternalSemaphoreSignalParams signal_params = {}; + signal_params.params.fence.value = 1; + auto hip_ext_semaphore = ImportBinarySemaphore(vkt); + + hipExternalSemaphoreSignalNodeParams node_params = {}; + node_params.extSemArray = &hip_ext_semaphore; + node_params.paramsArray = &signal_params; + node_params.numExtSems = 1; + + GraphAddNodeCommonNegativeTests( + std::bind(hipGraphAddExternalSemaphoresSignalNode, _1, _2, _3, _4, &node_params), graph); + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + HIP_CHECK(hipGraphDestroy(graph)); +} diff --git a/catch/unit/vulkan_interop/hipGraphExecExternalSemaphoresSignalNodeSetParams.cc b/catch/unit/vulkan_interop/hipGraphExecExternalSemaphoresSignalNodeSetParams.cc new file mode 100644 index 0000000000..939d95beb4 --- /dev/null +++ 
b/catch/unit/vulkan_interop/hipGraphExecExternalSemaphoresSignalNodeSetParams.cc @@ -0,0 +1,193 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "vulkan_test.hh" +#include "signal_semaphore_common.hh" + +/** + * @addtogroup hipGraphExecExternalSemaphoresSignalNodeSetParams + * hipGraphExecExternalSemaphoresSignalNodeSetParams + * @{ + * @ingroup GraphTest + * `hipGraphExecExternalSemaphoresSignalNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t + * hNode, const hipExternalSemaphoreSignalNodeParams* nodeParams)` - Updates node parameters in the + * external semaphore signal node in the given graphExec. 
+ */ + +static hipError_t GraphExecSemaphoreSetParamsSignalWrapper( + hipExternalSemaphore_t* extSemArray, hipExternalSemaphoreSignalParams* paramsArray, + unsigned int numExtSems, hipStream_t stream) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + + hipExternalSemaphoreSignalNodeParams node_params = {}; + node_params.extSemArray = extSemArray; + node_params.paramsArray = paramsArray; + node_params.numExtSems = numExtSems; + + hipExternalSemaphoreSignalParams* signal_params = + new hipExternalSemaphoreSignalParams[numExtSems](); // value-init: only fence.value is set below + for (unsigned int i = 0; i < numExtSems; i++) { + signal_params[i].params.fence.value = 10 + i; + } + + hipExternalSemaphoreSignalNodeParams initial_params = {}; + initial_params.extSemArray = extSemArray; + initial_params.paramsArray = signal_params; + initial_params.numExtSems = numExtSems; + + HIP_CHECK(hipGraphAddExternalSemaphoresSignalNode(&node, graph, nullptr, 0, &initial_params)); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + + HIP_CHECK(hipGraphExecExternalSemaphoresSignalNodeSetParams(graph_exec, node, &node_params)); + + hipExternalSemaphoreSignalNodeParams retrieved_params = {0}; + HIP_CHECK(hipGraphExternalSemaphoresSignalNodeGetParams(node, &retrieved_params)); + REQUIRE(initial_params == retrieved_params); + + HIP_CHECK(hipGraphLaunch(graph_exec, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); + delete[] signal_params; + + return hipSuccess; +} + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values in the executable graph.
The + * graph is run and it is verified that the graph node signals the external binary semaphore and + * operation finishes successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExecExternalSemaphoresSignalNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExecExternalSemaphoresSignalNodeSetParams_Positive_Basic") { + SignalExternalSemaphoreCommon(GraphExecSemaphoreSetParamsSignalWrapper); +} + +// Timeline semaphores unsupported on AMD +#if HT_NVIDIA + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values in the executable graph. The + * graph is run and it is verified that the graph node signals the external timeline semaphore and + * operation finishes successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExecExternalSemaphoresSignalNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE( + "Unit_hipGraphExecExternalSemaphoresSignalNodeSetParams_Vulkan_Positive_Timeline_Semaphore") { + SignalExternalTimelineSemaphoreCommon(GraphExecSemaphoreSetParamsSignalWrapper); +} + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values in the executable graph. The + * graph is run and it is verified that the graph node signals the external binary semaphores and + * operation finishes successfully. 
+ * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExecExternalSemaphoresSignalNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE( + "Unit_hipGraphExecExternalSemaphoresSignalNodeSetParams_Vulkan_Positive_Multiple_Semaphores") { + SignalExternalMultipleSemaphoresCommon(GraphExecSemaphoreSetParamsSignalWrapper); +} +#endif + +/** + * Test Description + * ------------------------ + * - Test to verify hipGraphExecExternalSemaphoresSignalNodeSetParams behavior with invalid + * arguments: + * -# Nullptr graphexec + * -# Nullptr graph node + * -# Nullptr params + * Test source + * ------------------------ + * - /unit/vulkan_interop/hipGraphExecExternalSemaphoresSignalNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExecExternalSemaphoresSignalNodeSetParams_Vulkan_Negative_Parameters") { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + VulkanTest vkt(enable_validation); + hipExternalSemaphoreSignalParams signal_params = {}; + signal_params.params.fence.value = 1; + auto hip_ext_semaphore = ImportBinarySemaphore(vkt); + + hipExternalSemaphoreSignalNodeParams node_params = {}; + node_params.extSemArray = &hip_ext_semaphore; + node_params.paramsArray = &signal_params; + node_params.numExtSems = 1; + + hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddExternalSemaphoresSignalNode(&node, graph, nullptr, 0, &node_params)); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + + SECTION("pGraphExec == nullptr") { + HIP_CHECK_ERROR(hipGraphExecExternalSemaphoresSignalNodeSetParams(nullptr, node, &node_params), + hipErrorInvalidValue); + } + + SECTION("node == nullptr") { + HIP_CHECK_ERROR( + hipGraphExecExternalSemaphoresSignalNodeSetParams(graph_exec, nullptr, &node_params), + hipErrorInvalidValue); + } + + SECTION("params == 
nullptr") { + HIP_CHECK_ERROR(hipGraphExecExternalSemaphoresSignalNodeSetParams(graph_exec, node, nullptr), + hipErrorInvalidValue); + } + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); +} diff --git a/catch/unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeGetParams.cc b/catch/unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeGetParams.cc new file mode 100644 index 0000000000..6f6c3c2787 --- /dev/null +++ b/catch/unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeGetParams.cc @@ -0,0 +1,96 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "vulkan_test.hh" +#include "signal_semaphore_common.hh" + +/** + * @addtogroup hipGraphExternalSemaphoresSignalNodeGetParams + * hipGraphExternalSemaphoresSignalNodeGetParams + * @{ + * @ingroup GraphTest + * `hipGraphExternalSemaphoresSignalNodeGetParams(hipGraphNode_t hNode, + * hipExternalSemaphoreSignalNodeParams* params_out)` - Returns external semaphore signal node + * params. + * ________________________ + * Test cases from other APIs: + * - @ref Unit_hipGraphExternalSemaphoresSignalNodeSetParams_Positive_Basic + * - @ref Unit_hipGraphExternalSemaphoresSignalNodeSetParams_Vulkan_Positive_Timeline_Semaphore + * - @ref Unit_hipGraphExternalSemaphoresSignalNodeSetParams_Vulkan_Positive_Multiple_Semaphores + */ + +/** + * Test Description + * ------------------------ + * - Test to verify hipGraphExternalSemaphoresSignalNodeGetParams behavior with invalid + * arguments: + * -# Nullptr graph node + * -# Nullptr params + * -# Node is destroyed + * Test source + * ------------------------ + * - /unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeGetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresSignalNodeGetParams_Negative_Parameters") { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + VulkanTest vkt(enable_validation); + hipExternalSemaphoreSignalParams signal_params = {}; + signal_params.params.fence.value = 1; + auto hip_ext_semaphore = ImportBinarySemaphore(vkt); + + hipExternalSemaphoreSignalNodeParams node_params = {}; + node_params.extSemArray = &hip_ext_semaphore; + node_params.paramsArray = &signal_params; + node_params.numExtSems = 1; + hipExternalSemaphoreSignalNodeParams retrieved_params; + + hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddExternalSemaphoresSignalNode(&node, graph, nullptr, 0, &node_params)); + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipGraphExternalSemaphoresSignalNodeGetParams(nullptr, 
&retrieved_params), + hipErrorInvalidValue); + } + + SECTION("params_out == nullptr") { + HIP_CHECK_ERROR(hipGraphExternalSemaphoresSignalNodeGetParams(node, nullptr), + hipErrorInvalidValue); + } + +// Disabled on AMD due to defect - EXSWHTEC-208 +#if HT_NVIDIA + SECTION("Node is destroyed") { + hipGraph_t graph_temp = nullptr; + HIP_CHECK(hipGraphCreate(&graph_temp, 0)); + hipGraphNode_t node_temp = nullptr; + HIP_CHECK( + hipGraphAddExternalSemaphoresSignalNode(&node_temp, graph_temp, nullptr, 0, &node_params)); + HIP_CHECK(hipGraphDestroy(graph_temp)); + HIP_CHECK_ERROR(hipGraphExternalSemaphoresSignalNodeGetParams(node_temp, &retrieved_params), + hipErrorInvalidValue); + } +#endif +} diff --git a/catch/unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeSetParams.cc b/catch/unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeSetParams.cc new file mode 100644 index 0000000000..8f964d966d --- /dev/null +++ b/catch/unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeSetParams.cc @@ -0,0 +1,137 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "vulkan_test.hh" +#include "signal_semaphore_common.hh" + +/** + * @addtogroup hipGraphExternalSemaphoresSignalNodeSetParams + * hipGraphExternalSemaphoresSignalNodeSetParams + * @{ + * @ingroup GraphTest + * `hipGraphExternalSemaphoresSignalNodeSetParams(hipGraphNode_t hNode, const + * hipExternalSemaphoreSignalNodeParams* nodeParams)` - Updates node parameters in the external + * semaphore signal node. + */ + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values. The graph is run and it is + * verified that the graph node signals the external binary semaphore and operation finishes + * successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresSignalNodeSetParams_Positive_Basic") { + SignalExternalSemaphoreCommon(GraphExtSemaphoreSignalWrapper); +} + +// Timeline semaphores unsupported on AMD +#if HT_NVIDIA + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values. The graph is run and it is + * verified that the graph node signals the external timeline semaphore and operation finishes + * successfully.
+ * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresSignalNodeSetParams_Vulkan_Positive_Timeline_Semaphore") { + SignalExternalTimelineSemaphoreCommon(GraphExtSemaphoreSignalWrapper); +} + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values. The graph is run and it is + * verified that the graph node signals the external binary semaphores and operation finishes + * successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE( + "Unit_hipGraphExternalSemaphoresSignalNodeSetParams_Vulkan_Positive_Multiple_Semaphores") { + SignalExternalMultipleSemaphoresCommon(GraphExtSemaphoreSignalWrapper); +} +#endif + +/** + * Test Description + * ------------------------ + * - Test to verify hipGraphExternalSemaphoresSignalNodeSetParams behavior with invalid + * arguments: + * -# Nullptr graph node + * -# Nullptr params + * Test source + * ------------------------ + * - /unit/vulkan_interop/hipGraphExternalSemaphoresSignalNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresSignalNodeSetParams_Vulkan_Negative_Parameters") { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + VulkanTest vkt(enable_validation); + hipExternalSemaphoreSignalParams signal_params = {}; + signal_params.params.fence.value = 1; + auto hip_ext_semaphore = ImportBinarySemaphore(vkt); + + hipExternalSemaphoreSignalNodeParams node_params = {}; + node_params.extSemArray = &hip_ext_semaphore;
+ node_params.paramsArray = &signal_params; + node_params.numExtSems = 1; + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipGraphExternalSemaphoresSignalNodeSetParams(nullptr, &node_params), + hipErrorInvalidValue); + } + + hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddExternalSemaphoresSignalNode(&node, graph, nullptr, 0, &node_params)); + + SECTION("params == nullptr") { + HIP_CHECK_ERROR(hipGraphExternalSemaphoresSignalNodeSetParams(node, nullptr), + hipErrorInvalidValue); + } + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + HIP_CHECK(hipGraphDestroy(graph)); +} diff --git a/catch/unit/vulkan_interop/hipSignalExternalSemaphoresAsync.cc b/catch/unit/vulkan_interop/hipSignalExternalSemaphoresAsync.cc index 4485a3ad5f..64ae1e3637 100644 --- a/catch/unit/vulkan_interop/hipSignalExternalSemaphoresAsync.cc +++ b/catch/unit/vulkan_interop/hipSignalExternalSemaphoresAsync.cc @@ -20,152 +20,20 @@ THE SOFTWARE. */ #include "vulkan_test.hh" - -constexpr bool enable_validation = false; +#include "signal_semaphore_common.hh" TEST_CASE("Unit_hipSignalExternalSemaphoresAsync_Vulkan_Positive_Binary_Semaphore") { - VulkanTest vkt(enable_validation); - - constexpr uint32_t count = 1; - const auto src_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); - const auto dst_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_DST_BIT); - - const auto command_buffer = vkt.GetCommandBuffer(); - VkCommandBufferBeginInfo begin_info = {}; - begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - VK_CHECK_RESULT(vkBeginCommandBuffer(command_buffer, &begin_info)); - VkBufferCopy buffer_copy = {}; - buffer_copy.size = count * sizeof(*src_storage.host_ptr); - vkCmdCopyBuffer(command_buffer, src_storage.buffer, dst_storage.buffer, 1, &buffer_copy); - VK_CHECK_RESULT(vkEndCommandBuffer(command_buffer)); - const auto semaphore = 
vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_BINARY); - const auto hip_sem_handle_desc = - vkt.BuildSemaphoreDescriptor(semaphore, VK_SEMAPHORE_TYPE_BINARY); - hipExternalSemaphore_t hip_ext_semaphore; - HIP_CHECK(hipImportExternalSemaphore(&hip_ext_semaphore, &hip_sem_handle_desc)); - hipExternalSemaphoreSignalParams signal_params = {}; - signal_params.params.fence.value = 0; - HIP_CHECK(hipSignalExternalSemaphoresAsync(&hip_ext_semaphore, &signal_params, 1, nullptr)); - HIP_CHECK(hipDeviceSynchronize()); - VkSubmitInfo submit_info = {}; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &command_buffer; - VkSemaphore waitSemaphores[] = {semaphore}; - // VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT or VK_PIPELINE_STAGE_TRANSFER_BIT can work - VkPipelineStageFlags waitStages[] = {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT}; - submit_info.waitSemaphoreCount = 1; - submit_info.pWaitSemaphores = waitSemaphores; - submit_info.pWaitDstStageMask = waitStages; - const auto fence = vkt.CreateFence(); - VK_CHECK_RESULT(vkQueueSubmit(vkt.GetQueue(), 1, &submit_info, fence)); - REQUIRE(vkGetFenceStatus(vkt.GetDevice(), fence) == VK_NOT_READY); - PollStream(nullptr, hipSuccess); - VK_CHECK_RESULT( - vkWaitForFences(vkt.GetDevice(), 1, &fence, VK_TRUE, 5'000'000'000 /*5 seconds*/)); - HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + SignalExternalSemaphoreCommon(hipSignalExternalSemaphoresAsync); } // Timeline semaphores unsupported on AMD #if HT_NVIDIA TEST_CASE("Unit_hipSignalExternalSemaphoresAsync_Vulkan_Positive_Timeline_Semaphore") { - VulkanTest vkt(enable_validation); - constexpr uint64_t signal_value = 2; - - const auto semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_TIMELINE); - const auto hip_sem_handle_desc = - vkt.BuildSemaphoreDescriptor(semaphore, VK_SEMAPHORE_TYPE_TIMELINE); - hipExternalSemaphore_t hip_ext_semaphore; - HIP_CHECK(hipImportExternalSemaphore(&hip_ext_semaphore, 
&hip_sem_handle_desc)); - - hipExternalSemaphoreSignalParams signal_params = {}; - signal_params.params.fence.value = signal_value; - - HIP_CHECK(hipSignalExternalSemaphoresAsync(&hip_ext_semaphore, &signal_params, 1, nullptr)); - PollStream(nullptr, hipSuccess); - - uint64_t sem_value = 0u; - VK_CHECK_RESULT(vkGetSemaphoreCounterValue(vkt.GetDevice(), semaphore, &sem_value)); - - REQUIRE(2 == sem_value); - - HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + SignalExternalTimelineSemaphoreCommon(hipSignalExternalSemaphoresAsync); } TEST_CASE("Unit_hipSignalExternalSemaphoresAsync_Vulkan_Positive_Multiple_Semaphores") { - VulkanTest vkt(enable_validation); - - constexpr uint32_t count = 1; - const auto src_storage = vkt.CreateMappedStorage(count, - VK_BUFFER_USAGE_TRANSFER_SRC_BIT); - const auto dst_storage = vkt.CreateMappedStorage(count, - VK_BUFFER_USAGE_TRANSFER_DST_BIT); - -#if HT_AMD - constexpr auto second_semaphore_type = VK_SEMAPHORE_TYPE_BINARY; -#else - constexpr auto second_semaphore_type = VK_SEMAPHORE_TYPE_TIMELINE; -#endif - - const auto command_buffer = vkt.GetCommandBuffer(); - VkCommandBufferBeginInfo begin_info = {}; - begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - VK_CHECK_RESULT(vkBeginCommandBuffer(command_buffer, &begin_info)); - VkBufferCopy buffer_copy = {}; - buffer_copy.size = count * sizeof(*src_storage.host_ptr); - vkCmdCopyBuffer(command_buffer, src_storage.buffer, dst_storage.buffer, 1, &buffer_copy); - VK_CHECK_RESULT(vkEndCommandBuffer(command_buffer)); - - const auto binary_semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_BINARY); - const auto hip_binary_sem_handle_desc = - vkt.BuildSemaphoreDescriptor(binary_semaphore, VK_SEMAPHORE_TYPE_BINARY); - hipExternalSemaphore_t hip_binary_ext_semaphore; - HIP_CHECK(hipImportExternalSemaphore(&hip_binary_ext_semaphore, &hip_binary_sem_handle_desc)); - - const auto timeline_semaphore = 
vkt.CreateExternalSemaphore(second_semaphore_type); - const auto hip_timeline_sem_handle_desc = - vkt.BuildSemaphoreDescriptor(timeline_semaphore, second_semaphore_type); - hipExternalSemaphore_t hip_timeline_ext_semaphore; - HIP_CHECK(hipImportExternalSemaphore(&hip_timeline_ext_semaphore, - &hip_timeline_sem_handle_desc)); - - uint64_t wait_values[] = {1, 0}; - VkTimelineSemaphoreSubmitInfo timeline_submit_info = {}; - timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO; - timeline_submit_info.waitSemaphoreValueCount = 2; - timeline_submit_info.pWaitSemaphoreValues = wait_values; - - VkSemaphore wait_semaphores[] = {timeline_semaphore, binary_semaphore}; - VkSubmitInfo submit_info = {}; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &command_buffer; - submit_info.waitSemaphoreCount = 2; - submit_info.pWaitSemaphores = wait_semaphores; - submit_info.pNext = - second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE ? &timeline_submit_info : nullptr; - const auto fence = vkt.CreateFence(); - VK_CHECK_RESULT(vkQueueSubmit(vkt.GetQueue(), 1, &submit_info, fence)); - - REQUIRE(vkGetFenceStatus(vkt.GetDevice(), fence) == VK_NOT_READY); - - hipExternalSemaphoreSignalParams binary_signal_params = {}; - binary_signal_params.params.fence.value = 0; - hipExternalSemaphoreSignalParams timeline_signal_params = {}; - timeline_signal_params.params.fence.value = - second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE ? 
2 : 0; - hipExternalSemaphore_t ext_semaphores[] = {hip_binary_ext_semaphore, hip_timeline_ext_semaphore}; - hipExternalSemaphoreSignalParams signal_params[] = {binary_signal_params, - timeline_signal_params}; - HIP_CHECK(hipSignalExternalSemaphoresAsync(ext_semaphores, signal_params, 2, nullptr)); - - VK_CHECK_RESULT( - vkWaitForFences(vkt.GetDevice(), 1, &fence, VK_TRUE, 5'000'000'000 /*5 seconds*/)); - - HIP_CHECK(hipDestroyExternalSemaphore(hip_binary_ext_semaphore)); - HIP_CHECK(hipDestroyExternalSemaphore(hip_timeline_ext_semaphore)); + SignalExternalMultipleSemaphoresCommon(hipSignalExternalSemaphoresAsync); } #endif diff --git a/catch/unit/vulkan_interop/signal_semaphore_common.hh b/catch/unit/vulkan_interop/signal_semaphore_common.hh new file mode 100644 index 0000000000..2c8b2f1c86 --- /dev/null +++ b/catch/unit/vulkan_interop/signal_semaphore_common.hh @@ -0,0 +1,236 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include +#include + +constexpr bool enable_validation = false; + +template void SignalExternalSemaphoreCommon(F f) { + VulkanTest vkt(enable_validation); + + constexpr uint32_t count = 1; + const auto src_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); + const auto dst_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_DST_BIT); + + const auto command_buffer = vkt.GetCommandBuffer(); + VkCommandBufferBeginInfo begin_info = {}; + begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + VK_CHECK_RESULT(vkBeginCommandBuffer(command_buffer, &begin_info)); + VkBufferCopy buffer_copy = {}; + buffer_copy.size = count * sizeof(*src_storage.host_ptr); + vkCmdCopyBuffer(command_buffer, src_storage.buffer, dst_storage.buffer, 1, &buffer_copy); + VK_CHECK_RESULT(vkEndCommandBuffer(command_buffer)); + const auto semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_BINARY); + const auto hip_sem_handle_desc = + vkt.BuildSemaphoreDescriptor(semaphore, VK_SEMAPHORE_TYPE_BINARY); + hipExternalSemaphore_t hip_ext_semaphore; + HIP_CHECK(hipImportExternalSemaphore(&hip_ext_semaphore, &hip_sem_handle_desc)); + hipExternalSemaphoreSignalParams signal_params = {}; + signal_params.params.fence.value = 0; + HIP_CHECK(f(&hip_ext_semaphore, &signal_params, 1, nullptr)); + HIP_CHECK(hipDeviceSynchronize()); + VkSubmitInfo submit_info = {}; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &command_buffer; + VkSemaphore waitSemaphores[] = {semaphore}; + // VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT or 
VK_PIPELINE_STAGE_TRANSFER_BIT can work + VkPipelineStageFlags waitStages[] = {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT}; + submit_info.waitSemaphoreCount = 1; + submit_info.pWaitSemaphores = waitSemaphores; + submit_info.pWaitDstStageMask = waitStages; + const auto fence = vkt.CreateFence(); + VK_CHECK_RESULT(vkQueueSubmit(vkt.GetQueue(), 1, &submit_info, fence)); + REQUIRE(vkGetFenceStatus(vkt.GetDevice(), fence) == VK_NOT_READY); + PollStream(nullptr, hipSuccess); + VK_CHECK_RESULT( + vkWaitForFences(vkt.GetDevice(), 1, &fence, VK_TRUE, 5'000'000'000 /*5 seconds*/)); + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); +} + +#if HT_NVIDIA +template void SignalExternalTimelineSemaphoreCommon(F f) { + VulkanTest vkt(enable_validation); + constexpr uint64_t signal_value = 2; + + const auto semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_TIMELINE); + const auto hip_sem_handle_desc = + vkt.BuildSemaphoreDescriptor(semaphore, VK_SEMAPHORE_TYPE_TIMELINE); + hipExternalSemaphore_t hip_ext_semaphore; + HIP_CHECK(hipImportExternalSemaphore(&hip_ext_semaphore, &hip_sem_handle_desc)); + + hipExternalSemaphoreSignalParams signal_params = {}; + signal_params.params.fence.value = signal_value; + + HIP_CHECK(f(&hip_ext_semaphore, &signal_params, 1, nullptr)); + PollStream(nullptr, hipSuccess); + + uint64_t sem_value = 0u; + VK_CHECK_RESULT(vkGetSemaphoreCounterValue(vkt.GetDevice(), semaphore, &sem_value)); + + REQUIRE(2 == sem_value); + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); +} + +template void SignalExternalMultipleSemaphoresCommon(F f) { + VulkanTest vkt(enable_validation); + + constexpr uint32_t count = 1; + const auto src_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); + const auto dst_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_DST_BIT); + +#if HT_AMD + constexpr auto second_semaphore_type = VK_SEMAPHORE_TYPE_BINARY; +#else + constexpr auto second_semaphore_type = 
VK_SEMAPHORE_TYPE_TIMELINE; +#endif + + const auto command_buffer = vkt.GetCommandBuffer(); + VkCommandBufferBeginInfo begin_info = {}; + begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + VK_CHECK_RESULT(vkBeginCommandBuffer(command_buffer, &begin_info)); + VkBufferCopy buffer_copy = {}; + buffer_copy.size = count * sizeof(*src_storage.host_ptr); + vkCmdCopyBuffer(command_buffer, src_storage.buffer, dst_storage.buffer, 1, &buffer_copy); + VK_CHECK_RESULT(vkEndCommandBuffer(command_buffer)); + + const auto binary_semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_BINARY); + const auto hip_binary_sem_handle_desc = + vkt.BuildSemaphoreDescriptor(binary_semaphore, VK_SEMAPHORE_TYPE_BINARY); + hipExternalSemaphore_t hip_binary_ext_semaphore; + HIP_CHECK(hipImportExternalSemaphore(&hip_binary_ext_semaphore, &hip_binary_sem_handle_desc)); + + const auto timeline_semaphore = vkt.CreateExternalSemaphore(second_semaphore_type); + const auto hip_timeline_sem_handle_desc = + vkt.BuildSemaphoreDescriptor(timeline_semaphore, second_semaphore_type); + hipExternalSemaphore_t hip_timeline_ext_semaphore; + HIP_CHECK(hipImportExternalSemaphore(&hip_timeline_ext_semaphore, &hip_timeline_sem_handle_desc)); + + uint64_t wait_values[] = {1, 0}; + VkTimelineSemaphoreSubmitInfo timeline_submit_info = {}; + timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO; + timeline_submit_info.waitSemaphoreValueCount = 2; + timeline_submit_info.pWaitSemaphoreValues = wait_values; + + VkSemaphore wait_semaphores[] = {timeline_semaphore, binary_semaphore}; + VkSubmitInfo submit_info = {}; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &command_buffer; + submit_info.waitSemaphoreCount = 2; + submit_info.pWaitSemaphores = wait_semaphores; + submit_info.pNext = + second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE ? 
&timeline_submit_info : nullptr; + const auto fence = vkt.CreateFence(); + VK_CHECK_RESULT(vkQueueSubmit(vkt.GetQueue(), 1, &submit_info, fence)); + + REQUIRE(vkGetFenceStatus(vkt.GetDevice(), fence) == VK_NOT_READY); + + hipExternalSemaphoreSignalParams binary_signal_params = {}; + binary_signal_params.params.fence.value = 0; + hipExternalSemaphoreSignalParams timeline_signal_params = {}; + timeline_signal_params.params.fence.value = + second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE ? 2 : 0; + hipExternalSemaphore_t ext_semaphores[] = {hip_binary_ext_semaphore, hip_timeline_ext_semaphore}; + hipExternalSemaphoreSignalParams signal_params[] = {binary_signal_params, timeline_signal_params}; + HIP_CHECK(f(ext_semaphores, signal_params, 2, nullptr)); + + VK_CHECK_RESULT( + vkWaitForFences(vkt.GetDevice(), 1, &fence, VK_TRUE, 5'000'000'000 /*5 seconds*/)); + + HIP_CHECK(hipDestroyExternalSemaphore(hip_binary_ext_semaphore)); + HIP_CHECK(hipDestroyExternalSemaphore(hip_timeline_ext_semaphore)); +} +#endif + +static inline bool operator==(const hipExternalSemaphoreSignalNodeParams& lhs, + const hipExternalSemaphoreSignalNodeParams& rhs) { + bool equal = true; + if (lhs.numExtSems != rhs.numExtSems) { + return false; + } + for (unsigned int i = 0; i < lhs.numExtSems; i++) { + if ((lhs.extSemArray[i] != rhs.extSemArray[i]) || + (lhs.paramsArray[i].params.fence.value != rhs.paramsArray[i].params.fence.value)) { + equal = false; + break; + } + } + return equal; +} + +/** + * Signals the given external semaphores through a graph node instead of a direct stream call. + * + * Builds a graph holding one external semaphore signal node, checks that the node reports the + * expected parameters, then instantiates the graph, launches it on `stream` and synchronizes. + * When `set_params` is true the node is first created with placeholder fence values (10 + i), + * verified through GetParams, and only then updated to the caller's parameters via SetParams. + * + * @param extSemArray Semaphores to signal. + * @param paramsArray Signal parameters, one entry per semaphore. + * @param numExtSems Number of entries in both arrays. + * @param stream Stream the instantiated graph is launched on. + * @return hipSuccess; failures are reported through HIP_CHECK / REQUIRE. + */ +template +hipError_t GraphExtSemaphoreSignalWrapper(hipExternalSemaphore_t* extSemArray, + hipExternalSemaphoreSignalParams* paramsArray, + unsigned int numExtSems, hipStream_t stream) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + hipExternalSemaphoreSignalNodeParams retrieved_params = {}; + memset(&retrieved_params, 0, sizeof(retrieved_params)); + + hipExternalSemaphoreSignalNodeParams node_params = {}; + node_params.extSemArray = extSemArray; + 
node_params.paramsArray = paramsArray; + node_params.numExtSems = numExtSems; + + if constexpr (set_params) { + /* Value-initialize: only fence.value is assigned below, so without the trailing () the + flags and remaining fields would reach the API with indeterminate contents. */ + hipExternalSemaphoreSignalParams* signal_params = + new hipExternalSemaphoreSignalParams[numExtSems](); + for (unsigned int i = 0; i < numExtSems; i++) { + signal_params[i].params.fence.value = 10 + i; + } + + hipExternalSemaphoreSignalNodeParams initial_params = {}; + initial_params.extSemArray = extSemArray; + initial_params.paramsArray = signal_params; + initial_params.numExtSems = numExtSems; + + HIP_CHECK(hipGraphAddExternalSemaphoresSignalNode(&node, graph, nullptr, 0, &initial_params)); + + HIP_CHECK(hipGraphExternalSemaphoresSignalNodeGetParams(node, &retrieved_params)); + REQUIRE(initial_params == retrieved_params); + HIP_CHECK(hipGraphExternalSemaphoresSignalNodeSetParams(node, &node_params)); + + delete[] signal_params; + } else { + HIP_CHECK(hipGraphAddExternalSemaphoresSignalNode(&node, graph, nullptr, 0, &node_params)); + } + + HIP_CHECK(hipGraphExternalSemaphoresSignalNodeGetParams(node, &retrieved_params)); + REQUIRE(node_params == retrieved_params); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + + HIP_CHECK(hipGraphLaunch(graph_exec, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); + + return hipSuccess; +} From 436ae73843cec7bcc92111b401d3dbcb161e1a67 Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Wed, 27 Dec 2023 22:07:49 +0100 Subject: [PATCH 55/71] EXSWHTEC-373 - Implement tests for the hipGraph*ExternalSemaphoresWaitNode APIs #451 Change-Id: I3310cfd274d73eec8018e183a9e7464cd05b193c --- catch/unit/vulkan_interop/CMakeLists.txt | 4 + .../hipGraphAddExternalSemaphoresWaitNode.cc | 134 +++++++++ ...ExecExternalSemaphoresWaitNodeSetParams.cc | 189 +++++++++++++ ...raphExternalSemaphoresWaitNodeGetParams.cc | 96 +++++++ ...raphExternalSemaphoresWaitNodeSetParams.cc | 136 
+++++++++ .../hipWaitExternalSemaphoresAsync.cc | 165 +---------- .../vulkan_interop/wait_semaphore_common.hh | 263 ++++++++++++++++++ 7 files changed, 826 insertions(+), 161 deletions(-) create mode 100644 catch/unit/vulkan_interop/hipGraphAddExternalSemaphoresWaitNode.cc create mode 100644 catch/unit/vulkan_interop/hipGraphExecExternalSemaphoresWaitNodeSetParams.cc create mode 100644 catch/unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeGetParams.cc create mode 100644 catch/unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeSetParams.cc create mode 100644 catch/unit/vulkan_interop/wait_semaphore_common.hh diff --git a/catch/unit/vulkan_interop/CMakeLists.txt b/catch/unit/vulkan_interop/CMakeLists.txt index 35212887f5..3728fd2bf9 100644 --- a/catch/unit/vulkan_interop/CMakeLists.txt +++ b/catch/unit/vulkan_interop/CMakeLists.txt @@ -12,6 +12,10 @@ set(TEST_SRC hipGraphExternalSemaphoresSignalNodeSetParams.cc hipGraphExternalSemaphoresSignalNodeGetParams.cc hipGraphExecExternalSemaphoresSignalNodeSetParams.cc + hipGraphAddExternalSemaphoresWaitNode.cc + hipGraphExternalSemaphoresWaitNodeSetParams.cc + hipGraphExternalSemaphoresWaitNodeGetParams.cc + hipGraphExecExternalSemaphoresWaitNodeSetParams.cc ) if(WIN32) diff --git a/catch/unit/vulkan_interop/hipGraphAddExternalSemaphoresWaitNode.cc b/catch/unit/vulkan_interop/hipGraphAddExternalSemaphoresWaitNode.cc new file mode 100644 index 0000000000..a64eec5f6f --- /dev/null +++ b/catch/unit/vulkan_interop/hipGraphAddExternalSemaphoresWaitNode.cc @@ -0,0 +1,134 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +#include "vulkan_test.hh" +#include "wait_semaphore_common.hh" +#include "graph_tests_common.hh" + +/** + * @addtogroup hipGraphAddExternalSemaphoresWaitNode hipGraphAddExternalSemaphoresWaitNode + * @{ + * @ingroup GraphTest + * `hipGraphAddExternalSemaphoresWaitNode(hipGraphNode_t* pGraphNode, hipGraph_t graph, const + * hipGraphNode_t* pDependencies, size_t numDependencies, const hipExternalSemaphoreWaitNodeParams* + * nodeParams)` - Creates an external semaphore wait node and adds it to a graph. + */ + +/** + * Test Description + * ------------------------ + * - Creates two host visible Vulkan buffers. + * - Adds a buffer copy command which will copy from one buffer to another. + * - Creates an external Vulkan binary semaphore. + * - Creates a Vulkan fence and signals semaphore asynchronously. + * - Waits for the operation to finish successfully. 
+ * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphAddExternalSemaphoresWaitNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ + +TEST_CASE("Unit_hipGraphAddExternalSemaphoresWaitNode_Positive_Basic") { + WaitExternalSemaphoreCommon(GraphExtSemaphoreWaitWrapper<>); +} + +// Timeline semaphores unsupported on AMD +#if HT_NVIDIA + +/** + * Test Description + * ------------------------ + * - Creates an external Vulkan timeline semaphore. + * - Imports the semaphore and signals. + * - Waits for the operation to finish successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphAddExternalSemaphoresWaitNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphAddExternalSemaphoresWaitNode_Vulkan_Positive_Timeline_Semaphore") { + WaitExternalTimelineSemaphoreCommon(GraphExtSemaphoreWaitWrapper<>); +} +#endif + +/** + * Test Description + * ------------------------ + * - Creates two host visible Vulkan buffers. + * - Adds a buffer copy command which will copy from one buffer to another. + * - Creates multiple external Vulkan binary semaphores. + * - Creates a Vulkan fence and signals semaphores. + * - Waits for the operations to finish successfully. 
+ * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphAddExternalSemaphoresWaitNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphAddExternalSemaphoresWaitNode_Vulkan_Positive_Multiple_Semaphores") { + WaitExternalMultipleSemaphoresCommon(GraphExtSemaphoreWaitWrapper<>); +} + +/** + * Test Description + * ------------------------ + * - Test to verify hipGraphAddExternalSemaphoresWaitNode behavior with invalid arguments: + * -# Nullptr graph + * -# Nullptr graph node + * -# Invalid numDependencies for null list of dependencies + * -# Node in dependency is from different graph + * -# Invalid numNodes + * -# Duplicate node in dependencies + * Test source + * ------------------------ + * - /unit/vulkan_interop/hipGraphAddExternalSemaphoresWaitNode.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphAddExternalSemaphoresWaitNode_Vulkan_Negative_Parameters") { + using namespace std::placeholders; + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + VulkanTest vkt(enable_validation); + hipExternalSemaphoreWaitParams wait_params = {}; + wait_params.params.fence.value = 1; + auto hip_ext_semaphore = ImportBinarySemaphore(vkt); + + hipExternalSemaphoreWaitNodeParams node_params = {}; + node_params.extSemArray = &hip_ext_semaphore; + node_params.paramsArray = &wait_params; + node_params.numExtSems = 1; + + GraphAddNodeCommonNegativeTests( + std::bind(hipGraphAddExternalSemaphoresWaitNode, _1, _2, _3, _4, &node_params), graph); + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + HIP_CHECK(hipGraphDestroy(graph)); +} diff --git a/catch/unit/vulkan_interop/hipGraphExecExternalSemaphoresWaitNodeSetParams.cc b/catch/unit/vulkan_interop/hipGraphExecExternalSemaphoresWaitNodeSetParams.cc new file mode 100644 index 0000000000..6d03be3d40 --- /dev/null +++ 
b/catch/unit/vulkan_interop/hipGraphExecExternalSemaphoresWaitNodeSetParams.cc @@ -0,0 +1,189 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include <vector> + +#include "vulkan_test.hh" +#include "wait_semaphore_common.hh" + +/** + * @addtogroup hipGraphExecExternalSemaphoresWaitNodeSetParams + * hipGraphExecExternalSemaphoresWaitNodeSetParams + * @{ + * @ingroup GraphTest + * `hipGraphExecExternalSemaphoresWaitNodeSetParams(hipGraphExec_t hGraphExec, hipGraphNode_t hNode, + * const hipExternalSemaphoreWaitNodeParams* nodeParams)` - Updates node parameters in the external + * semaphore wait node in the given graphExec. 
+ */ + +/** + * Waits on the given external semaphores through an executable graph whose wait node is + * updated after instantiation. + * + * The wait node is created with placeholder fence values (10 + i), the graph is instantiated, + * and the caller's parameters are then applied with + * hipGraphExecExternalSemaphoresWaitNodeSetParams before the graph is launched on `stream` + * and synchronized. + * + * @param extSemArray Semaphores to wait on. + * @param paramsArray Wait parameters, one entry per semaphore. + * @param numExtSems Number of entries in both arrays. + * @param stream Stream the instantiated graph is launched on. + * @return hipSuccess; failures are reported through HIP_CHECK / REQUIRE. + */ +static hipError_t GraphExecSemaphoreSetParamsWaitWrapper( + hipExternalSemaphore_t* extSemArray, hipExternalSemaphoreWaitParams* paramsArray, + unsigned int numExtSems, hipStream_t stream) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + + hipExternalSemaphoreWaitNodeParams node_params = {}; + node_params.extSemArray = extSemArray; + node_params.paramsArray = paramsArray; + node_params.numExtSems = numExtSems; + + /* std::vector instead of a runtime-sized stack array: variable-length arrays are a compiler + extension, not standard C++. Elements are value-initialized. */ + std::vector<hipExternalSemaphoreWaitParams> wait_params(numExtSems); + for (unsigned int i = 0; i < numExtSems; i++) { + wait_params[i].flags = 0; + wait_params[i].params.fence.value = 10 + i; + } + + hipExternalSemaphoreWaitNodeParams initial_params = {}; + initial_params.extSemArray = extSemArray; + initial_params.paramsArray = wait_params.data(); + initial_params.numExtSems = numExtSems; + + HIP_CHECK(hipGraphAddExternalSemaphoresWaitNode(&node, graph, nullptr, 0, &initial_params)); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + + HIP_CHECK(hipGraphExecExternalSemaphoresWaitNodeSetParams(graph_exec, node, &node_params)); + + /* The update above targets graph_exec only, so the node in the source graph is still + expected to report the parameters it was created with. */ + hipExternalSemaphoreWaitNodeParams retrieved_params = {0}; + HIP_CHECK(hipGraphExternalSemaphoresWaitNodeGetParams(node, &retrieved_params)); + REQUIRE(initial_params == retrieved_params); + + HIP_CHECK(hipGraphLaunch(graph_exec, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); + + return hipSuccess; +} + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values in the executable graph. The + * graph is run and it is verified that the graph node waits for the external binary semaphore and + * operation finishes successfully. 
+ * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExecExternalSemaphoresWaitNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExecExternalSemaphoresWaitNodeSetParams_Positive_Basic") { + WaitExternalSemaphoreCommon(GraphExecSemaphoreSetParamsWaitWrapper); +} + +// Timeline semaphores unsupported on AMD +#if HT_NVIDIA + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values in the executable graph. The + * graph is run and it is verified that the graph node waits for the external timeline semaphore and + * operation finishes successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExecExternalSemaphoresWaitNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE( + "Unit_hipGraphExecExternalSemaphoresWaitNodeSetParams_Vulkan_Positive_Timeline_Semaphore") { + WaitExternalTimelineSemaphoreCommon(GraphExecSemaphoreSetParamsWaitWrapper); +} +#endif + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values in the executable graph. The + * graph is run and it is verified that the graph node waits for the external binary semaphores and + * operation finishes successfully. 
+ * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExecExternalSemaphoresWaitNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE( + "Unit_hipGraphExecExternalSemaphoresWaitNodeSetParams_Vulkan_Positive_Multiple_Semaphores") { + WaitExternalMultipleSemaphoresCommon(GraphExecSemaphoreSetParamsWaitWrapper); +} + +/** + * Test Description + * ------------------------ + * - Test to verify hipGraphExecExternalSemaphoresWaitNodeSetParams behavior with invalid + * arguments: + * -# Nullptr graphexec + * -# Nullptr graph node + * -# Nullptr params + * Each case is expected to return hipErrorInvalidValue. + * Test source + * ------------------------ + * - /unit/vulkan_interop/hipGraphExecExternalSemaphoresWaitNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExecExternalSemaphoresWaitNodeSetParams_Vulkan_Negative_Parameters") { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + VulkanTest vkt(enable_validation); + hipExternalSemaphoreWaitParams wait_params = {}; + wait_params.params.fence.value = 1; + auto hip_ext_semaphore = ImportBinarySemaphore(vkt); + + hipExternalSemaphoreWaitNodeParams node_params = {}; + node_params.extSemArray = &hip_ext_semaphore; + node_params.paramsArray = &wait_params; + node_params.numExtSems = 1; + + hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddExternalSemaphoresWaitNode(&node, graph, nullptr, 0, &node_params)); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + + SECTION("pGraphExec == nullptr") { + HIP_CHECK_ERROR(hipGraphExecExternalSemaphoresWaitNodeSetParams(nullptr, node, &node_params), + hipErrorInvalidValue); + } + + SECTION("node == nullptr") { + HIP_CHECK_ERROR( + hipGraphExecExternalSemaphoresWaitNodeSetParams(graph_exec, nullptr, &node_params), + hipErrorInvalidValue); + } + + SECTION("params == nullptr") { + HIP_CHECK_ERROR(hipGraphExecExternalSemaphoresWaitNodeSetParams(graph_exec, 
node, nullptr), + hipErrorInvalidValue); + } + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); +} diff --git a/catch/unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeGetParams.cc b/catch/unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeGetParams.cc new file mode 100644 index 0000000000..b6c0034c04 --- /dev/null +++ b/catch/unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeGetParams.cc @@ -0,0 +1,96 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "vulkan_test.hh" +#include "wait_semaphore_common.hh" + +/** + * @addtogroup hipGraphExternalSemaphoresWaitNodeGetParams + * hipGraphExternalSemaphoresWaitNodeGetParams + * @{ + * @ingroup GraphTest + * `hipGraphExternalSemaphoresWaitNodeGetParams(hipGraphNode_t hNode, + * hipExternalSemaphoreWaitNodeParams* params_out)` - Returns external semaphore wait node params. 
+ * ________________________ + * Test cases from other APIs: + * - @ref Unit_hipGraphExternalSemaphoresWaitNodeSetParams_Positive_Basic + * - @ref Unit_hipGraphExternalSemaphoresWaitNodeSetParams_Vulkan_Positive_Timeline_Semaphore + * - @ref Unit_hipGraphExternalSemaphoresWaitNodeSetParams_Vulkan_Positive_Multiple_Semaphores + */ + + +/** + * Test Description + * ------------------------ + * - Test to verify hipGraphExternalSemaphoresWaitNodeGetParams behavior with invalid + * arguments: + * -# Nullptr graph node + * -# Nullptr params + * -# Node is destroyed + * Test source + * ------------------------ + * - /unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeGetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresWaitNodeGetParams_Negative_Parameters") { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + VulkanTest vkt(enable_validation); + hipExternalSemaphoreWaitParams wait_params = {}; + wait_params.params.fence.value = 1; + auto hip_ext_semaphore = ImportBinarySemaphore(vkt); + + hipExternalSemaphoreWaitNodeParams node_params = {}; + node_params.extSemArray = &hip_ext_semaphore; + node_params.paramsArray = &wait_params; + node_params.numExtSems = 1; + hipExternalSemaphoreWaitNodeParams retrieved_params; + + hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddExternalSemaphoresWaitNode(&node, graph, nullptr, 0, &node_params)); + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipGraphExternalSemaphoresWaitNodeGetParams(nullptr, &retrieved_params), + hipErrorInvalidValue); + } + + SECTION("params_out == nullptr") { + HIP_CHECK_ERROR(hipGraphExternalSemaphoresWaitNodeGetParams(node, nullptr), + hipErrorInvalidValue); + } + +// Disabled on AMD due to defect - EXSWHTEC-208 +#if HT_NVIDIA + SECTION("Node is destroyed") { + hipGraph_t graph_temp = nullptr; + HIP_CHECK(hipGraphCreate(&graph_temp, 0)); + hipGraphNode_t node_temp = nullptr; + HIP_CHECK( + 
hipGraphAddExternalSemaphoresWaitNode(&node_temp, graph_temp, nullptr, 0, &node_params)); + HIP_CHECK(hipGraphDestroy(graph_temp)); + HIP_CHECK_ERROR(hipGraphExternalSemaphoresWaitNodeGetParams(node_temp, &retrieved_params), + hipErrorInvalidValue); + } +#endif +} diff --git a/catch/unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeSetParams.cc b/catch/unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeSetParams.cc new file mode 100644 index 0000000000..c21c810ac1 --- /dev/null +++ b/catch/unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeSetParams.cc @@ -0,0 +1,136 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "vulkan_test.hh" +#include "wait_semaphore_common.hh" + +/** + * @addtogroup hipGraphExternalSemaphoresWaitNodeSetParams + * hipGraphExternalSemaphoresWaitNodeSetParams + * @{ + * @ingroup GraphTest + * `hipGraphExternalSemaphoresWaitNodeSetParams(hipGraphNode_t hNode, const + * hipExternalSemaphoreWaitNodeParams* nodeParams)` - Updates node parameters in the external + * semaphore wait node. + */ + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values. The graph is run and it is + * verified that the graph node waits for the external binary semaphore and operation finishes + * successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresWaitNodeSetParams_Positive_Basic") { + WaitExternalSemaphoreCommon(GraphExtSemaphoreWaitWrapper); +} + +// Timeline semaphores unsupported on AMD +#if HT_NVIDIA + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values. The graph is run and it is + * verified that the graph node waits for the external timeline semaphore and operation finishes + * successfully. 
+ * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresWaitNodeSetParams_Vulkan_Positive_Timeline_Semaphore") { + WaitExternalTimelineSemaphoreCommon(GraphExtSemaphoreWaitWrapper); +} +#endif + +/** + * Test Description + * ------------------------ + * - Verify that node parameters get updated correctly by creating a node with valid but + * incorrect parameters, and then setting them to the correct values. The graph is run and it is + * verified that the graph node waits for the external binary semaphores and operation finishes + * successfully. + * Test source + * ------------------------ + * - unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresWaitNodeSetParams_Vulkan_Positive_Multiple_Semaphores") { + WaitExternalMultipleSemaphoresCommon(GraphExtSemaphoreWaitWrapper); +} + +/** + * Test Description + * ------------------------ + * - Test to verify hipGraphExternalSemaphoresWaitNodeSetParams behavior with invalid + * arguments: + * -# Nullptr graph node + * -# Nullptr params + * Test source + * ------------------------ + * - /unit/vulkan_interop/hipGraphExternalSemaphoresWaitNodeSetParams.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipGraphExternalSemaphoresWaitNodeSetParams_Vulkan_Negative_Parameters") { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + VulkanTest vkt(enable_validation); + hipExternalSemaphoreWaitParams wait_params = {}; + wait_params.params.fence.value = 1; + auto hip_ext_semaphore = ImportBinarySemaphore(vkt); + + hipExternalSemaphoreWaitNodeParams node_params = {}; + node_params.extSemArray = &hip_ext_semaphore; + node_params.paramsArray = 
&wait_params; + node_params.numExtSems = 1; + + SECTION("node == nullptr") { + HIP_CHECK_ERROR(hipGraphExternalSemaphoresWaitNodeSetParams(nullptr, &node_params), + hipErrorInvalidValue); + } + + hipGraphNode_t node = nullptr; + HIP_CHECK(hipGraphAddExternalSemaphoresWaitNode(&node, graph, nullptr, 0, &node_params)); + + SECTION("params == nullptr") { + HIP_CHECK_ERROR(hipGraphExternalSemaphoresWaitNodeSetParams(node, nullptr), + hipErrorInvalidValue); + } + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + HIP_CHECK(hipGraphDestroy(graph)); +} diff --git a/catch/unit/vulkan_interop/hipWaitExternalSemaphoresAsync.cc b/catch/unit/vulkan_interop/hipWaitExternalSemaphoresAsync.cc index edebebe52a..ee8a175b6f 100644 --- a/catch/unit/vulkan_interop/hipWaitExternalSemaphoresAsync.cc +++ b/catch/unit/vulkan_interop/hipWaitExternalSemaphoresAsync.cc @@ -20,178 +20,21 @@ THE SOFTWARE. */ #include "vulkan_test.hh" - -constexpr bool enable_validation = false; +#include "wait_semaphore_common.hh" TEST_CASE("Unit_hipWaitExternalSemaphoresAsync_Vulkan_Positive_Binary_Semaphore") { - VulkanTest vkt(enable_validation); - - constexpr uint32_t count = 1; - const auto src_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); - const auto dst_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_DST_BIT); - - const auto command_buffer = vkt.GetCommandBuffer(); - - VkCommandBufferBeginInfo begin_info = {}; - begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - VK_CHECK_RESULT(vkBeginCommandBuffer(command_buffer, &begin_info)); - VkBufferCopy buffer_copy = {}; - buffer_copy.size = count * sizeof(*src_storage.host_ptr); - vkCmdCopyBuffer(command_buffer, src_storage.buffer, dst_storage.buffer, 1, &buffer_copy); - VK_CHECK_RESULT(vkEndCommandBuffer(command_buffer)); - - const auto semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_BINARY); - const auto 
hip_sem_handle_desc = - vkt.BuildSemaphoreDescriptor(semaphore, VK_SEMAPHORE_TYPE_BINARY); - - hipExternalSemaphore_t hip_ext_semaphore; - HIP_CHECK(hipImportExternalSemaphore(&hip_ext_semaphore, &hip_sem_handle_desc)); - - hipExternalSemaphoreWaitParams hip_ext_semaphore_wait_params = {}; - hip_ext_semaphore_wait_params.flags = 0; - hip_ext_semaphore_wait_params.params.fence.value = 0; - HIP_CHECK(hipWaitExternalSemaphoresAsync(&hip_ext_semaphore, &hip_ext_semaphore_wait_params, 1, - nullptr)); - PollStream(nullptr, hipErrorNotReady); - - VkSubmitInfo submit_info = {}; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &command_buffer; - submit_info.signalSemaphoreCount = 1; - submit_info.pSignalSemaphores = &semaphore; - - *src_storage.host_ptr = 42; - - const auto fence = vkt.CreateFence(); - VK_CHECK_RESULT(vkQueueSubmit(vkt.GetQueue(), 1, &submit_info, fence)); - VK_CHECK_RESULT( - vkWaitForFences(vkt.GetDevice(), 1, &fence, VK_TRUE, 5'000'000'000 /*5 seconds*/)); - - PollStream(nullptr, hipSuccess); - - REQUIRE(42 == *dst_storage.host_ptr); - - HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + WaitExternalSemaphoreCommon(hipWaitExternalSemaphoresAsync); } // Timeline semaphores unsupported on AMD #if HT_NVIDIA TEST_CASE("Unit_hipWaitExternalSemaphoresAsync_Vulkan_Positive_Timeline_Semaphore") { - VulkanTest vkt(enable_validation); - - const auto [wait_value, signal_value] = - GENERATE(std::make_pair(2, 2), std::make_pair(2, 3), std::make_pair(3, 2)); - INFO("Wait value: " << wait_value << ", signal value: " << signal_value); - - const auto semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_TIMELINE); - const auto hip_sem_handle_desc = - vkt.BuildSemaphoreDescriptor(semaphore, VK_SEMAPHORE_TYPE_TIMELINE); - hipExternalSemaphore_t hip_ext_semaphore; - HIP_CHECK(hipImportExternalSemaphore(&hip_ext_semaphore, &hip_sem_handle_desc)); - - hipExternalSemaphoreWaitParams 
hip_ext_semaphore_wait_params = {}; - hip_ext_semaphore_wait_params.flags = 0; - hip_ext_semaphore_wait_params.params.fence.value = wait_value; - HIP_CHECK(hipWaitExternalSemaphoresAsync(&hip_ext_semaphore, &hip_ext_semaphore_wait_params, 1, - nullptr)); - PollStream(nullptr, hipErrorNotReady); - - VkSemaphoreSignalInfo signal_info = {}; - signal_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO; - signal_info.semaphore = semaphore; - signal_info.value = signal_value; - VK_CHECK_RESULT(vkSignalSemaphore(vkt.GetDevice(), &signal_info)); - if (wait_value > signal_value) { - PollStream(nullptr, hipErrorNotReady); - signal_info.value = wait_value; - VK_CHECK_RESULT(vkSignalSemaphore(vkt.GetDevice(), &signal_info)); - } - PollStream(nullptr, hipSuccess); - - HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); + WaitExternalTimelineSemaphoreCommon(hipWaitExternalSemaphoresAsync); } #endif TEST_CASE("Unit_hipWaitExternalSemaphoresAsync_Vulkan_Positive_Multiple_Semaphores") { - VulkanTest vkt(enable_validation); - -#if HT_AMD - constexpr auto second_semaphore_type = VK_SEMAPHORE_TYPE_BINARY; -#else - constexpr auto second_semaphore_type = VK_SEMAPHORE_TYPE_TIMELINE; -#endif - - constexpr uint32_t count = 1; - const auto src_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); - const auto dst_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_DST_BIT); - - const auto command_buffer = vkt.GetCommandBuffer(); - - VkCommandBufferBeginInfo begin_info = {}; - begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - VK_CHECK_RESULT(vkBeginCommandBuffer(command_buffer, &begin_info)); - VkBufferCopy buffer_copy = {}; - buffer_copy.size = count * sizeof(*src_storage.host_ptr); - vkCmdCopyBuffer(command_buffer, src_storage.buffer, dst_storage.buffer, 1, &buffer_copy); - VK_CHECK_RESULT(vkEndCommandBuffer(command_buffer)); - - const auto binary_semaphore = 
vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_BINARY); - const auto hip_binary_sem_handle_desc = - vkt.BuildSemaphoreDescriptor(binary_semaphore, VK_SEMAPHORE_TYPE_BINARY); - hipExternalSemaphore_t hip_binary_ext_semaphore; - HIP_CHECK(hipImportExternalSemaphore(&hip_binary_ext_semaphore, &hip_binary_sem_handle_desc)); - - const auto timeline_semaphore = vkt.CreateExternalSemaphore(second_semaphore_type); - const auto hip_timeline_sem_handle_desc = - vkt.BuildSemaphoreDescriptor(timeline_semaphore, second_semaphore_type); - hipExternalSemaphore_t hip_timeline_ext_semaphore; - HIP_CHECK(hipImportExternalSemaphore(&hip_timeline_ext_semaphore, &hip_timeline_sem_handle_desc)); - - hipExternalSemaphoreWaitParams binary_semaphore_wait_params = {}; - binary_semaphore_wait_params.params.fence.value = 0; - - hipExternalSemaphoreWaitParams timeline_semaphore_wait_params = {}; - timeline_semaphore_wait_params.params.fence.value = - second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE ? 1 : 0; - - hipExternalSemaphore_t ext_semaphores[] = {hip_binary_ext_semaphore, hip_timeline_ext_semaphore}; - hipExternalSemaphoreWaitParams wait_params[] = {binary_semaphore_wait_params, - timeline_semaphore_wait_params}; - HIP_CHECK(hipWaitExternalSemaphoresAsync(ext_semaphores, wait_params, 2, nullptr)); - - PollStream(nullptr, hipErrorNotReady); - - if (second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE) { - VkSemaphoreSignalInfo signal_info = {}; - signal_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO; - signal_info.semaphore = timeline_semaphore; - signal_info.value = 1; - VK_CHECK_RESULT(vkSignalSemaphore(vkt.GetDevice(), &signal_info)); - - PollStream(nullptr, hipErrorNotReady); - } - - VkSubmitInfo submit_info = {}; - VkSemaphore signal_semaphores[] = {binary_semaphore, timeline_semaphore}; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &command_buffer; - submit_info.signalSemaphoreCount = 
second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE ? 1 : 2; - submit_info.pSignalSemaphores = - second_semaphore_type == VK_SEMAPHORE_TYPE_MAX_ENUM ? &binary_semaphore : signal_semaphores; - - const auto fence = vkt.CreateFence(); - VK_CHECK_RESULT(vkQueueSubmit(vkt.GetQueue(), 1, &submit_info, fence)); - VK_CHECK_RESULT( - vkWaitForFences(vkt.GetDevice(), 1, &fence, VK_TRUE, 5'000'000'000 /*5 seconds*/)); - - PollStream(nullptr, hipSuccess); - - HIP_CHECK(hipDestroyExternalSemaphore(hip_timeline_ext_semaphore)); - HIP_CHECK(hipDestroyExternalSemaphore(hip_binary_ext_semaphore)); + WaitExternalMultipleSemaphoresCommon(hipWaitExternalSemaphoresAsync); } TEST_CASE("Unit_hipWaitExternalSemaphoresAsync_Vulkan_Negative_Parameters") { diff --git a/catch/unit/vulkan_interop/wait_semaphore_common.hh b/catch/unit/vulkan_interop/wait_semaphore_common.hh new file mode 100644 index 0000000000..e590a6d54a --- /dev/null +++ b/catch/unit/vulkan_interop/wait_semaphore_common.hh @@ -0,0 +1,263 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include +#include + +constexpr bool enable_validation = false; + +template void WaitExternalSemaphoreCommon(F f) { + VulkanTest vkt(enable_validation); + + constexpr uint32_t count = 1; + const auto src_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); + const auto dst_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_DST_BIT); + + const auto command_buffer = vkt.GetCommandBuffer(); + + VkCommandBufferBeginInfo begin_info = {}; + begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + VK_CHECK_RESULT(vkBeginCommandBuffer(command_buffer, &begin_info)); + VkBufferCopy buffer_copy = {}; + buffer_copy.size = count * sizeof(*src_storage.host_ptr); + vkCmdCopyBuffer(command_buffer, src_storage.buffer, dst_storage.buffer, 1, &buffer_copy); + VK_CHECK_RESULT(vkEndCommandBuffer(command_buffer)); + + const auto semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_BINARY); + const auto hip_sem_handle_desc = + vkt.BuildSemaphoreDescriptor(semaphore, VK_SEMAPHORE_TYPE_BINARY); + + hipExternalSemaphore_t hip_ext_semaphore; + HIP_CHECK(hipImportExternalSemaphore(&hip_ext_semaphore, &hip_sem_handle_desc)); + + hipExternalSemaphoreWaitParams hip_ext_semaphore_wait_params = {}; + hip_ext_semaphore_wait_params.flags = 0; + hip_ext_semaphore_wait_params.params.fence.value = 0; + HIP_CHECK(f(&hip_ext_semaphore, &hip_ext_semaphore_wait_params, 1, nullptr)); + PollStream(nullptr, hipErrorNotReady); + + VkSubmitInfo submit_info = {}; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &command_buffer; + 
submit_info.signalSemaphoreCount = 1; + submit_info.pSignalSemaphores = &semaphore; + + *src_storage.host_ptr = 42; + + const auto fence = vkt.CreateFence(); + VK_CHECK_RESULT(vkQueueSubmit(vkt.GetQueue(), 1, &submit_info, fence)); + VK_CHECK_RESULT( + vkWaitForFences(vkt.GetDevice(), 1, &fence, VK_TRUE, 5'000'000'000 /*5 seconds*/)); + + PollStream(nullptr, hipSuccess); + + REQUIRE(42 == *dst_storage.host_ptr); + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); +} + +#if HT_NVIDIA +template void WaitExternalTimelineSemaphoreCommon(F f) { + VulkanTest vkt(enable_validation); + + const auto [wait_value, signal_value] = + GENERATE(std::make_pair(2, 2), std::make_pair(2, 3), std::make_pair(3, 2)); + INFO("Wait value: " << wait_value << ", signal value: " << signal_value); + + const auto semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_TIMELINE); + const auto hip_sem_handle_desc = + vkt.BuildSemaphoreDescriptor(semaphore, VK_SEMAPHORE_TYPE_TIMELINE); + hipExternalSemaphore_t hip_ext_semaphore; + HIP_CHECK(hipImportExternalSemaphore(&hip_ext_semaphore, &hip_sem_handle_desc)); + + hipExternalSemaphoreWaitParams hip_ext_semaphore_wait_params = {}; + hip_ext_semaphore_wait_params.flags = 0; + hip_ext_semaphore_wait_params.params.fence.value = wait_value; + HIP_CHECK(f(&hip_ext_semaphore, &hip_ext_semaphore_wait_params, 1, nullptr)); + PollStream(nullptr, hipErrorNotReady); + + VkSemaphoreSignalInfo signal_info = {}; + signal_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO; + signal_info.semaphore = semaphore; + signal_info.value = signal_value; + VK_CHECK_RESULT(vkSignalSemaphore(vkt.GetDevice(), &signal_info)); + if (wait_value > signal_value) { + PollStream(nullptr, hipErrorNotReady); + signal_info.value = wait_value; + VK_CHECK_RESULT(vkSignalSemaphore(vkt.GetDevice(), &signal_info)); + } + PollStream(nullptr, hipSuccess); + + HIP_CHECK(hipDestroyExternalSemaphore(hip_ext_semaphore)); +} +#endif + +template void 
WaitExternalMultipleSemaphoresCommon(F f) { + VulkanTest vkt(enable_validation); + +#if HT_AMD + constexpr auto second_semaphore_type = VK_SEMAPHORE_TYPE_BINARY; +#else + constexpr auto second_semaphore_type = VK_SEMAPHORE_TYPE_TIMELINE; +#endif + + constexpr uint32_t count = 1; + const auto src_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_SRC_BIT); + const auto dst_storage = vkt.CreateMappedStorage(count, VK_BUFFER_USAGE_TRANSFER_DST_BIT); + + const auto command_buffer = vkt.GetCommandBuffer(); + + VkCommandBufferBeginInfo begin_info = {}; + begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + VK_CHECK_RESULT(vkBeginCommandBuffer(command_buffer, &begin_info)); + VkBufferCopy buffer_copy = {}; + buffer_copy.size = count * sizeof(*src_storage.host_ptr); + vkCmdCopyBuffer(command_buffer, src_storage.buffer, dst_storage.buffer, 1, &buffer_copy); + VK_CHECK_RESULT(vkEndCommandBuffer(command_buffer)); + + const auto binary_semaphore = vkt.CreateExternalSemaphore(VK_SEMAPHORE_TYPE_BINARY); + const auto hip_binary_sem_handle_desc = + vkt.BuildSemaphoreDescriptor(binary_semaphore, VK_SEMAPHORE_TYPE_BINARY); + hipExternalSemaphore_t hip_binary_ext_semaphore; + HIP_CHECK(hipImportExternalSemaphore(&hip_binary_ext_semaphore, &hip_binary_sem_handle_desc)); + + const auto timeline_semaphore = vkt.CreateExternalSemaphore(second_semaphore_type); + const auto hip_timeline_sem_handle_desc = + vkt.BuildSemaphoreDescriptor(timeline_semaphore, second_semaphore_type); + hipExternalSemaphore_t hip_timeline_ext_semaphore; + HIP_CHECK(hipImportExternalSemaphore(&hip_timeline_ext_semaphore, &hip_timeline_sem_handle_desc)); + + hipExternalSemaphoreWaitParams binary_semaphore_wait_params = {}; + binary_semaphore_wait_params.params.fence.value = 0; + + hipExternalSemaphoreWaitParams timeline_semaphore_wait_params = {}; + timeline_semaphore_wait_params.params.fence.value = + 
second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE ? 1 : 0; + + hipExternalSemaphore_t ext_semaphores[] = {hip_binary_ext_semaphore, hip_timeline_ext_semaphore}; + hipExternalSemaphoreWaitParams wait_params[] = {binary_semaphore_wait_params, + timeline_semaphore_wait_params}; + HIP_CHECK(f(ext_semaphores, wait_params, 2, nullptr)); + + PollStream(nullptr, hipErrorNotReady); + + if (second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE) { + VkSemaphoreSignalInfo signal_info = {}; + signal_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO; + signal_info.semaphore = timeline_semaphore; + signal_info.value = 1; + VK_CHECK_RESULT(vkSignalSemaphore(vkt.GetDevice(), &signal_info)); + + PollStream(nullptr, hipErrorNotReady); + } + + VkSubmitInfo submit_info = {}; + VkSemaphore signal_semaphores[] = {binary_semaphore, timeline_semaphore}; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.commandBufferCount = 1; + submit_info.pCommandBuffers = &command_buffer; + submit_info.signalSemaphoreCount = second_semaphore_type == VK_SEMAPHORE_TYPE_TIMELINE ? 1 : 2; + submit_info.pSignalSemaphores = + second_semaphore_type == VK_SEMAPHORE_TYPE_MAX_ENUM ? 
&binary_semaphore : signal_semaphores; + + const auto fence = vkt.CreateFence(); + VK_CHECK_RESULT(vkQueueSubmit(vkt.GetQueue(), 1, &submit_info, fence)); + VK_CHECK_RESULT( + vkWaitForFences(vkt.GetDevice(), 1, &fence, VK_TRUE, 5'000'000'000 /*5 seconds*/)); + + PollStream(nullptr, hipSuccess); + + HIP_CHECK(hipDestroyExternalSemaphore(hip_timeline_ext_semaphore)); + HIP_CHECK(hipDestroyExternalSemaphore(hip_binary_ext_semaphore)); +} + +static inline bool operator==(const hipExternalSemaphoreWaitNodeParams& lhs, + const hipExternalSemaphoreWaitNodeParams& rhs) { + bool equal = true; + if (lhs.numExtSems != rhs.numExtSems) { + return false; + } + for (unsigned int i = 0; i < lhs.numExtSems; i++) { + if ((lhs.extSemArray[i] != rhs.extSemArray[i]) || + (lhs.paramsArray[i].params.fence.value != rhs.paramsArray[i].params.fence.value)) { + equal = false; + break; + } + } + return equal; +} + +template +hipError_t GraphExtSemaphoreWaitWrapper(hipExternalSemaphore_t* extSemArray, + hipExternalSemaphoreWaitParams* paramsArray, + unsigned int numExtSems, hipStream_t stream) { + hipGraph_t graph = nullptr; + HIP_CHECK(hipGraphCreate(&graph, 0)); + hipGraphNode_t node = nullptr; + hipExternalSemaphoreWaitNodeParams retrieved_params = {}; + memset(&retrieved_params, 0, sizeof(retrieved_params)); + + hipExternalSemaphoreWaitNodeParams node_params = {}; + node_params.extSemArray = extSemArray; + node_params.paramsArray = paramsArray; + node_params.numExtSems = numExtSems; + + if constexpr (set_params) { + hipExternalSemaphoreWaitParams* wait_params = new hipExternalSemaphoreWaitParams[numExtSems]; + for (unsigned int i = 0; i < numExtSems; i++) { + wait_params[i].flags = 0; + wait_params[i].params.fence.value = 10 + i; + } + + hipExternalSemaphoreWaitNodeParams initial_params = {}; + initial_params.extSemArray = extSemArray; + initial_params.paramsArray = wait_params; + initial_params.numExtSems = numExtSems; + + HIP_CHECK(hipGraphAddExternalSemaphoresWaitNode(&node, graph, 
nullptr, 0, &initial_params)); + + HIP_CHECK(hipGraphExternalSemaphoresWaitNodeGetParams(node, &retrieved_params)); + REQUIRE(initial_params == retrieved_params); + HIP_CHECK(hipGraphExternalSemaphoresWaitNodeSetParams(node, &node_params)); + + delete[] wait_params; + } else { + HIP_CHECK(hipGraphAddExternalSemaphoresWaitNode(&node, graph, nullptr, 0, &node_params)); + } + + HIP_CHECK(hipGraphExternalSemaphoresWaitNodeGetParams(node, &retrieved_params)); + REQUIRE(node_params == retrieved_params); + + hipGraphExec_t graph_exec = nullptr; + HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0)); + + HIP_CHECK(hipGraphLaunch(graph_exec, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec)); + HIP_CHECK(hipGraphDestroy(graph)); + + return hipSuccess; +} From 27eeaa29a7d5161bc53d0164b415657f35f294b7 Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Thu, 28 Dec 2023 18:29:54 +0100 Subject: [PATCH 56/71] EXSWHTEC-374 - Implement tests for Graph device memory APIs #452 Change-Id: Ic82e3cda60296ec7b2acb9b5755c51ccbdb85c8d --- catch/unit/graph/CMakeLists.txt | 3 + .../graph/hipDeviceGetGraphMemAttribute.cc | 205 ++++++++++++++++++ catch/unit/graph/hipDeviceGraphMemTrim.cc | 73 +++++++ .../graph/hipDeviceSetGraphMemAttribute.cc | 117 ++++++++++ 4 files changed, 398 insertions(+) create mode 100644 catch/unit/graph/hipDeviceGetGraphMemAttribute.cc create mode 100644 catch/unit/graph/hipDeviceGraphMemTrim.cc create mode 100644 catch/unit/graph/hipDeviceSetGraphMemAttribute.cc diff --git a/catch/unit/graph/CMakeLists.txt b/catch/unit/graph/CMakeLists.txt index c7b4036c49..d6519375a4 100644 --- a/catch/unit/graph/CMakeLists.txt +++ b/catch/unit/graph/CMakeLists.txt @@ -154,6 +154,9 @@ set(TEST_SRC hipDrvGraphMemcpyNodeGetParams.cc hipDrvGraphMemcpyNodeSetParams.cc hipDrvGraphAddMemsetNode.cc + hipDeviceSetGraphMemAttribute.cc + hipDeviceGetGraphMemAttribute.cc + hipDeviceGraphMemTrim.cc ) 
add_custom_target(add_Kernel.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} ${CMAKE_CURRENT_SOURCE_DIR}/add_Kernel.cpp -o ${CMAKE_CURRENT_BINARY_DIR}/../graph/add_Kernel.code -I${HIP_PATH}/include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH}) diff --git a/catch/unit/graph/hipDeviceGetGraphMemAttribute.cc b/catch/unit/graph/hipDeviceGetGraphMemAttribute.cc new file mode 100644 index 0000000000..7aa10fe61c --- /dev/null +++ b/catch/unit/graph/hipDeviceGetGraphMemAttribute.cc @@ -0,0 +1,205 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include +#include +#include + +/** + * @addtogroup hipDeviceGetGraphMemAttribute hipDeviceGetGraphMemAttribute + * @{ + * @ingroup GraphTest + * `hipDeviceGetGraphMemAttribute(int device, hipGraphMemAttributeType attr, void* value)` - + * Get the mem attribute for graphs. 
+ */ + +static constexpr auto element_count{64 * 1024 * 1024}; + + +/* Create graph with memory node */ +static void createGraph(hipGraphExec_t* graph_exec, int** device_alloc = nullptr) { + constexpr size_t num_bytes = element_count * sizeof(int); + + hipGraph_t graph; + HIP_CHECK(hipGraphCreate(&graph, 0)); + + hipGraphNode_t alloc_node; + hipMemAllocNodeParams alloc_param; + memset(&alloc_param, 0, sizeof(alloc_param)); + alloc_param.bytesize = num_bytes; + alloc_param.poolProps.allocType = hipMemAllocationTypePinned; + alloc_param.poolProps.location.id = 0; + alloc_param.poolProps.location.type = hipMemLocationTypeDevice; + + HIP_CHECK(hipGraphAddMemAllocNode(&alloc_node, graph, nullptr, 0, &alloc_param)); + REQUIRE(alloc_param.dptr != nullptr); + int* A_d = reinterpret_cast(alloc_param.dptr); + + if (device_alloc == nullptr) { + hipGraphNode_t free_node; + HIP_CHECK(hipGraphAddMemFreeNode(&free_node, graph, &alloc_node, 1, (void*)A_d)); + } else { + *device_alloc = A_d; + } + + // Instantiate graph + HIP_CHECK(hipGraphInstantiate(graph_exec, graph, nullptr, nullptr, 0)); + + HIP_CHECK(hipGraphDestroy(graph)); +} + +/* check if memory attributes for graphs contain expected values */ +static void checkGraphMemAttribute(size_t used_mem, size_t high_mem) { + size_t read_mem; + hipGraphMemAttributeType attr = hipGraphMemAttrUsedMemCurrent; + HIP_CHECK(hipDeviceGetGraphMemAttribute(0, attr, reinterpret_cast(&read_mem))); + REQUIRE(read_mem == used_mem); + + attr = hipGraphMemAttrReservedMemCurrent; + HIP_CHECK(hipDeviceGetGraphMemAttribute(0, attr, reinterpret_cast(&read_mem))); + REQUIRE(read_mem == used_mem); + + attr = hipGraphMemAttrUsedMemHigh; + HIP_CHECK(hipDeviceGetGraphMemAttribute(0, attr, reinterpret_cast(&read_mem))); + REQUIRE(read_mem == high_mem); + + attr = hipGraphMemAttrReservedMemHigh; + HIP_CHECK(hipDeviceGetGraphMemAttribute(0, attr, reinterpret_cast(&read_mem))); + REQUIRE(read_mem == high_mem); +} + +/** + * Test Description + * 
------------------------ + * - Basic test to verify that hipDeviceGetGraphMemAttribute returns correct memory attribute values + * when graphs with allocation nodes are launched, and after memory is freed to OS. + * Test source + * ------------------------ + * - /unit/graph/hipDeviceGetGraphMemAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDeviceGetGraphMemAttribute_Positive_DoubleMemory") { + hipGraphExec_t graph_exec1, graph_exec2; + int *dev_p1, *dev_p2; + + StreamGuard stream_guard(Streams::created); + hipStream_t stream = stream_guard.stream(); + + createGraph(&graph_exec1, &dev_p1); + HIP_CHECK(hipGraphLaunch(graph_exec1, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + checkGraphMemAttribute(element_count * sizeof(int), element_count * sizeof(int)); + + createGraph(&graph_exec2, &dev_p2); + HIP_CHECK(hipGraphLaunch(graph_exec2, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + checkGraphMemAttribute(2 * element_count * sizeof(int), 2 * element_count * sizeof(int)); + + HIP_CHECK(hipFree(dev_p1)); + HIP_CHECK(hipFree(dev_p2)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec1)); + HIP_CHECK(hipGraphExecDestroy(graph_exec2)); + HIP_CHECK(hipDeviceGraphMemTrim(0)); + checkGraphMemAttribute(0, 2 * element_count * sizeof(int)); +} + +/** + * Test Description + * ------------------------ + * - Basic test to verify that hipDeviceGetGraphMemAttribute returns correct memory attribute values + * when graphs with allocation and free nodes are launched, and after memory is freed to OS. 
+ * Test source + * ------------------------ + * - /unit/graph/hipDeviceGetGraphMemAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDeviceGetGraphMemAttribute_Positive_ReuseMemory") { + hipGraphExec_t graph_exec1, graph_exec2; + + StreamGuard stream_guard(Streams::created); + hipStream_t stream = stream_guard.stream(); + + createGraph(&graph_exec1); + HIP_CHECK(hipGraphLaunch(graph_exec1, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + checkGraphMemAttribute(element_count * sizeof(int), element_count * sizeof(int)); + + createGraph(&graph_exec2); + HIP_CHECK(hipGraphLaunch(graph_exec2, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); + + checkGraphMemAttribute(element_count * sizeof(int), element_count * sizeof(int)); + + HIP_CHECK(hipGraphExecDestroy(graph_exec1)); + HIP_CHECK(hipGraphExecDestroy(graph_exec2)); + HIP_CHECK(hipDeviceGraphMemTrim(0)); + checkGraphMemAttribute(0, element_count * sizeof(int)); +} + +/** + * Test Description + * ------------------------ + * - Test to verify hipDeviceGetGraphMemAttribute behavior with invalid arguments: + * -# Device is not valid + * -# Attribute value is not valid + * -# Get value is nullptr + * Test source + * ------------------------ + * - /unit/graph/hipDeviceGetGraphMemAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDeviceGetGraphMemAttribute_Negative_Parameters") { + int device_id = 0; + HIP_CHECK(hipSetDevice(device_id)); + + int num_dev = 0; + HIP_CHECK(hipGetDeviceCount(&num_dev)); + + hipGraphMemAttributeType attr = hipGraphMemAttrUsedMemHigh; + size_t get_value = 0; + + SECTION("Device is not valid") { + HIP_CHECK_ERROR( + hipDeviceGetGraphMemAttribute(num_dev, attr, reinterpret_cast(&get_value)), + hipErrorInvalidDevice); + } + + SECTION("Attribute value is not valid") { + HIP_CHECK_ERROR(hipDeviceGetGraphMemAttribute(0, static_cast(0x7), + 
reinterpret_cast(&get_value)), + hipErrorInvalidValue); + } + + SECTION("Get value is nullptr") { + HIP_CHECK_ERROR(hipDeviceGetGraphMemAttribute(0, attr, nullptr), hipErrorInvalidValue); + } +} diff --git a/catch/unit/graph/hipDeviceGraphMemTrim.cc b/catch/unit/graph/hipDeviceGraphMemTrim.cc new file mode 100644 index 0000000000..5d730a3f88 --- /dev/null +++ b/catch/unit/graph/hipDeviceGraphMemTrim.cc @@ -0,0 +1,73 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +/** + * @addtogroup hipDeviceGraphMemTrim hipDeviceGraphMemTrim + * @{ + * @ingroup GraphTest + * `hipDeviceGraphMemTrim(int device)` - Free unused memory on specific device used for graph back + * to OS. + */ + +/** + * Test Description + * ------------------------ + * - Basic test to verify that unused memory used for graph can be freed on each device. 
+ * Test source + * ------------------------ + * - /unit/graph/hipDeviceGraphMemTrim.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDeviceGraphMemTrim_Positive_Default") { + const auto device = GENERATE(range(0, HipTest::getDeviceCount())); + + // Check for each device + HIP_CHECK(hipDeviceGraphMemTrim(device)); +} + +/** + * Test Description + * ------------------------ + * - Test to verify hipDeviceGraphMemTrim behavior with invalid arguments: + * -# Device is not valid + * Test source + * ------------------------ + * - /unit/graph/hipDeviceGraphMemTrim.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDeviceGraphMemTrim_Negative_Parameters") { + int device_id = 0; + HIP_CHECK(hipSetDevice(device_id)); + + int num_dev = 0; + HIP_CHECK(hipGetDeviceCount(&num_dev)); + + SECTION("Device is not valid") { + HIP_CHECK_ERROR(hipDeviceGraphMemTrim(num_dev), hipErrorInvalidDevice); + } +} diff --git a/catch/unit/graph/hipDeviceSetGraphMemAttribute.cc b/catch/unit/graph/hipDeviceSetGraphMemAttribute.cc new file mode 100644 index 0000000000..a103b12fee --- /dev/null +++ b/catch/unit/graph/hipDeviceSetGraphMemAttribute.cc @@ -0,0 +1,117 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +/** + * @addtogroup hipDeviceSetGraphMemAttribute hipDeviceSetGraphMemAttribute + * @{ + * @ingroup GraphTest + * `hipDeviceSetGraphMemAttribute(int device, hipGraphMemAttributeType attr, void* value)` - + * Set the mem attribute for graphs. + */ + +static void GraphSetGetAttribute(int device, hipGraphMemAttributeType attr, size_t set_value) { + size_t get_value = 100; + HIP_CHECK(hipDeviceSetGraphMemAttribute(device, attr, &set_value)); + HIP_CHECK(hipDeviceGetGraphMemAttribute(device, attr, &get_value)); + REQUIRE(get_value == set_value); +} + +/** + * Test Description + * ------------------------ + * - Basic test to verify that valid attributes can be reset to zero. 
+ * Test source + * ------------------------ + * - /unit/graph/hipDeviceSetGraphMemAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDeviceSetGraphMemAttribute_Positive_Default") { + const auto device = GENERATE(range(0, HipTest::getDeviceCount())); + const auto attr_type = GENERATE(hipGraphMemAttrUsedMemHigh, hipGraphMemAttrReservedMemHigh); + + // Check if attributes can be reset + size_t set_value = 0; + GraphSetGetAttribute(device, attr_type, set_value); +} + + +/** + * Test Description + * ------------------------ + * - Test to verify hipDeviceSetGraphMemAttribute behavior with invalid arguments: + * -# Device is not valid + * -# Attribute value is not supported + * -# Attribute value is not valid + * -# Set hipGraphMemAttrUsedMemHigh to non-zero + * -# Set hipGraphMemAttrReservedMemHigh to non-zero + * Test source + * ------------------------ + * - /unit/graph/hipDeviceSetGraphMemAttribute.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 6.0 + */ +TEST_CASE("Unit_hipDeviceSetGraphMemAttribute_Negative_Parameters") { + int device_id = 0; + HIP_CHECK(hipSetDevice(device_id)); + + int num_dev = 0; + HIP_CHECK(hipGetDeviceCount(&num_dev)); + + hipGraphMemAttributeType attr = hipGraphMemAttrUsedMemHigh; + size_t set_value = 0; + + SECTION("device is not valid") { + HIP_CHECK_ERROR( + hipDeviceSetGraphMemAttribute(num_dev, attr, reinterpret_cast(&set_value)), + hipErrorInvalidDevice); + } + + SECTION("Attribute value is not supported") { + HIP_CHECK_ERROR(hipDeviceSetGraphMemAttribute(0, hipGraphMemAttrUsedMemCurrent, + reinterpret_cast(&set_value)), + hipErrorInvalidValue); + } + + SECTION("Attribute value is not valid") { + HIP_CHECK_ERROR(hipDeviceSetGraphMemAttribute(0, static_cast(0x7), + reinterpret_cast(&set_value)), + hipErrorInvalidValue); + } + + SECTION("Set hipGraphMemAttrUsedMemHigh to non-zero") { + size_t invalid_value = 1; + 
HIP_CHECK_ERROR(hipDeviceSetGraphMemAttribute(0, attr, reinterpret_cast(&invalid_value)), + hipErrorInvalidValue); + } + + SECTION("Set hipGraphMemAttrReservedMemHigh to non-zero") { + attr = hipGraphMemAttrReservedMemHigh; + size_t invalid_value = 1; + HIP_CHECK_ERROR(hipDeviceSetGraphMemAttribute(0, attr, reinterpret_cast(&invalid_value)), + hipErrorInvalidValue); + } +} From e0c3f64e78641d002773f0b9abe74299ee92bbf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 14:04:09 +0100 Subject: [PATCH 57/71] EXSWHTEC-381 - Implement tests for Surface Object device functions #453 Change-Id: I750ac29781637187d59ad0a2291a1d400f97cd83 --- catch/unit/surface/CMakeLists.txt | 15 +- .../surface/{hipSurfaceObj1D.cc => surf1D.cc} | 214 +++++------ catch/unit/surface/surf1DLayered.cc | 294 +++++++++++++++ .../surface/{hipSurfaceObj2D.cc => surf2D.cc} | 273 +++++++------- catch/unit/surface/surf2DLayered.cc | 338 +++++++++++++++++ .../surface/{hipSurfaceObj3D.cc => surf3D.cc} | 248 ++++++------- catch/unit/surface/surfCubemap.cc | 338 +++++++++++++++++ catch/unit/surface/surfCubemapLayered.cc | 340 ++++++++++++++++++ 8 files changed, 1654 insertions(+), 406 deletions(-) rename catch/unit/surface/{hipSurfaceObj1D.cc => surf1D.cc} (60%) create mode 100644 catch/unit/surface/surf1DLayered.cc rename catch/unit/surface/{hipSurfaceObj2D.cc => surf2D.cc} (55%) create mode 100644 catch/unit/surface/surf2DLayered.cc rename catch/unit/surface/{hipSurfaceObj3D.cc => surf3D.cc} (64%) create mode 100644 catch/unit/surface/surfCubemap.cc create mode 100644 catch/unit/surface/surfCubemapLayered.cc diff --git a/catch/unit/surface/CMakeLists.txt b/catch/unit/surface/CMakeLists.txt index d2afb5c702..43c7eee343 100644 --- a/catch/unit/surface/CMakeLists.txt +++ b/catch/unit/surface/CMakeLists.txt @@ -20,13 +20,22 @@ # Common Tests - Test independent of all platforms set(TEST_SRC - hipSurfaceObj1D.cc - 
hipSurfaceObj2D.cc - hipSurfaceObj3D.cc hipCreateSurfaceObject.cc hipDestroySurfaceObject.cc + surf1D.cc + surf1DLayered.cc + surf2D.cc + surf2DLayered.cc + surf3D.cc + surfCubemap.cc ) +if(HIP_PLATFORM MATCHES "nvidia") # Disabled on AMD due to defect EXSWHTEC-377 +set(TEST_SRC + ${TEST_SRC} + surfCubemapLayered.cc) +endif() + hip_add_exe_to_target(NAME SurfaceTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests) \ No newline at end of file diff --git a/catch/unit/surface/hipSurfaceObj1D.cc b/catch/unit/surface/surf1D.cc similarity index 60% rename from catch/unit/surface/hipSurfaceObj1D.cc rename to catch/unit/surface/surf1D.cc index 701a99666d..20286ef483 100644 --- a/catch/unit/surface/hipSurfaceObj1D.cc +++ b/catch/unit/surface/surf1D.cc @@ -1,13 +1,16 @@ /* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -16,18 +19,22 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include + +/** + * @addtogroup surf1D surf1D + * @{ + * @ingroup SurfaceTest + */ + #include +#include #include #pragma clang diagnostic ignored "-Wunused-variable" #pragma clang diagnostic ignored "-Wunused-parameter" template -__global__ void -surf1DKernelR(hipSurfaceObject_t surfaceObject, - T* outputData, int width) -{ +__global__ void surf1DKernelR(hipSurfaceObject_t surfaceObject, T* outputData, int width) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = blockIdx.x * blockDim.x + threadIdx.x; if (x < width) { @@ -37,10 +44,7 @@ surf1DKernelR(hipSurfaceObject_t surfaceObject, } template -__global__ void -surf1DKernelW(hipSurfaceObject_t surfaceObject, - T* inputData, int width) -{ +__global__ void surf1DKernelW(hipSurfaceObject_t surfaceObject, T* inputData, int width) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = blockIdx.x * blockDim.x + threadIdx.x; if (x < width) { @@ -50,10 +54,8 @@ surf1DKernelW(hipSurfaceObject_t surfaceObject, } template -__global__ void -surf1DKernelRW(hipSurfaceObject_t surfaceObject, - hipSurfaceObject_t outputSurfObj, int width) -{ +__global__ void surf1DKernelRW(hipSurfaceObject_t surfaceObject, hipSurfaceObject_t outputSurfObj, + int width) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = blockIdx.x * blockDim.x + threadIdx.x; if (x < width) { @@ -64,14 +66,11 @@ surf1DKernelRW(hipSurfaceObject_t surfaceObject, #endif } -template -static void runTestR(const int width) -{ +template static void runTestR(const int width) { unsigned int size = width * sizeof(T); - T *hData = (T*) malloc (size); + T* hData = (T*)malloc(size); memset(hData, 0, size); - for (int j = 0; j < width; j++) - { + for (int j = 0; j < width; j++) { initVal(hData[j]); } @@ -91,12 +90,12 @@ static void runTestR(const int width) hipSurfaceObject_t surfaceObject = 0; HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); - T *hOutputData = nullptr; + T* hOutputData = nullptr; 
HIP_CHECK(hipHostMalloc((void**)&hOutputData, size)); memset(hOutputData, 0, size); - dim3 dimBlock (16, 1, 1); - dim3 dimGrid ((width + dimBlock.x - 1) / dimBlock.x, 1, 1); + dim3 dimBlock(16, 1, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, 1, 1); surf1DKernelR<<>>(surfaceObject, hOutputData, width); @@ -105,8 +104,8 @@ static void runTestR(const int width) for (int j = 0; j < width; j++) { if (!isEqual(hData[j], hOutputData[j])) { - printf("Difference [ %d ]:%s ----%s\n", j, - getString(hData[j]).c_str(), getString(hOutputData[j]).c_str()); + printf("Difference [ %d ]:%s ----%s\n", j, getString(hData[j]).c_str(), + getString(hOutputData[j]).c_str()); REQUIRE(false); } } @@ -115,14 +114,11 @@ static void runTestR(const int width) HIP_CHECK(hipFreeArray(hipArray)); free(hData); HIP_CHECK(hipHostFree(hOutputData)); - REQUIRE(true); } -template -static void runTestW(const int width) -{ +template static void runTestW(const int width) { unsigned int size = width * sizeof(T); - T *hData = nullptr; + T* hData = nullptr; HIP_CHECK(hipHostMalloc((void**)&hData, size)); memset(hData, 0, size); @@ -142,27 +138,26 @@ static void runTestW(const int width) hipSurfaceObject_t surfaceObject = 0; HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); - for (int j = 0; j < width; j++) - { + for (int j = 0; j < width; j++) { initVal(hData[j]); } - dim3 dimBlock (16, 1, 1); - dim3 dimGrid ((width + dimBlock.x - 1) / dimBlock.x, 1, 1); + dim3 dimBlock(16, 1, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, 1, 1); surf1DKernelW<<>>(surfaceObject, hData, width); HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); - T *hOutputData = (T*) malloc (size); + T* hOutputData = (T*)malloc(size); memset(hOutputData, 0, size); HIP_CHECK(hipMemcpyFromArray(hOutputData, hipArray, 0, 0, size, hipMemcpyDeviceToHost)); for (int j = 0; j < width; j++) { if (!isEqual(hData[j], hOutputData[j])) { - printf("Difference [ %d ]:%s ----%s\n", j, - 
getString(hData[j]).c_str(), getString(hOutputData[j]).c_str()); + printf("Difference [ %d ]:%s ----%s\n", j, getString(hData[j]).c_str(), + getString(hOutputData[j]).c_str()); REQUIRE(false); } } @@ -171,18 +166,13 @@ static void runTestW(const int width) HIP_CHECK(hipFreeArray(hipArray)); HIP_CHECK(hipHostFree(hData)); free(hOutputData); - REQUIRE(true); } - -template -static void runTestRW(const int width) -{ +template static void runTestRW(const int width) { unsigned int size = width * sizeof(T); - T *hData = (T*) malloc (size); + T* hData = (T*)malloc(size); memset(hData, 0, size); - for (int j = 0; j < width; j++) - { + for (int j = 0; j < width; j++) { initVal(hData[j]); } @@ -210,24 +200,24 @@ static void runTestRW(const int width) resOutDesc.res.array.array = hipOutArray; hipSurfaceObject_t outSurfaceObject = 0; - HIP_CHECK(hipCreateSurfaceObject (&outSurfaceObject, &resOutDesc)); + HIP_CHECK(hipCreateSurfaceObject(&outSurfaceObject, &resOutDesc)); - dim3 dimBlock (16, 1, 1); - dim3 dimGrid ((width + dimBlock.x - 1) / dimBlock.x, 1, 1); + dim3 dimBlock(16, 1, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, 1, 1); surf1DKernelRW<<>>(surfaceObject, outSurfaceObject, width); HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); - T *hOutputData = (T*) malloc (size); + T* hOutputData = (T*)malloc(size); memset(hOutputData, 0, size); HIP_CHECK(hipMemcpyFromArray(hOutputData, hipOutArray, 0, 0, size, hipMemcpyDeviceToHost)); for (int j = 0; j < width; j++) { if (!isEqual(hData[j], hOutputData[j])) { - printf("Difference [ %d ]:%s ----%s\n", j, - getString(hData[j]).c_str(), getString(hOutputData[j]).c_str()); + printf("Difference [ %d ]:%s ----%s\n", j, getString(hData[j]).c_str(), + getString(hOutputData[j]).c_str()); REQUIRE(false); } } @@ -238,83 +228,67 @@ static void runTestRW(const int width) HIP_CHECK(hipFreeArray(hipOutArray)); free(hData); free(hOutputData); - REQUIRE(true); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj1D_type_R", "", - 
char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf1Dread` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surf1D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf1Dread_Positive_Basic", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj1D_type_R - 31") { - runTestR(31); - } - - SECTION("Unit_hipSurfaceObj1D_type_R - 67") { - runTestR(67); - } - - SECTION("Unit_hipSurfaceObj1D_type_R - 131") { - runTestR(131); - } - - SECTION("Unit_hipSurfaceObj1D_type_R - 263") { - runTestR(263); - } + const int width = GENERATE(31, 67, 131, 263); + runTestR(width); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj1D_type_W", "", - char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf1Dwrite` with different types and dimensions. 
+ * Test source + * ------------------------ + * - unit/surface/surf1D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf1Dwrite_Positive_Basic", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj1D_type_W - 31") { - runTestW(31); - } - - SECTION("Unit_hipSurfaceObj1D_type_W - 63") { - runTestW(63); - } - - SECTION("Unit_hipSurfaceObj1D_type_W - 131") { - runTestW(131); - } - - SECTION("Unit_hipSurfaceObj1D_type_W - 263") { - runTestW(263); - } + const int width = GENERATE(31, 67, 131, 263); + runTestW(width); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj1D_type_RW", "", - char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf1Dread` and `surf1Dwrite` together, with different types and dimensions. 
+ * Test source + * ------------------------ + * - unit/surface/surf1D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf1D_Positive_ReadWrite", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj1D_type_RW - 23") { - runTestRW(23); - } - - SECTION("Unit_hipSurfaceObj1D_type_RW - 67") { - runTestRW(67); - } - - SECTION("Unit_hipSurfaceObj1D_type_RW - 131") { - runTestRW(131); - } - - SECTION("Unit_hipSurfaceObj1D_type_RW - 263") { - runTestRW(263); - } + const int width = GENERATE(31, 67, 131, 263); + runTestRW(width); } diff --git a/catch/unit/surface/surf1DLayered.cc b/catch/unit/surface/surf1DLayered.cc new file mode 100644 index 0000000000..3432524527 --- /dev/null +++ b/catch/unit/surface/surf1DLayered.cc @@ -0,0 +1,294 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup surf1DLayered surf1DLayered + * @{ + * @ingroup SurfaceTest + */ + +#include +#include +#include + +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wunused-parameter" + +template +__global__ void surf1DLayeredKernelR(hipSurfaceObject_t surfaceObject, T* outputData, int width) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + if (x < width) { + surf1DLayeredread(outputData + x, surfaceObject, x * sizeof(T), 0); + } +#endif +} + +template +__global__ void surf1DLayeredKernelW(hipSurfaceObject_t surfaceObject, T* inputData, int width) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + if (x < width) { + surf1DLayeredwrite(inputData[x], surfaceObject, x * sizeof(T), 0); + } +#endif +} + +template +__global__ void surf1DLayeredKernelRW(hipSurfaceObject_t surfaceObject, + hipSurfaceObject_t outputSurfObj, int width) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + if (x < width) { + T data; + surf1DLayeredread(&data, surfaceObject, x * sizeof(T), 0); + surf1DLayeredwrite(data, outputSurfObj, x * sizeof(T), 0); + } +#endif +} + +template static void runTestR(const int width) { + unsigned int size = width * sizeof(T); + T* hData = (T*)malloc(size); + memset(hData, 0, size); + for (int j = 0; j < width; j++) { + initVal(hData[j]); + } + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + + hipArray_t hipArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, 0, hipArraySurfaceLoadStore)); + + 
HIP_CHECK(hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + T* hOutputData = nullptr; + HIP_CHECK(hipHostMalloc((void**)&hOutputData, size)); + memset(hOutputData, 0, size); + + dim3 dimBlock(16, 1, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, 1, 1); + + surf1DLayeredKernelR<<>>(surfaceObject, hOutputData, width); + + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int j = 0; j < width; j++) { + if (!isEqual(hData[j], hOutputData[j])) { + printf("Difference [ %d ]:%s ----%s\n", j, getString(hData[j]).c_str(), + getString(hOutputData[j]).c_str()); + REQUIRE(false); + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + free(hData); + HIP_CHECK(hipHostFree(hOutputData)); +} + +template static void runTestW(const int width) { + unsigned int size = width * sizeof(T); + T* hData = nullptr; + HIP_CHECK(hipHostMalloc((void**)&hData, size)); + memset(hData, 0, size); + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + + hipArray_t hipArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, 0, hipArraySurfaceLoadStore)); + + HIP_CHECK(hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + for (int j = 0; j < width; j++) { + initVal(hData[j]); + } + + dim3 dimBlock(16, 1, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, 1, 1); + + 
surf1DLayeredKernelW<<>>(surfaceObject, hData, width); + + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + T* hOutputData = (T*)malloc(size); + memset(hOutputData, 0, size); + HIP_CHECK(hipMemcpyFromArray(hOutputData, hipArray, 0, 0, size, hipMemcpyDeviceToHost)); + + for (int j = 0; j < width; j++) { + if (!isEqual(hData[j], hOutputData[j])) { + printf("Difference [ %d ]:%s ----%s\n", j, getString(hData[j]).c_str(), + getString(hOutputData[j]).c_str()); + REQUIRE(false); + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + HIP_CHECK(hipHostFree(hData)); + free(hOutputData); +} + +template static void runTestRW(const int width) { + unsigned int size = width * sizeof(T); + T* hData = (T*)malloc(size); + memset(hData, 0, size); + for (int j = 0; j < width; j++) { + initVal(hData[j]); + } + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + + hipArray_t hipArray = nullptr, hipOutArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, 0, hipArraySurfaceLoadStore)); + + HIP_CHECK(hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + HIP_CHECK(hipMallocArray(&hipOutArray, &channelDesc, width, 0, hipArraySurfaceLoadStore)); + + hipResourceDesc resOutDesc; + memset(&resOutDesc, 0, sizeof(resOutDesc)); + resOutDesc.resType = hipResourceTypeArray; + resOutDesc.res.array.array = hipOutArray; + + hipSurfaceObject_t outSurfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&outSurfaceObject, &resOutDesc)); + + dim3 dimBlock(16, 1, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, 1, 1); + + surf1DLayeredKernelRW<<>>(surfaceObject, outSurfaceObject, width); + + 
HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + T* hOutputData = (T*)malloc(size); + memset(hOutputData, 0, size); + HIP_CHECK(hipMemcpyFromArray(hOutputData, hipOutArray, 0, 0, size, hipMemcpyDeviceToHost)); + + for (int j = 0; j < width; j++) { + if (!isEqual(hData[j], hOutputData[j])) { + printf("Difference [ %d ]:%s ----%s\n", j, getString(hData[j]).c_str(), + getString(hOutputData[j]).c_str()); + REQUIRE(false); + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipDestroySurfaceObject(outSurfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + HIP_CHECK(hipFreeArray(hipOutArray)); + free(hData); + free(hOutputData); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surf1DLayeredread` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surf1DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf1DLayeredread_Positive_Basic", "", char, uchar, short, ushort, int, + uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67, 131, 263); + runTestR(width); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surf1DLayeredwrite` with different types and dimensions. 
+ * Test source + * ------------------------ + * - unit/surface/surf1DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf1DLayeredwrite_Positive_Basic", "", char, uchar, short, ushort, int, + uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67, 131, 263); + runTestW(width); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surf1DLayeredread` and `surf1DLayeredwrite` together, with different types + * and dimensions. Test source + * ------------------------ + * - unit/surface/surf1DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf1DLayered_Positive_ReadWrite", "", char, uchar, short, ushort, int, + uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67, 131, 263); + runTestRW(width); +} diff --git a/catch/unit/surface/hipSurfaceObj2D.cc b/catch/unit/surface/surf2D.cc similarity index 55% rename from catch/unit/surface/hipSurfaceObj2D.cc rename to catch/unit/surface/surf2D.cc index 1fdc0eee9e..ca504b178b 100644 --- a/catch/unit/surface/hipSurfaceObj2D.cc +++ b/catch/unit/surface/surf2D.cc @@ -1,13 +1,16 @@ /* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -16,8 +19,15 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include + +/** + * @addtogroup surf2D surf2D + * @{ + * @ingroup SurfaceTest + */ + #include +#include #include #pragma clang diagnostic ignored "-Wunused-variable" @@ -26,10 +36,8 @@ THE SOFTWARE. 
#define LOG_DATA 0 template -__global__ void -surf2DKernelR(hipSurfaceObject_t surfaceObject, - T* outputData, int width, int height) -{ +__global__ void surf2DKernelR(hipSurfaceObject_t surfaceObject, T* outputData, int width, + int height) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -40,10 +48,8 @@ surf2DKernelR(hipSurfaceObject_t surfaceObject, } template -__global__ void -surf2DKernelW(hipSurfaceObject_t surfaceObject, - T* inputData, int width, int height) -{ +__global__ void surf2DKernelW(hipSurfaceObject_t surfaceObject, T* inputData, int width, + int height) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -54,10 +60,8 @@ surf2DKernelW(hipSurfaceObject_t surfaceObject, } template -__global__ void -surf2DKernelRW(hipSurfaceObject_t surfaceObject, - hipSurfaceObject_t outputSurfObj, int width, int height) -{ +__global__ void surf2DKernelRW(hipSurfaceObject_t surfaceObject, hipSurfaceObject_t outputSurfObj, + int width, int height) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -69,29 +73,24 @@ surf2DKernelRW(hipSurfaceObject_t surfaceObject, #endif } -template -static void runTestR(const int width, const int height) -{ +template static void runTestR(const int width, const int height) { unsigned int size = width * height * sizeof(T); - T* hData = (T*) malloc(size); + T* hData = (T*)malloc(size); memset(hData, 0, size); - for (int i = 0; i < height; i++) - { - for (int j = 0; j < width; j++) - { + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { initVal(hData[i * width + j]); } } hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); hipArray_t hipArray = nullptr; - HIP_CHECK(hipMallocArray 
(&hipArray, &channelDesc, width, height, - hipArraySurfaceLoadStore)); + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); // Need set source pitch, but we don't have any padding here const size_t spitch = width * sizeof(T); - HIP_CHECK(hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, - hipMemcpyHostToDevice)); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); hipResourceDesc resDesc; memset(&resDesc, 0, sizeof(resDesc)); @@ -106,8 +105,8 @@ static void runTestR(const int width, const int height) HIP_CHECK(hipHostMalloc((void**)&hOutputData, size)); memset(hOutputData, 0, size); - dim3 dimBlock (16, 16, 1); - dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y -1)/ dimBlock.y, 1); + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); surf2DKernelR<<>>(surfaceObject, hOutputData, width, height); HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); @@ -116,8 +115,8 @@ static void runTestR(const int width, const int height) for (int j = 0; j < width; j++) { int index = i * width + j; if (!isEqual(hData[index], hOutputData[index])) { - printf("Difference [ %d %d ]:%s ----%s\n", i, j, - getString(hData[index]).c_str(), getString(hOutputData[index]).c_str()); + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); REQUIRE(false); } } @@ -127,12 +126,9 @@ static void runTestR(const int width, const int height) HIP_CHECK(hipFreeArray(hipArray)); free(hData); HIP_CHECK(hipHostFree(hOutputData)); - REQUIRE(true); } -template -static void runTestW(const int width, const int height) -{ +template static void runTestW(const int width, const int height) { unsigned int size = width * height * sizeof(T); T* hData = nullptr; HIP_CHECK(hipHostMalloc((void**)&hData, size)); @@ -140,13 +136,12 @@ 
static void runTestW(const int width, const int height) hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); hipArray_t hipArray = nullptr; - HIP_CHECK(hipMallocArray (&hipArray, &channelDesc, width, height, - hipArraySurfaceLoadStore)); + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); // Need set source pitch, but we don't have any padding here const size_t spitch = width * sizeof(T); - HIP_CHECK(hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, - hipMemcpyHostToDevice)); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); hipResourceDesc resDesc; memset(&resDesc, 0, sizeof(resDesc)); @@ -157,32 +152,30 @@ static void runTestW(const int width, const int height) hipSurfaceObject_t surfaceObject = 0; HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); - for (int i = 0; i < height; i++) - { - for (int j = 0; j < width; j++) - { + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { initVal(hData[i * width + j]); } } - dim3 dimBlock (16, 16, 1); - dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y -1)/ dimBlock.y, 1); + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); surf2DKernelW<<>>(surfaceObject, hData, width, height); HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); - T* hOutputData = (T*) malloc(size); + T* hOutputData = (T*)malloc(size); memset(hOutputData, 0, size); - HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipArray, 0, 0, spitch, - height, hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipArray, 0, 0, spitch, height, + hipMemcpyDeviceToHost)); for (int i = 0; i < height; i++) { for (int j = 0; j < width; j++) { int index = i * width + j; if (!isEqual(hData[index], hOutputData[index])) { - printf("Difference [ %d %d ]:%s ----%s\n", i, j, - 
getString(hData[index]).c_str(), getString(hOutputData[index]).c_str()); + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); REQUIRE(false); } } @@ -192,40 +185,33 @@ static void runTestW(const int width, const int height) HIP_CHECK(hipFreeArray(hipArray)); HIP_CHECK(hipHostFree(hData)); free(hOutputData); - REQUIRE(true); } -template -static void runTestRW(const int width, const int height) -{ +template static void runTestRW(const int width, const int height) { unsigned int size = width * height * sizeof(T); - T* hData = (T*) malloc(size); + T* hData = (T*)malloc(size); memset(hData, 0, size); - for (int i = 0; i < height; i++) - { - for (int j = 0; j < width; j++) - { + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { initVal(hData[i * width + j]); } } #if LOG_DATA - printf ("hData: "); - for (int i = 0; i < 32; i++) - { - printf ("%s ", getString(hData[i]).c_str()); + printf("hData: "); + for (int i = 0; i < 32; i++) { + printf("%s ", getString(hData[i]).c_str()); } - printf ("\n"); + printf("\n"); #endif hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); hipArray_t hipArray = nullptr, hipOutArray = nullptr; - HIP_CHECK(hipMallocArray (&hipArray, &channelDesc, width, height, - hipArraySurfaceLoadStore)); + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); // Need set source pitch, but we don't have any padding here const size_t spitch = width * sizeof(T); - HIP_CHECK(hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, - hipMemcpyHostToDevice)); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); hipResourceDesc resDesc; memset(&resDesc, 0, sizeof(resDesc)); @@ -236,8 +222,7 @@ static void runTestRW(const int width, const int height) hipSurfaceObject_t surfaceObject = 0; HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); - 
HIP_CHECK(hipMallocArray(&hipOutArray, &channelDesc, width, height, - hipArraySurfaceLoadStore)); + HIP_CHECK(hipMallocArray(&hipOutArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); hipResourceDesc resOutDesc; memset(&resOutDesc, 0, sizeof(resOutDesc)); @@ -245,35 +230,34 @@ static void runTestRW(const int width, const int height) resOutDesc.res.array.array = hipOutArray; hipSurfaceObject_t outSurfaceObject = 0; - HIP_CHECK(hipCreateSurfaceObject (&outSurfaceObject, &resOutDesc)); + HIP_CHECK(hipCreateSurfaceObject(&outSurfaceObject, &resOutDesc)); - dim3 dimBlock (16, 16, 1); - dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y -1)/ dimBlock.y, 1); + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); surf2DKernelRW<<>>(surfaceObject, outSurfaceObject, width, height); HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); - T* hOutputData = (T*) malloc(size); + T* hOutputData = (T*)malloc(size); memset(hOutputData, 0, size); - HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipOutArray, 0, 0, spitch, - height, hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipOutArray, 0, 0, spitch, height, + hipMemcpyDeviceToHost)); #if LOG_DATA - printf ("dData: "); - for (int i = 0; i < 32; i++) - { - printf ("%s ", getString(hOutputData[i]).c_str()); + printf("dData: "); + for (int i = 0; i < 32; i++) { + printf("%s ", getString(hOutputData[i]).c_str()); } - printf ("\n"); + printf("\n"); #endif for (int i = 0; i < height; i++) { for (int j = 0; j < width; j++) { int index = i * width + j; if (!isEqual(hData[index], hOutputData[index])) { - printf("Difference [ %d %d ]:%s ----%s\n", i, j, - getString(hData[index]).c_str(), getString(hOutputData[index]).c_str()); + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); REQUIRE(false); } } @@ -285,83 +269,70 
@@ static void runTestRW(const int width, const int height) HIP_CHECK(hipFreeArray(hipOutArray)); free(hData); free(hOutputData); - REQUIRE(true); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj2D_type_R", "", - char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf2Dread` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surf2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf2Dread_Positive_Basic", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj2D_type_R - 23, 67") { - runTestR(23, 67); - } - - SECTION("Unit_hipSurfaceObj2D_type_R - 67, 23") { - runTestR(67, 23); - } - - SECTION("Unit_hipSurfaceObj2D_type_R - 131, 67") { - runTestR(131, 67); - } - - SECTION("Unit_hipSurfaceObj2D_type_R - 263, 131") { - runTestR(263, 131); - } + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestR(width, height); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj2D_type_W", "", - char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf2Dwrite` with 
different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surf2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf2Dwrite_Positive_Basic", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj2D_type_W - 23, 67") { - runTestW(23, 67); - } - - SECTION("Unit_hipSurfaceObj2D_type_W - 67, 23") { - runTestW(67, 23); - } - - SECTION("Unit_hipSurfaceObj2D_type_W - 131, 67") { - runTestW(131, 67); - } - - SECTION("Unit_hipSurfaceObj2D_type_W - 263, 23") { - runTestW(263, 23); - } + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestW(width, height); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj2D_type_RW", "", - char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf2Dread` and `surf2Dwrite` together, with different types and dimensions. 
+ * Test source + * ------------------------ + * - unit/surface/surf2D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf2D_Positive_ReadWrite", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj2D_type_RW - 23, 67") { - runTestRW(23, 67); - } - - SECTION("Unit_hipSurfaceObj2D_type_RW - 67, 131") { - runTestRW(67, 131); - } - - SECTION("Unit_hipSurfaceObj2D_type_RW - 131, 263") { - runTestRW(131, 263); - } - - SECTION("Unit_hipSurfaceObj2D_type_RW - 263, 67") { - runTestRW(263, 67); - } + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestRW(width, height); } diff --git a/catch/unit/surface/surf2DLayered.cc b/catch/unit/surface/surf2DLayered.cc new file mode 100644 index 0000000000..c8f06bdfc2 --- /dev/null +++ b/catch/unit/surface/surf2DLayered.cc @@ -0,0 +1,338 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup surf2DLayered surf2DLayered + * @{ + * @ingroup SurfaceTest + */ + +#include +#include +#include + +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wunused-parameter" + +#define LOG_DATA 0 + +template +__global__ void surf2DLayeredKernelR(hipSurfaceObject_t surfaceObject, T* outputData, int width, + int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + surf2DLayeredread(outputData + y * width + x, surfaceObject, x * sizeof(T), y, 0); + } +#endif +} + +template +__global__ void surf2DLayeredKernelW(hipSurfaceObject_t surfaceObject, T* inputData, int width, + int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + surf2DLayeredwrite(inputData[y * width + x], surfaceObject, x * sizeof(T), y, 0); + } +#endif +} + +template +__global__ void surf2DLayeredKernelRW(hipSurfaceObject_t surfaceObject, + hipSurfaceObject_t outputSurfObj, int width, int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + T data; + surf2DLayeredread(&data, surfaceObject, x * sizeof(T), y, 0); + surf2DLayeredwrite(data, outputSurfObj, x * sizeof(T), y, 0); + } +#endif +} + +template static void runTestR(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* hData = 
(T*)malloc(size); + memset(hData, 0, size); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + hipArray_t hipArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + T* hOutputData = nullptr; + HIP_CHECK(hipHostMalloc((void**)&hOutputData, size)); + memset(hOutputData, 0, size); + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surf2DLayeredKernelR<<>>(surfaceObject, hOutputData, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + free(hData); + HIP_CHECK(hipHostFree(hOutputData)); +} + +template static void runTestW(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* hData = nullptr; + HIP_CHECK(hipHostMalloc((void**)&hData, size)); + memset(hData, 0, size); + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + hipArray_t hipArray = 
nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surf2DLayeredKernelW<<>>(surfaceObject, hData, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + T* hOutputData = (T*)malloc(size); + + memset(hOutputData, 0, size); + HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipArray, 0, 0, spitch, height, + hipMemcpyDeviceToHost)); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + HIP_CHECK(hipHostFree(hData)); + free(hOutputData); +} + +template static void runTestRW(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* hData = (T*)malloc(size); + memset(hData, 0, size); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } +#if LOG_DATA + printf("hData: "); + for (int i = 0; i < 32; i++) { + printf("%s ", 
getString(hData[i]).c_str()); + } + printf("\n"); +#endif + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + hipArray_t hipArray = nullptr, hipOutArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + HIP_CHECK(hipMallocArray(&hipOutArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + hipResourceDesc resOutDesc; + memset(&resOutDesc, 0, sizeof(resOutDesc)); + resOutDesc.resType = hipResourceTypeArray; + resOutDesc.res.array.array = hipOutArray; + + hipSurfaceObject_t outSurfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&outSurfaceObject, &resOutDesc)); + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surf2DLayeredKernelRW<<>>(surfaceObject, outSurfaceObject, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + T* hOutputData = (T*)malloc(size); + + memset(hOutputData, 0, size); + HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipOutArray, 0, 0, spitch, height, + hipMemcpyDeviceToHost)); + +#if LOG_DATA + printf("dData: "); + for (int i = 0; i < 32; i++) { + printf("%s ", getString(hOutputData[i]).c_str()); + } + printf("\n"); +#endif + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, 
getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipDestroySurfaceObject(outSurfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + HIP_CHECK(hipFreeArray(hipOutArray)); + free(hData); + free(hOutputData); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surf2DLayeredread` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surf2DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf2DLayeredread_Positive_Basic", "", char, uchar, short, ushort, int, + uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestR(width, height); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surf2DLayeredwrite` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surf2DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf2DLayeredwrite_Positive_Basic", "", char, uchar, short, ushort, int, + uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestW(width, height); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surf2DLayeredread` and `surf2DLayeredwrite` together, with different types + * and dimensions. 
Test source + * ------------------------ + * - unit/surface/surf2DLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf2DLayered_Positive_ReadWrite", "", char, uchar, short, ushort, int, + uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestRW(width, height); +} diff --git a/catch/unit/surface/hipSurfaceObj3D.cc b/catch/unit/surface/surf3D.cc similarity index 64% rename from catch/unit/surface/hipSurfaceObj3D.cc rename to catch/unit/surface/surf3D.cc index 7cc3889e6f..d209f09115 100644 --- a/catch/unit/surface/hipSurfaceObj3D.cc +++ b/catch/unit/surface/surf3D.cc @@ -1,13 +1,16 @@ /* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -16,50 +19,49 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#include + +/** + * @addtogroup surf3D surf3D + * @{ + * @ingroup SurfaceTest + */ + #include +#include #include #pragma clang diagnostic ignored "-Wunused-variable" #pragma clang diagnostic ignored "-Wunused-parameter" template -__global__ void -surf3DKernelR(hipSurfaceObject_t surfaceObject, - T* outputData, int width, int height, int depth) -{ +__global__ void surf3DKernelR(hipSurfaceObject_t surfaceObject, T* outputData, int width, + int height, int depth) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; int z = blockIdx.z * blockDim.z + threadIdx.z; if (x < width && y < height && z < depth) { - surf3Dread(outputData + z * width * height + y * width + x, - surfaceObject, x * sizeof(T), y, z); + surf3Dread(outputData + z * width * height + y * width + x, surfaceObject, x * sizeof(T), y, z); } #endif } template -__global__ void -surf3DKernelW(hipSurfaceObject_t surfaceObject, - T* inputData, int width, int height, int depth) -{ +__global__ void surf3DKernelW(hipSurfaceObject_t surfaceObject, T* inputData, int width, int height, + int depth) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; int z = blockIdx.z * blockDim.z + threadIdx.z; if (x < width && y < height && z < depth) { - surf3Dwrite(inputData[z * width * height + y * width + x], - surfaceObject, x * sizeof(T), y, z); + surf3Dwrite(inputData[z * width * height + y * width + x], surfaceObject, x * sizeof(T), y, z); } #endif } template -__global__ void -surf3DKernelRW(hipSurfaceObject_t surfaceObject, - hipSurfaceObject_t outputSurfObj, int width, int height, int depth) -{ +__global__ void surf3DKernelRW(hipSurfaceObject_t surfaceObject, hipSurfaceObject_t outputSurfObj, + int width, int height, int depth) { #if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT int x = 
blockIdx.x * blockDim.x + threadIdx.x; int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -72,11 +74,9 @@ surf3DKernelRW(hipSurfaceObject_t surfaceObject, #endif } -template -static void runTestR(const int width, const int height, const int depth) -{ +template static void runTestR(const int width, const int height, const int depth) { unsigned int size = width * height * depth * sizeof(T); - T *hData = (T*) malloc(size); + T* hData = (T*)malloc(size); memset(hData, 0, size); for (int i = 0; i < depth; i++) { for (int j = 0; j < height; j++) { @@ -94,8 +94,8 @@ static void runTestR(const int width, const int height, const int depth) hipMemcpy3DParms myparms; memset(&myparms, 0, sizeof(myparms)); - myparms.srcPos = make_hipPos(0,0,0); - myparms.dstPos = make_hipPos(0,0,0); + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height); myparms.dstArray = hipArray; myparms.extent = make_hipExtent(width, height, depth); @@ -112,12 +112,12 @@ static void runTestR(const int width, const int height, const int depth) hipSurfaceObject_t surfaceObject = 0; HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); - T *hOutputData = nullptr; + T* hOutputData = nullptr; HIP_CHECK(hipHostMalloc((void**)&hOutputData, size)); memset(hOutputData, 0, size); - dim3 dimBlock(8, 8, 8); // 512 threads - dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y -1)/ dimBlock.y, + dim3 dimBlock(8, 8, 8); // 512 threads + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, (depth + dimBlock.z - 1) / dimBlock.z); surf3DKernelR<<>>(surfaceObject, hOutputData, width, height, depth); @@ -130,26 +130,23 @@ static void runTestR(const int width, const int height, const int depth) for (int k = 0; k < width; k++) { int index = i * width * height + j * width + k; if (!isEqual(hData[index], hOutputData[index])) { - printf("Difference [ %d %d %d]:%s 
----%s\n", i, j, k, - getString(hData[index]).c_str(), getString(hOutputData[index]).c_str()); + printf("Difference [ %d %d %d]:%s ----%s\n", i, j, k, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); REQUIRE(false); } } } } - HIP_CHECK(hipDestroySurfaceObject (surfaceObject)); + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); HIP_CHECK(hipFreeArray(hipArray)); free(hData); HIP_CHECK(hipHostFree(hOutputData)); - REQUIRE(true); } -template -static void runTestW(const int width, const int height, const int depth) -{ +template static void runTestW(const int width, const int height, const int depth) { unsigned int size = width * height * depth * sizeof(T); - T *hData = nullptr; + T* hData = nullptr; HIP_CHECK(hipHostMalloc((void**)&hData, size)); memset(hData, 0, size); @@ -161,8 +158,8 @@ static void runTestW(const int width, const int height, const int depth) hipMemcpy3DParms myparms; memset(&myparms, 0, sizeof(myparms)); - myparms.srcPos = make_hipPos(0,0,0); - myparms.dstPos = make_hipPos(0,0,0); + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height); myparms.dstArray = hipArray; myparms.extent = make_hipExtent(width, height, depth); @@ -187,8 +184,8 @@ static void runTestW(const int width, const int height, const int depth) } } - dim3 dimBlock(8, 8, 8); // 512 threads - dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y -1)/ dimBlock.y, + dim3 dimBlock(8, 8, 8); // 512 threads + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, (depth + dimBlock.z - 1) / dimBlock.z); surf3DKernelW<<>>(surfaceObject, hData, width, height, depth); @@ -196,13 +193,13 @@ static void runTestW(const int width, const int height, const int depth) HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); - T *hOutputData = (T*) malloc (size); + T* hOutputData = (T*)malloc(size); 
memset(hOutputData, 0, size); memset(&myparms, 0, sizeof(myparms)); - myparms.srcPos = make_hipPos(0,0,0); - myparms.dstPos = make_hipPos(0,0,0); - myparms.srcArray= hipArray; + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); + myparms.srcArray = hipArray; myparms.dstPtr = make_hipPitchedPtr(hOutputData, width * sizeof(T), width, height); myparms.extent = make_hipExtent(width, height, depth); myparms.kind = hipMemcpyDeviceToHost; @@ -214,26 +211,23 @@ static void runTestW(const int width, const int height, const int depth) for (int k = 0; k < width; k++) { int index = i * width * height + j * width + k; if (!isEqual(hData[index], hOutputData[index])) { - printf("Difference [ %d %d %d]:%s ----%s\n", i, j, k, - getString(hData[index]).c_str(), getString(hOutputData[index]).c_str()); + printf("Difference [ %d %d %d]:%s ----%s\n", i, j, k, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); REQUIRE(false); } } } } - HIP_CHECK(hipDestroySurfaceObject (surfaceObject)); + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); HIP_CHECK(hipFreeArray(hipArray)); HIP_CHECK(hipHostFree(hData)); free(hOutputData); - REQUIRE(true); } -template -static void runTestRW(const int width, const int height, const int depth) -{ +template static void runTestRW(const int width, const int height, const int depth) { unsigned int size = width * height * depth * sizeof(T); - T *hData = (T*) malloc(size); + T* hData = (T*)malloc(size); memset(hData, 0, size); for (int i = 0; i < depth; i++) { for (int j = 0; j < height; j++) { @@ -251,8 +245,8 @@ static void runTestRW(const int width, const int height, const int depth) hipMemcpy3DParms myparms; memset(&myparms, 0, sizeof(myparms)); - myparms.srcPos = make_hipPos(0,0,0); - myparms.dstPos = make_hipPos(0,0,0); + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); myparms.srcPtr = make_hipPitchedPtr(hData, width * sizeof(T), width, height); myparms.dstArray = 
hipArray; myparms.extent = make_hipExtent(width, height, depth); @@ -280,8 +274,8 @@ static void runTestRW(const int width, const int height, const int depth) hipSurfaceObject_t outSurfaceObject = 0; HIP_CHECK(hipCreateSurfaceObject(&outSurfaceObject, &resOutDesc)); - dim3 dimBlock(8, 8, 8); // 512 threads - dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y -1)/ dimBlock.y, + dim3 dimBlock(8, 8, 8); // 512 threads + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, (depth + dimBlock.z - 1) / dimBlock.z); surf3DKernelRW<<>>(surfaceObject, outSurfaceObject, width, height, depth); @@ -289,13 +283,13 @@ static void runTestRW(const int width, const int height, const int depth) HIP_CHECK(hipGetLastError()); HIP_CHECK(hipDeviceSynchronize()); - T *hOutputData = (T*) malloc (size); + T* hOutputData = (T*)malloc(size); memset(hOutputData, 0, size); memset(&myparms, 0, sizeof(myparms)); - myparms.srcPos = make_hipPos(0,0,0); - myparms.dstPos = make_hipPos(0,0,0); - myparms.srcArray= hipOutArray; + myparms.srcPos = make_hipPos(0, 0, 0); + myparms.dstPos = make_hipPos(0, 0, 0); + myparms.srcArray = hipOutArray; myparms.dstPtr = make_hipPitchedPtr(hOutputData, width * sizeof(T), width, height); myparms.extent = make_hipExtent(width, height, depth); myparms.kind = hipMemcpyDeviceToHost; @@ -307,97 +301,87 @@ static void runTestRW(const int width, const int height, const int depth) for (int k = 0; k < width; k++) { int index = i * width * height + j * width + k; if (!isEqual(hData[index], hOutputData[index])) { - printf("Difference [ %d %d %d]:%s ----%s\n", i, j, k, - getString(hData[index]).c_str(), getString(hOutputData[index]).c_str()); + printf("Difference [ %d %d %d]:%s ----%s\n", i, j, k, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); REQUIRE(false); } } } } - HIP_CHECK(hipDestroySurfaceObject (surfaceObject)); - HIP_CHECK(hipDestroySurfaceObject (outSurfaceObject)); + 
HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipDestroySurfaceObject(outSurfaceObject)); HIP_CHECK(hipFreeArray(hipArray)); HIP_CHECK(hipFreeArray(hipOutArray)); free(hData); free(hOutputData); - REQUIRE(true); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj3D_type_R", "", - char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf3Dread` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surf3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf3Dread_Positive_Basic", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj3D_type_R - 31, 67, 131") { - runTestR(31, 67, 131); - } - - SECTION("Unit_hipSurfaceObj3D_type_R - 67, 31, 263") { - runTestR(67, 31, 263); - } - - SECTION("Unit_hipSurfaceObj3D_type_R - 131, 131, 67") { - runTestR(131, 131, 67); - } - - SECTION("Unit_hipSurfaceObj3D_type_R - 263, 131, 263") { - runTestR(263, 131, 263); - } + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + const int depth = GENERATE(4, 11); + runTestR(width, height, depth); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj3D_type_W", "", - char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = 
hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf3Dwrite` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surf3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf3Dwrite_Positive_Basic", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj3D_type_W - 31, 67, 131") { - runTestW(31, 67, 131); - } - - SECTION("Unit_hipSurfaceObj3D_type_W - 67, 67, 31") { - runTestW(67, 67, 31); - } - - SECTION("Unit_hipSurfaceObj3D_type_W - 131, 131, 67") { - runTestW(131, 131, 67); - } - - SECTION("Unit_hipSurfaceObj3D_type_W - 263, 131, 263") { - runTestW(263, 131, 263); - } + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + const int depth = GENERATE(4, 11); + runTestW(width, height, depth); } -TEMPLATE_TEST_CASE("Unit_hipSurfaceObj3D_type_RW", "", - char, uchar, short, ushort, int, uint, float, - char1, uchar1, short1, ushort1, int1, uint1, float1, - char2, uchar2, short2, ushort2, int2, uint2, float2, - char4, uchar4, short4, ushort4, int4, uint4, float4) -{ - CHECK_IMAGE_SUPPORT - auto err = hipGetLastError(); // reset last err due to previous negative tests +/** + * Test Description + * ------------------------ + * - Basic test for `surf3Dread` and `surf3Dwrite` together, with different types and dimensions.
+ * Test source + * ------------------------ + * - unit/surface/surf3D.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surf3D_Positive_ReadWrite", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; - SECTION("Unit_hipSurfaceObj3D_type_RW - 31, 31, 67") { - runTestRW(31, 31, 67); - } - - SECTION("Unit_hipSurfaceObj3D_type_RW - 67, 67, 31") { - runTestRW(67, 67, 31); - } - - SECTION("Unit_hipSurfaceObj3D_type_RW - 131, 67, 263") { - runTestRW(131, 67, 263); - } - - SECTION("Unit_hipSurfaceObj3D_type_RW - 263, 131, 263") { - runTestRW(263, 131, 263); - } + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + const int depth = GENERATE(4, 11); + runTestRW(width, height, depth); } diff --git a/catch/unit/surface/surfCubemap.cc b/catch/unit/surface/surfCubemap.cc new file mode 100644 index 0000000000..c0fa488567 --- /dev/null +++ b/catch/unit/surface/surfCubemap.cc @@ -0,0 +1,338 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @addtogroup surfCubemap surfCubemap + * @{ + * @ingroup SurfaceTest + */ + +#include +#include +#include + +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wunused-parameter" + +#define LOG_DATA 0 + +template +__global__ void surfCubemapKernelR(hipSurfaceObject_t surfaceObject, T* outputData, int width, + int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + surfCubemapread(outputData + y * width + x, surfaceObject, x * sizeof(T), y, 0); + } +#endif +} + +template +__global__ void surfCubemapKernelW(hipSurfaceObject_t surfaceObject, T* inputData, int width, + int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + surfCubemapwrite(inputData[y * width + x], surfaceObject, x * sizeof(T), y, 0); + } +#endif +} + +template +__global__ void surfCubemapKernelRW(hipSurfaceObject_t surfaceObject, + hipSurfaceObject_t outputSurfObj, int width, int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + T data; + surfCubemapread(&data, surfaceObject, x * sizeof(T), y, 0); + surfCubemapwrite(data, 
outputSurfObj, x * sizeof(T), y, 0); + } +#endif +} + +template static void runTestR(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* hData = (T*)malloc(size); + memset(hData, 0, size); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + hipArray_t hipArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + T* hOutputData = nullptr; + HIP_CHECK(hipHostMalloc((void**)&hOutputData, size)); + memset(hOutputData, 0, size); + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surfCubemapKernelR<<>>(surfaceObject, hOutputData, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + free(hData); + HIP_CHECK(hipHostFree(hOutputData)); +} + +template static void runTestW(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* 
hData = nullptr; + HIP_CHECK(hipHostMalloc((void**)&hData, size)); + memset(hData, 0, size); + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + hipArray_t hipArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surfCubemapKernelW<<>>(surfaceObject, hData, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + T* hOutputData = (T*)malloc(size); + + memset(hOutputData, 0, size); + HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipArray, 0, 0, spitch, height, + hipMemcpyDeviceToHost)); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + HIP_CHECK(hipHostFree(hData)); + free(hOutputData); +} + +template static void runTestRW(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* hData = (T*)malloc(size); + memset(hData, 0, size); + for (int i = 0; i 
< height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } +#if LOG_DATA + printf("hData: "); + for (int i = 0; i < 32; i++) { + printf("%s ", getString(hData[i]).c_str()); + } + printf("\n"); +#endif + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + hipArray_t hipArray = nullptr, hipOutArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + HIP_CHECK(hipMallocArray(&hipOutArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + hipResourceDesc resOutDesc; + memset(&resOutDesc, 0, sizeof(resOutDesc)); + resOutDesc.resType = hipResourceTypeArray; + resOutDesc.res.array.array = hipOutArray; + + hipSurfaceObject_t outSurfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&outSurfaceObject, &resOutDesc)); + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surfCubemapKernelRW<<>>(surfaceObject, outSurfaceObject, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + T* hOutputData = (T*)malloc(size); + + memset(hOutputData, 0, size); + HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipOutArray, 0, 0, spitch, height, + hipMemcpyDeviceToHost)); + +#if LOG_DATA + printf("dData: "); + for (int i = 0; i < 32; i++) { + printf("%s ", getString(hOutputData[i]).c_str()); + } + printf("\n"); +#endif + + for (int i = 0; i < height; i++) { + for (int j = 0; j < 
width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipDestroySurfaceObject(outSurfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + HIP_CHECK(hipFreeArray(hipOutArray)); + free(hData); + free(hOutputData); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surfCubemapread` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surfCubemap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surfCubemapread_Positive_Basic", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestR(width, height); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surfCubemapwrite` with different types and dimensions. 
+ * Test source + * ------------------------ + * - unit/surface/surfCubemap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surfCubemapwrite_Positive_Basic", "", char, uchar, short, ushort, int, + uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestW(width, height); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surfCubemapread` and `surfCubemapwrite`, with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surfCubemap.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surfCubemap_Positive_ReadWrite", "", char, uchar, short, ushort, int, uint, + float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, uchar2, + short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, int4, + uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestRW(width, height); +} diff --git a/catch/unit/surface/surfCubemapLayered.cc b/catch/unit/surface/surfCubemapLayered.cc new file mode 100644 index 0000000000..89da56ed53 --- /dev/null +++ b/catch/unit/surface/surfCubemapLayered.cc @@ -0,0 +1,340 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/** + * @addtogroup surfCubemapLayered surfCubemapLayered + * @{ + * @ingroup SurfaceTest + */ + +#include +#include +#include + +#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wunused-parameter" + +#define LOG_DATA 0 + +template +__global__ void surfCubemapLayeredKernelR(hipSurfaceObject_t surfaceObject, T* outputData, + int width, int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + surfCubemapLayeredread(outputData + y * width + x, surfaceObject, x * sizeof(T), y, 0); + } +#endif +} + +template +__global__ void surfCubemapLayeredKernelW(hipSurfaceObject_t surfaceObject, T* inputData, int width, + int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + surfCubemapLayeredwrite(inputData[y * width + x], surfaceObject, x * sizeof(T), y, 0); + } +#endif +} + +template +__global__ void surfCubemapLayeredKernelRW(hipSurfaceObject_t surfaceObject, + hipSurfaceObject_t outputSurfObj, int width, + int height) { +#if !defined(__HIP_NO_IMAGE_SUPPORT) || !__HIP_NO_IMAGE_SUPPORT + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + if (x < width && y < height) { + T data; + surfCubemapLayeredread(&data, surfaceObject, x * sizeof(T), y, 0); + surfCubemapLayeredwrite(data, outputSurfObj, x * sizeof(T), y, 0); + } +#endif +} + +template static void runTestR(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* hData = (T*)malloc(size); + memset(hData, 0, size); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + 
hipArray_t hipArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + T* hOutputData = nullptr; + HIP_CHECK(hipHostMalloc((void**)&hOutputData, size)); + memset(hOutputData, 0, size); + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surfCubemapLayeredKernelR<<>>(surfaceObject, hOutputData, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + free(hData); + HIP_CHECK(hipHostFree(hOutputData)); +} + +template static void runTestW(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* hData = nullptr; + HIP_CHECK(hipHostMalloc((void**)&hData, size)); + memset(hData, 0, size); + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + hipArray_t hipArray = nullptr; + HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + 
HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surfCubemapLayeredKernelW<<>>(surfaceObject, hData, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + T* hOutputData = (T*)malloc(size); + + memset(hOutputData, 0, size); + HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipArray, 0, 0, spitch, height, + hipMemcpyDeviceToHost)); + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + HIP_CHECK(hipHostFree(hData)); + free(hOutputData); +} + +template static void runTestRW(const int width, const int height) { + unsigned int size = width * height * sizeof(T); + T* hData = (T*)malloc(size); + memset(hData, 0, size); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + initVal(hData[i * width + j]); + } + } +#if LOG_DATA + printf("hData: "); + for (int i = 0; i < 32; i++) { + printf("%s ", getString(hData[i]).c_str()); + } + printf("\n"); +#endif + + hipChannelFormatDesc channelDesc = hipCreateChannelDesc(); + hipArray_t hipArray = nullptr, hipOutArray = nullptr; + 
HIP_CHECK(hipMallocArray(&hipArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + // Need set source pitch, but we don't have any padding here + const size_t spitch = width * sizeof(T); + HIP_CHECK( + hipMemcpy2DToArray(hipArray, 0, 0, hData, spitch, spitch, height, hipMemcpyHostToDevice)); + + hipResourceDesc resDesc; + memset(&resDesc, 0, sizeof(resDesc)); + resDesc.resType = hipResourceTypeArray; + resDesc.res.array.array = hipArray; + + // Create surface object + hipSurfaceObject_t surfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&surfaceObject, &resDesc)); + + HIP_CHECK(hipMallocArray(&hipOutArray, &channelDesc, width, height, hipArraySurfaceLoadStore)); + + hipResourceDesc resOutDesc; + memset(&resOutDesc, 0, sizeof(resOutDesc)); + resOutDesc.resType = hipResourceTypeArray; + resOutDesc.res.array.array = hipOutArray; + + hipSurfaceObject_t outSurfaceObject = 0; + HIP_CHECK(hipCreateSurfaceObject(&outSurfaceObject, &resOutDesc)); + + dim3 dimBlock(16, 16, 1); + dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x, (height + dimBlock.y - 1) / dimBlock.y, 1); + surfCubemapLayeredKernelRW + <<>>(surfaceObject, outSurfaceObject, width, height); + HIP_CHECK(hipGetLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + T* hOutputData = (T*)malloc(size); + + memset(hOutputData, 0, size); + HIP_CHECK(hipMemcpy2DFromArray(hOutputData, spitch, hipOutArray, 0, 0, spitch, height, + hipMemcpyDeviceToHost)); + +#if LOG_DATA + printf("dData: "); + for (int i = 0; i < 32; i++) { + printf("%s ", getString(hOutputData[i]).c_str()); + } + printf("\n"); +#endif + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + int index = i * width + j; + if (!isEqual(hData[index], hOutputData[index])) { + printf("Difference [ %d %d ]:%s ----%s\n", i, j, getString(hData[index]).c_str(), + getString(hOutputData[index]).c_str()); + REQUIRE(false); + } + } + } + + HIP_CHECK(hipDestroySurfaceObject(surfaceObject)); + 
HIP_CHECK(hipDestroySurfaceObject(outSurfaceObject)); + HIP_CHECK(hipFreeArray(hipArray)); + HIP_CHECK(hipFreeArray(hipOutArray)); + free(hData); + free(hOutputData); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surfCubemapLayeredread` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surfCubemapLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surfCubemapLayeredread_Positive_Basic", "", char, uchar, short, ushort, + int, uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, + uchar2, short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, + int4, uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestR(width, height); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surfCubemapLayeredwrite` with different types and dimensions. + * Test source + * ------------------------ + * - unit/surface/surfCubemapLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surfCubemapLayeredwrite_Positive_Basic", "", char, uchar, short, ushort, + int, uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, + uchar2, short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, + int4, uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestW(width, height); +} + +/** + * Test Description + * ------------------------ + * - Basic test for `surfCubemapLayeredread` and `surfCubemapLayeredwrite` together, with + * different types and dimensions. 
Test source + * ------------------------ + * - unit/surface/surfCubemapLayered.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.7 + */ +TEMPLATE_TEST_CASE("Unit_surfCubemapLayered_Positive_ReadWrite", "", char, uchar, short, ushort, + int, uint, float, char1, uchar1, short1, ushort1, int1, uint1, float1, char2, + uchar2, short2, ushort2, int2, uint2, float2, char4, uchar4, short4, ushort4, + int4, uint4, float4) { + CHECK_IMAGE_SUPPORT; + + const int width = GENERATE(31, 67); + const int height = GENERATE(131, 263); + runTestRW(width, height); +} From 66e28851072984fa2c02bfd3abcdfca9606da57f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 19:32:13 +0100 Subject: [PATCH 58/71] EXSWHTEC-382 - Implement tests for Launch API functions #454 Change-Id: I0720758144e89adaa43bcbcc6262dbb16cd4e2be --- catch/include/utils.hh | 2 +- catch/unit/CMakeLists.txt | 1 - catch/unit/executionControl/CMakeLists.txt | 2 + .../executionControl/hipExtLaunchKernel.cc | 16 +- .../executionControl/hipFuncGetAttributes.cc | 8 +- .../hipLaunchCooperativeKernel.cc | 18 +- .../unit/executionControl/hipLaunchKernel.cc | 156 ++++++++++++++++++ catch/unit/executionControl/launch_api.cc | 69 ++++++++ 8 files changed, 249 insertions(+), 23 deletions(-) create mode 100644 catch/unit/executionControl/hipLaunchKernel.cc create mode 100644 catch/unit/executionControl/launch_api.cc diff --git a/catch/include/utils.hh b/catch/include/utils.hh index f025768c14..3855308a42 100644 --- a/catch/include/utils.hh +++ b/catch/include/utils.hh @@ -170,7 +170,7 @@ inline bool DeviceAttributesSupport(const int device, Attributes... attributes) return (... 
&& DeviceAttributeSupport(device, attributes)); } -inline int GetDeviceAttribute(int device, const hipDeviceAttribute_t attr) { +inline int GetDeviceAttribute(const hipDeviceAttribute_t attr, int device) { int value = 0; HIP_CHECK(hipDeviceGetAttribute(&value, attr, device)); return value; diff --git a/catch/unit/CMakeLists.txt b/catch/unit/CMakeLists.txt index 6b63292c91..6b38e58ff6 100644 --- a/catch/unit/CMakeLists.txt +++ b/catch/unit/CMakeLists.txt @@ -41,7 +41,6 @@ add_subdirectory(device_memory) add_subdirectory(warp) add_subdirectory(dynamicLoading) add_subdirectory(g++) -add_subdirectory(module) add_subdirectory(channelDescriptor) add_subdirectory(executionControl) add_subdirectory(math) diff --git a/catch/unit/executionControl/CMakeLists.txt b/catch/unit/executionControl/CMakeLists.txt index a27f9dc4f1..877addd79b 100644 --- a/catch/unit/executionControl/CMakeLists.txt +++ b/catch/unit/executionControl/CMakeLists.txt @@ -4,6 +4,7 @@ set(TEST_SRC hipFuncSetSharedMemConfig.cc hipFuncSetAttribute.cc hipFuncGetAttributes.cc + hipLaunchKernel.cc hipLaunchCooperativeKernel.cc hipLaunchCooperativeKernelMultiDevice.cc ) @@ -12,6 +13,7 @@ if(HIP_PLATFORM MATCHES "amd") set(TEST_SRC ${TEST_SRC} hipExtLaunchKernel.cc hipExtLaunchMultiKernelMultiDevice.cc + launch_api.cc ) endif() diff --git a/catch/unit/executionControl/hipExtLaunchKernel.cc b/catch/unit/executionControl/hipExtLaunchKernel.cc index 8b85507de5..1b336b4d74 100644 --- a/catch/unit/executionControl/hipExtLaunchKernel.cc +++ b/catch/unit/executionControl/hipExtLaunchKernel.cc @@ -49,19 +49,19 @@ TEST_CASE("Unit_hipExtLaunchKernel_Positive_Basic") { TEST_CASE("Unit_hipExtLaunchKernel_Positive_Parameters") { SECTION("blockDim.x == maxBlockDimX") { - const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX); + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0); HIP_CHECK(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{x, 1, 1}, nullptr, 0, 
nullptr, nullptr, nullptr, 0u)); } SECTION("blockDim.y == maxBlockDimY") { - const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY); + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0); HIP_CHECK(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{y, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u)); } SECTION("blockDim.z == maxBlockDimZ") { - const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ); + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0); HIP_CHECK(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{z, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u)); } @@ -111,28 +111,28 @@ TEST_CASE("Unit_hipExtLaunchKernel_Negative_Parameters") { } SECTION("blockDim.x > maxBlockDimX") { - const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX) + 1u; + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0) + 1u; HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{x, 1, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u), hipErrorInvalidConfiguration); } SECTION("blockDim.y > maxBlockDimY") { - const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY) + 1u; + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0) + 1u; HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, y, 1}, nullptr, 0, nullptr, nullptr, nullptr, 0u), hipErrorInvalidConfiguration); } SECTION("blockDim.z > maxBlockDimZ") { - const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ) + 1u; + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0) + 1u; HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, z}, nullptr, 0, nullptr, nullptr, nullptr, 0u), hipErrorInvalidConfiguration); } SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") { - const 
unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock); + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxThreadsPerBlock, 0); const unsigned int dim = std::ceil(std::cbrt(max)); HIP_CHECK_ERROR( hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{dim, dim, dim}, @@ -141,7 +141,7 @@ TEST_CASE("Unit_hipExtLaunchKernel_Negative_Parameters") { } SECTION("sharedMemBytes > maxSharedMemoryPerBlock") { - const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock) + 1u; + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxSharedMemoryPerBlock, 0) + 1u; HIP_CHECK_ERROR(hipExtLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, 1}, nullptr, max, nullptr, nullptr, nullptr, 0u), hipErrorOutOfMemory); diff --git a/catch/unit/executionControl/hipFuncGetAttributes.cc b/catch/unit/executionControl/hipFuncGetAttributes.cc index e97f44300e..c3ce1c835e 100644 --- a/catch/unit/executionControl/hipFuncGetAttributes.cc +++ b/catch/unit/executionControl/hipFuncGetAttributes.cc @@ -35,8 +35,8 @@ TEST_CASE("Unit_hipFuncGetAttributes_Positive_Basic") { SECTION("binaryVersion") { #if HT_NVIDIA - const auto major = GetDeviceAttribute(0, hipDeviceAttributeComputeCapabilityMajor); - const auto minor = GetDeviceAttribute(0, hipDeviceAttributeComputeCapabilityMinor); + const auto major = GetDeviceAttribute(hipDeviceAttributeComputeCapabilityMajor, 0); + const auto minor = GetDeviceAttribute(hipDeviceAttributeComputeCapabilityMinor, 0); REQUIRE(attr.binaryVersion == major * 10 + minor); #elif HT_AMD REQUIRE(attr.binaryVersion > 0); @@ -48,7 +48,7 @@ TEST_CASE("Unit_hipFuncGetAttributes_Positive_Basic") { SECTION("constSizeBytes") { REQUIRE(attr.constSizeBytes == kConstSizeBytes); } SECTION("maxThreadsPerBlock") { - REQUIRE(attr.maxThreadsPerBlock == GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock)); + REQUIRE(attr.maxThreadsPerBlock == 
GetDeviceAttribute(hipDeviceAttributeMaxThreadsPerBlock, 0)); } SECTION("numRegs") { REQUIRE(attr.numRegs >= 0); } @@ -57,7 +57,7 @@ TEST_CASE("Unit_hipFuncGetAttributes_Positive_Basic") { SECTION("sharedSizeBytes") { REQUIRE(attr.sharedSizeBytes <= - GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock)); + GetDeviceAttribute(hipDeviceAttributeMaxSharedMemoryPerBlock, 0)); } } diff --git a/catch/unit/executionControl/hipLaunchCooperativeKernel.cc b/catch/unit/executionControl/hipLaunchCooperativeKernel.cc index eb7eb2293f..5beeed4621 100644 --- a/catch/unit/executionControl/hipLaunchCooperativeKernel.cc +++ b/catch/unit/executionControl/hipLaunchCooperativeKernel.cc @@ -61,19 +61,19 @@ TEST_CASE("Unit_hipLaunchCooperativeKernel_Positive_Parameters") { } SECTION("blockDim.x == maxBlockDimX") { - const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX); + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0); HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{x, 1, 1}, nullptr, 0, nullptr)); } SECTION("blockDim.y == maxBlockDimY") { - const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY); + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0); HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{y, 1, 1}, nullptr, 0, nullptr)); } SECTION("blockDim.z == maxBlockDimZ") { - const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ); + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0); HIP_CHECK(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{z, 1, 1}, nullptr, 0, nullptr)); } @@ -128,28 +128,28 @@ TEST_CASE("Unit_hipLaunchCooperativeKernel_Negative_Parameters") { } SECTION("blockDim.x > maxBlockDimX") { - const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX) + 1u; + const unsigned int x = 
GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0) + 1u; HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{x, 1, 1}, nullptr, 0, nullptr), hipErrorInvalidConfiguration); } SECTION("blockDim.y > maxBlockDimY") { - const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY) + 1u; + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0) + 1u; HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, y, 1}, nullptr, 0, nullptr), hipErrorInvalidConfiguration); } SECTION("blockDim.z > maxBlockDimZ") { - const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ) + 1u; + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0) + 1u; HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, z}, nullptr, 0, nullptr), hipErrorInvalidConfiguration); } SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") { - const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock); + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxThreadsPerBlock, 0); const unsigned int dim = std::ceil(std::cbrt(max)); HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{dim, dim, dim}, nullptr, 0, nullptr), @@ -163,7 +163,7 @@ TEST_CASE("Unit_hipLaunchCooperativeKernel_Negative_Parameters") { HIP_CHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks, reinterpret_cast(kernel), 1, 0)); const unsigned int multiproc_count = - GetDeviceAttribute(0, hipDeviceAttributeMultiprocessorCount); + GetDeviceAttribute(hipDeviceAttributeMultiprocessorCount, 0); const unsigned int dim = std::ceil(std::cbrt(max_blocks * multiproc_count)); HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{dim, dim, dim}, dim3{1, 1, 1}, nullptr, 0, nullptr), @@ -171,7 +171,7 @@ 
TEST_CASE("Unit_hipLaunchCooperativeKernel_Negative_Parameters") { } SECTION("sharedMemBytes > maxSharedMemoryPerBlock") { - const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock) + 1u; + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxSharedMemoryPerBlock, 0) + 1u; HIP_CHECK_ERROR(hipLaunchCooperativeKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, 1}, nullptr, max, nullptr), hipErrorCooperativeLaunchTooLarge); diff --git a/catch/unit/executionControl/hipLaunchKernel.cc b/catch/unit/executionControl/hipLaunchKernel.cc new file mode 100644 index 0000000000..d9272107eb --- /dev/null +++ b/catch/unit/executionControl/hipLaunchKernel.cc @@ -0,0 +1,156 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "execution_control_common.hh" + +#include +#include +#include +#include + +TEST_CASE("Unit_hipLaunchKernel_Positive_Basic") { + SECTION("Kernel with no arguments") { + HIP_CHECK(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, 1}, + nullptr, 0, nullptr)); + HIP_CHECK(hipDeviceSynchronize()); + } + + SECTION("Kernel with arguments using kernelParams") { + LinearAllocGuard result_dev(LinearAllocs::hipMalloc, sizeof(int)); + HIP_CHECK(hipMemset(result_dev.ptr(), 0, sizeof(*result_dev.ptr()))); + int* result_ptr = result_dev.ptr(); + void* kernel_args[1] = {&result_ptr}; + HIP_CHECK(hipLaunchKernel(reinterpret_cast(kernel_42), dim3{1, 1, 1}, dim3{1, 1, 1}, + kernel_args, 0, nullptr)); + int result = 0; + HIP_CHECK(hipMemcpy(&result, result_dev.ptr(), sizeof(result), hipMemcpyDefault)); + REQUIRE(result == 42); + } +} + +TEST_CASE("Unit_hipLaunchKernel_Positive_Parameters") { + SECTION("blockDim.x == maxBlockDimX") { + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0); + HIP_CHECK(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{x, 1, 1}, + nullptr, 0, nullptr)); + } + + SECTION("blockDim.y == maxBlockDimY") { + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0); + HIP_CHECK(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{y, 1, 1}, + nullptr, 0, nullptr)); + } + + SECTION("blockDim.z == maxBlockDimZ") { + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0); + HIP_CHECK(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{z, 1, 1}, + nullptr, 0, nullptr)); + } +} + +TEST_CASE("Unit_hipLaunchKernel_Negative_Parameters") { + SECTION("f == nullptr") { + HIP_CHECK_ERROR(hipLaunchKernel(nullptr, dim3{1, 1, 1}, dim3{1, 1, 1}, nullptr, 0, nullptr), + hipErrorInvalidDeviceFunction); + } + + SECTION("gridDim.x == 0") { + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{0, 1, 1}, dim3{1, 1, 1}, + nullptr, 0, nullptr), + 
hipErrorInvalidValue); + } + + SECTION("gridDim.y == 0") { + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 0, 1}, dim3{1, 1, 1}, + nullptr, 0, nullptr), + hipErrorInvalidValue); + } + + SECTION("gridDim.z == 0") { + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 0}, dim3{1, 1, 1}, + nullptr, 0, nullptr), + hipErrorInvalidValue); + } + + SECTION("blockDim.x == 0") { + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{0, 1, 1}, + nullptr, 0, nullptr), + hipErrorInvalidValue); + } + + SECTION("blockDim.y == 0") { + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 0, 1}, + nullptr, 0, nullptr), + hipErrorInvalidValue); + } + + SECTION("blockDim.z == 0") { + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, 0}, + nullptr, 0, nullptr), + hipErrorInvalidValue); + } + + SECTION("blockDim.x > maxBlockDimX") { + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0) + 1u; + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{x, 1, 1}, + nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION("blockDim.y > maxBlockDimY") { + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0) + 1u; + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, y, 1}, + nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION("blockDim.z > maxBlockDimZ") { + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0) + 1u; + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, z}, + nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") { + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxThreadsPerBlock, 0); + const unsigned int dim = std::ceil(std::cbrt(max)); + 
HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, + dim3{dim, dim, dim}, nullptr, 0, nullptr), + hipErrorInvalidConfiguration); + } + + SECTION("sharedMemBytes > maxSharedMemoryPerBlock") { + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxSharedMemoryPerBlock, 0) + 1u; + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, 1}, + nullptr, max, nullptr), + hipErrorOutOfMemory); + } + + SECTION("Invalid stream") { + hipStream_t stream = nullptr; + HIP_CHECK(hipStreamCreate(&stream)); + HIP_CHECK(hipStreamDestroy(stream)); + HIP_CHECK_ERROR(hipLaunchKernel(reinterpret_cast(kernel), dim3{1, 1, 1}, dim3{1, 1, 1}, + nullptr, 0, stream), + hipErrorInvalidValue); + } +} \ No newline at end of file diff --git a/catch/unit/executionControl/launch_api.cc b/catch/unit/executionControl/launch_api.cc new file mode 100644 index 0000000000..64cdcf8266 --- /dev/null +++ b/catch/unit/executionControl/launch_api.cc @@ -0,0 +1,69 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "execution_control_common.hh" + +#include +#include + +TEST_CASE("Unit_hipLaunchByPtr_Positive_Basic") { + LinearAllocGuard alloc(LinearAllocs::hipMallocManaged, 4); + + SECTION("hipConfigureCall") { HIP_CHECK(hipConfigureCall(dim3{1}, dim3{1}, 0, nullptr)); } + + SECTION("__hipPushCallConfiguration") { + HIP_CHECK(__hipPushCallConfiguration(dim3{1}, dim3{1}, 0, nullptr)); + } + + int* arg = alloc.ptr(); + HIP_CHECK(hipSetupArgument(&arg, sizeof(int*), 0)); + + HIP_CHECK(hipLaunchByPtr(reinterpret_cast(kernel_42))); + HIP_CHECK(hipDeviceSynchronize()); + + REQUIRE(alloc.ptr()[0] == 42); +} + +TEST_CASE("Unit_hipLaunchByPtr_Negative_Parameters") { + HIP_CHECK(hipConfigureCall(dim3{1}, dim3{1}, 0, nullptr)); + HIP_CHECK_ERROR(hipLaunchByPtr(nullptr), hipErrorInvalidDeviceFunction); +} + +TEST_CASE("Unit___hipPushCallConfiguration_Positive_Basic") { + StreamGuard stream_guard(Streams::created); + HIP_CHECK(__hipPushCallConfiguration(dim3{1, 2, 3}, dim3{3, 2, 1}, 1024, stream_guard.stream())); + + dim3 grid; + dim3 block; + size_t shmem; + hipStream_t stream; + HIP_CHECK(__hipPopCallConfiguration(&grid, &block, &shmem, &stream)); + + REQUIRE(grid.x == 1); + REQUIRE(grid.y == 2); + REQUIRE(grid.z == 3); + REQUIRE(block.x == 3); + REQUIRE(block.y == 2); + REQUIRE(block.z == 1); + REQUIRE(shmem == 1024); + REQUIRE(stream == stream_guard.stream()); +} \ No newline at end of file From dc78fafa49c0b0da6057c5395c3e12aa6630526d Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Fri, 2 Feb 2024 22:00:39 +0530 Subject: [PATCH 59/71] EXSWHTEC-384 - Coverage Tool and Test Plan update #455 Change-Id: Idf52308186a73ae1c4d815eb59f20ea7da99c964 --- .../coalesced_tiled_groups_metagrp.cc | 2 +- 
.../unit/dynamicLoading/complex_loading_behavior.cc | 2 +- catch/unit/dynamicLoading/hipApiDynamicLoad.cc | 2 +- catch/unit/event/hipEventCreateWithFlags.cc | 2 +- catch/unit/g++/hipMalloc.cc | 2 +- catch/unit/gcc/gccTest.cc | 2 +- catch/unit/graph/hipGraphAddDependencies.cc | 2 +- catch/unit/graph/hipGraphAddKernelNode.cc | 2 +- catch/unit/graph/hipGraphGetEdges.cc | 2 +- catch/unit/graph/hipGraphGetNodes.cc | 2 +- catch/unit/graph/hipGraphGetRootNodes.cc | 2 +- catch/unit/graph/hipGraphNodeGetDependencies.cc | 2 +- catch/unit/graph/hipGraphNodeGetDependentNodes.cc | 2 +- catch/unit/graph/hipGraphRemoveDependencies.cc | 2 +- catch/unit/graph/hipStreamBeginCapture.cc | 3 ++- catch/unit/kernel/hipDynamicShared.cc | 2 +- catch/unit/kernel/hipDynamicShared2.cc | 2 +- catch/unit/kernel/hipEmptyKernel.cc | 2 +- catch/unit/kernel/hipExtLaunchKernelGGL.cc | 2 +- catch/unit/kernel/hipGridLaunch.cc | 2 +- catch/unit/kernel/hipLanguageExtensions.cc | 2 +- catch/unit/kernel/hipLaunchParm.cc | 2 +- catch/unit/kernel/hipLaunchParmFunctor.cc | 2 +- catch/unit/kernel/hipPrintfKernel.cc | 2 +- catch/unit/kernel/hipTestConstant.cc | 2 +- catch/unit/kernel/hipTestGlobalVariable.cc | 2 +- catch/unit/kernel/hipTestMemKernel.cc | 2 +- catch/unit/kernel/launch_bounds.cc | 2 +- catch/unit/memory/hipHostRegister.cc | 2 +- catch/unit/memory/hipPointerGetAttributes.cc | 2 +- catch/unit/module/hipExtModuleLaunchKernel.cc | 2 +- catch/unit/p2p/hipDeviceGetP2PAttribute.cc | 2 +- catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc | 2 +- catch/unit/printf/printfFlagsNonHost.cc | 2 +- catch/unit/printf/printfHost.cc | 2 +- catch/unit/printf/printfNonHost.cc | 2 +- catch/unit/printf/printfSpecifiersNonHost.cc | 2 +- catch/unit/stream/hipStreamGetDevice.cc | 1 + utils/coverage/device_api_list.txt | 6 ++++++ utils/coverage/hipAPICoverageUtils.cpp | 13 ++++++++++--- utils/coverage/mainCoverage.cpp | 1 + 41 files changed, 56 insertions(+), 40 deletions(-) diff --git 
a/catch/unit/cooperativeGrps/coalesced_tiled_groups_metagrp.cc b/catch/unit/cooperativeGrps/coalesced_tiled_groups_metagrp.cc index a7f9ddc7e7..b3dbe4d0e2 100644 --- a/catch/unit/cooperativeGrps/coalesced_tiled_groups_metagrp.cc +++ b/catch/unit/cooperativeGrps/coalesced_tiled_groups_metagrp.cc @@ -21,7 +21,7 @@ THE SOFTWARE. */ #include #include -#include + /** * @addtogroup coalesced_group thread_block_tile diff --git a/catch/unit/dynamicLoading/complex_loading_behavior.cc b/catch/unit/dynamicLoading/complex_loading_behavior.cc index c1c412052f..663ce7f5a1 100644 --- a/catch/unit/dynamicLoading/complex_loading_behavior.cc +++ b/catch/unit/dynamicLoading/complex_loading_behavior.cc @@ -18,7 +18,7 @@ THE SOFTWARE. */ #include #include -#include + /** * @addtogroup hipLaunchKernelGGL hipLaunchCooperativeKernel * @{ diff --git a/catch/unit/dynamicLoading/hipApiDynamicLoad.cc b/catch/unit/dynamicLoading/hipApiDynamicLoad.cc index e583f4a3d1..b09300bfea 100644 --- a/catch/unit/dynamicLoading/hipApiDynamicLoad.cc +++ b/catch/unit/dynamicLoading/hipApiDynamicLoad.cc @@ -17,7 +17,7 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include -#include + #include #include #include diff --git a/catch/unit/event/hipEventCreateWithFlags.cc b/catch/unit/event/hipEventCreateWithFlags.cc index 875d7f4295..cb3e0d4ed5 100644 --- a/catch/unit/event/hipEventCreateWithFlags.cc +++ b/catch/unit/event/hipEventCreateWithFlags.cc @@ -22,7 +22,7 @@ THE SOFTWARE. 
#include #include -#include + #include constexpr size_t buffer_size = (1024*1024); diff --git a/catch/unit/g++/hipMalloc.cc b/catch/unit/g++/hipMalloc.cc index 22e3141c0e..c0ee9d0892 100644 --- a/catch/unit/g++/hipMalloc.cc +++ b/catch/unit/g++/hipMalloc.cc @@ -18,7 +18,7 @@ * */ #include -#include + #include "hipMalloc.h" /** * @addtogroup hipMalloc hipMalloc diff --git a/catch/unit/gcc/gccTest.cc b/catch/unit/gcc/gccTest.cc index 6c64553558..6332540682 100644 --- a/catch/unit/gcc/gccTest.cc +++ b/catch/unit/gcc/gccTest.cc @@ -18,7 +18,7 @@ * */ #include -#include + extern "C" { #include "LaunchKernel.h" } diff --git a/catch/unit/graph/hipGraphAddDependencies.cc b/catch/unit/graph/hipGraphAddDependencies.cc index 0102d90ca9..1281581f89 100644 --- a/catch/unit/graph/hipGraphAddDependencies.cc +++ b/catch/unit/graph/hipGraphAddDependencies.cc @@ -20,7 +20,7 @@ THE SOFTWARE. #include #include #include -#include + #include "graph_dependency_common.hh" diff --git a/catch/unit/graph/hipGraphAddKernelNode.cc b/catch/unit/graph/hipGraphAddKernelNode.cc index 9f030c9807..dd6d9f6960 100644 --- a/catch/unit/graph/hipGraphAddKernelNode.cc +++ b/catch/unit/graph/hipGraphAddKernelNode.cc @@ -21,7 +21,7 @@ THE SOFTWARE. #include #include #include -#include + #define CODEOBJ_FILE "add_Kernel.code" #define KERNEL_NAME "Add" diff --git a/catch/unit/graph/hipGraphGetEdges.cc b/catch/unit/graph/hipGraphGetEdges.cc index e2a863ef04..408ba88884 100644 --- a/catch/unit/graph/hipGraphGetEdges.cc +++ b/catch/unit/graph/hipGraphGetEdges.cc @@ -20,7 +20,7 @@ THE SOFTWARE. #include #include #include -#include + #include "graph_dependency_common.hh" diff --git a/catch/unit/graph/hipGraphGetNodes.cc b/catch/unit/graph/hipGraphGetNodes.cc index 959c9c55b6..2d7837fd13 100644 --- a/catch/unit/graph/hipGraphGetNodes.cc +++ b/catch/unit/graph/hipGraphGetNodes.cc @@ -22,7 +22,7 @@ THE SOFTWARE. 
#include #include #include -#include + #include "graph_dependency_common.hh" diff --git a/catch/unit/graph/hipGraphGetRootNodes.cc b/catch/unit/graph/hipGraphGetRootNodes.cc index 69e4b34de0..7e88953f39 100644 --- a/catch/unit/graph/hipGraphGetRootNodes.cc +++ b/catch/unit/graph/hipGraphGetRootNodes.cc @@ -22,7 +22,7 @@ THE SOFTWARE. #include #include #include -#include + #include "graph_dependency_common.hh" diff --git a/catch/unit/graph/hipGraphNodeGetDependencies.cc b/catch/unit/graph/hipGraphNodeGetDependencies.cc index 82e7ce9aef..a395ebc961 100644 --- a/catch/unit/graph/hipGraphNodeGetDependencies.cc +++ b/catch/unit/graph/hipGraphNodeGetDependencies.cc @@ -22,7 +22,7 @@ THE SOFTWARE. #include #include #include -#include + #include "graph_dependency_common.hh" diff --git a/catch/unit/graph/hipGraphNodeGetDependentNodes.cc b/catch/unit/graph/hipGraphNodeGetDependentNodes.cc index 63d5c4f889..f3a6d984f9 100644 --- a/catch/unit/graph/hipGraphNodeGetDependentNodes.cc +++ b/catch/unit/graph/hipGraphNodeGetDependentNodes.cc @@ -22,7 +22,7 @@ THE SOFTWARE. #include #include #include -#include + #include "graph_dependency_common.hh" diff --git a/catch/unit/graph/hipGraphRemoveDependencies.cc b/catch/unit/graph/hipGraphRemoveDependencies.cc index f29498950b..46d4d6ff10 100644 --- a/catch/unit/graph/hipGraphRemoveDependencies.cc +++ b/catch/unit/graph/hipGraphRemoveDependencies.cc @@ -20,7 +20,7 @@ THE SOFTWARE. #include #include #include -#include + #include "graph_dependency_common.hh" diff --git a/catch/unit/graph/hipStreamBeginCapture.cc b/catch/unit/graph/hipStreamBeginCapture.cc index 21a2edec0b..cbe8185818 100644 --- a/catch/unit/graph/hipStreamBeginCapture.cc +++ b/catch/unit/graph/hipStreamBeginCapture.cc @@ -19,7 +19,8 @@ THE SOFTWARE. 
#include #include -#include "stream_capture_common.hh" // NOLINT + +#include "stream_capture_common.hh" // NOLINT #pragma clang diagnostic ignored "-Wunused-variable" /** diff --git a/catch/unit/kernel/hipDynamicShared.cc b/catch/unit/kernel/hipDynamicShared.cc index c8593b0939..90de000d8f 100644 --- a/catch/unit/kernel/hipDynamicShared.cc +++ b/catch/unit/kernel/hipDynamicShared.cc @@ -20,7 +20,7 @@ THE SOFTWARE. #include #include #include -#include + #pragma clang diagnostic ignored "-Wunused-parameter" diff --git a/catch/unit/kernel/hipDynamicShared2.cc b/catch/unit/kernel/hipDynamicShared2.cc index 47a94c1357..bcc5ecca85 100644 --- a/catch/unit/kernel/hipDynamicShared2.cc +++ b/catch/unit/kernel/hipDynamicShared2.cc @@ -20,7 +20,7 @@ THE SOFTWARE. #include #include #include -#include + #define LEN (16 * 1024) #define SIZE (LEN * sizeof(float)) diff --git a/catch/unit/kernel/hipEmptyKernel.cc b/catch/unit/kernel/hipEmptyKernel.cc index 9262397416..eb6f9818ba 100644 --- a/catch/unit/kernel/hipEmptyKernel.cc +++ b/catch/unit/kernel/hipEmptyKernel.cc @@ -20,7 +20,7 @@ THE SOFTWARE. #include #include #include -#include + #pragma clang diagnostic ignored "-Wunused-parameter" diff --git a/catch/unit/kernel/hipExtLaunchKernelGGL.cc b/catch/unit/kernel/hipExtLaunchKernelGGL.cc index c23ebb7f81..6cf89ade50 100644 --- a/catch/unit/kernel/hipExtLaunchKernelGGL.cc +++ b/catch/unit/kernel/hipExtLaunchKernelGGL.cc @@ -21,7 +21,7 @@ THE SOFTWARE. #include #include #include -#include + #include "hip/hip_ext.h" static unsigned threadsPerBlock = 256; diff --git a/catch/unit/kernel/hipGridLaunch.cc b/catch/unit/kernel/hipGridLaunch.cc index 7716b0781a..e009a29c77 100644 --- a/catch/unit/kernel/hipGridLaunch.cc +++ b/catch/unit/kernel/hipGridLaunch.cc @@ -21,7 +21,7 @@ THE SOFTWARE. 
#include #include #include -#include + static unsigned threadsPerBlock = 256; static unsigned blocksPerCU = 6; diff --git a/catch/unit/kernel/hipLanguageExtensions.cc b/catch/unit/kernel/hipLanguageExtensions.cc index 446c91a213..e303c52138 100644 --- a/catch/unit/kernel/hipLanguageExtensions.cc +++ b/catch/unit/kernel/hipLanguageExtensions.cc @@ -20,7 +20,7 @@ THE SOFTWARE. #include #include #include -#include + #include #pragma clang diagnostic ignored "-Wunused-variable" diff --git a/catch/unit/kernel/hipLaunchParm.cc b/catch/unit/kernel/hipLaunchParm.cc index 9ae028cac7..c5cf63cbbf 100644 --- a/catch/unit/kernel/hipLaunchParm.cc +++ b/catch/unit/kernel/hipLaunchParm.cc @@ -20,7 +20,7 @@ THE SOFTWARE. #include #include #include -#include + #include #pragma clang diagnostic ignored "-Wunused-variable" diff --git a/catch/unit/kernel/hipLaunchParmFunctor.cc b/catch/unit/kernel/hipLaunchParmFunctor.cc index 5b12ff52ae..a99aa5f412 100644 --- a/catch/unit/kernel/hipLaunchParmFunctor.cc +++ b/catch/unit/kernel/hipLaunchParmFunctor.cc @@ -20,7 +20,7 @@ THE SOFTWARE. #include #include #include -#include + class HipFunctorTests { public: diff --git a/catch/unit/kernel/hipPrintfKernel.cc b/catch/unit/kernel/hipPrintfKernel.cc index da81c767fa..c616f86eae 100644 --- a/catch/unit/kernel/hipPrintfKernel.cc +++ b/catch/unit/kernel/hipPrintfKernel.cc @@ -17,7 +17,7 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include -#include + #include #include "../kernel/printf_common.h" diff --git a/catch/unit/kernel/hipTestConstant.cc b/catch/unit/kernel/hipTestConstant.cc index 911457af0f..0d7693a91f 100644 --- a/catch/unit/kernel/hipTestConstant.cc +++ b/catch/unit/kernel/hipTestConstant.cc @@ -17,7 +17,7 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include -#include + #define LEN 512 #define SIZE 2048 diff --git a/catch/unit/kernel/hipTestGlobalVariable.cc b/catch/unit/kernel/hipTestGlobalVariable.cc index a2d99fa8b7..151a92f7e8 100644 --- a/catch/unit/kernel/hipTestGlobalVariable.cc +++ b/catch/unit/kernel/hipTestGlobalVariable.cc @@ -17,7 +17,7 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include -#include + #define LEN 512 #define SIZE 2048 diff --git a/catch/unit/kernel/hipTestMemKernel.cc b/catch/unit/kernel/hipTestMemKernel.cc index d97a5698e0..beedb180a5 100644 --- a/catch/unit/kernel/hipTestMemKernel.cc +++ b/catch/unit/kernel/hipTestMemKernel.cc @@ -17,7 +17,7 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include -#include + #define LEN8 8 * 4 #define LEN9 9 * 4 diff --git a/catch/unit/kernel/launch_bounds.cc b/catch/unit/kernel/launch_bounds.cc index 59b1132898..5cab54679b 100644 --- a/catch/unit/kernel/launch_bounds.cc +++ b/catch/unit/kernel/launch_bounds.cc @@ -17,7 +17,7 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include -#include + constexpr size_t N = 1024; int p_blockSize = 256; diff --git a/catch/unit/memory/hipHostRegister.cc b/catch/unit/memory/hipHostRegister.cc index cb62532ae7..2110ee9281 100644 --- a/catch/unit/memory/hipHostRegister.cc +++ b/catch/unit/memory/hipHostRegister.cc @@ -32,7 +32,7 @@ THE SOFTWARE. 
#include #include #include -#include + #include #define OFFSET 128 diff --git a/catch/unit/memory/hipPointerGetAttributes.cc b/catch/unit/memory/hipPointerGetAttributes.cc index 4c34a6edc4..60357678f6 100644 --- a/catch/unit/memory/hipPointerGetAttributes.cc +++ b/catch/unit/memory/hipPointerGetAttributes.cc @@ -30,7 +30,7 @@ Following scenarios are verified for hipPointerGetAttributes API */ #include #include -#include + #ifdef __linux__ #include #endif diff --git a/catch/unit/module/hipExtModuleLaunchKernel.cc b/catch/unit/module/hipExtModuleLaunchKernel.cc index 8c77b796d1..3772587453 100644 --- a/catch/unit/module/hipExtModuleLaunchKernel.cc +++ b/catch/unit/module/hipExtModuleLaunchKernel.cc @@ -44,7 +44,7 @@ THE SOFTWARE. */ #include -#include + #include #include #include "hip/hip_ext.h" diff --git a/catch/unit/p2p/hipDeviceGetP2PAttribute.cc b/catch/unit/p2p/hipDeviceGetP2PAttribute.cc index 89207fee7f..5564fdfc31 100644 --- a/catch/unit/p2p/hipDeviceGetP2PAttribute.cc +++ b/catch/unit/p2p/hipDeviceGetP2PAttribute.cc @@ -23,7 +23,7 @@ THE SOFTWARE. #include "hip/hip_runtime_api.h" #include #include -#include + /** * @addtogroup hipDeviceGetP2PAttribute hipDeviceGetP2PAttribute diff --git a/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc b/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc index 2aade21ad3..fcd114634f 100644 --- a/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc +++ b/catch/unit/p2p/hipP2pLinkTypeAndHopFunc.cc @@ -21,7 +21,7 @@ THE SOFTWARE. #include #include #include -#include + #ifdef __linux__ #include #include diff --git a/catch/unit/printf/printfFlagsNonHost.cc b/catch/unit/printf/printfFlagsNonHost.cc index 75f5ef7c35..1fd7900d52 100644 --- a/catch/unit/printf/printfFlagsNonHost.cc +++ b/catch/unit/printf/printfFlagsNonHost.cc @@ -18,7 +18,7 @@ THE SOFTWARE. 
#include #include -#include + /** * @addtogroup printf printf diff --git a/catch/unit/printf/printfHost.cc b/catch/unit/printf/printfHost.cc index 3456d63d21..a4afc5268a 100644 --- a/catch/unit/printf/printfHost.cc +++ b/catch/unit/printf/printfHost.cc @@ -19,7 +19,7 @@ THE SOFTWARE. */ #include -#include + // Kernel Function __global__ void run_printf(int *count) { diff --git a/catch/unit/printf/printfNonHost.cc b/catch/unit/printf/printfNonHost.cc index 36bd8e35f6..80989de77a 100644 --- a/catch/unit/printf/printfNonHost.cc +++ b/catch/unit/printf/printfNonHost.cc @@ -19,7 +19,7 @@ THE SOFTWARE. */ #include -#include + #define ITER_COUNT 61681 #define KERNEL_ITERATIONS 15 diff --git a/catch/unit/printf/printfSpecifiersNonHost.cc b/catch/unit/printf/printfSpecifiersNonHost.cc index 7c6559641f..c712e5e435 100644 --- a/catch/unit/printf/printfSpecifiersNonHost.cc +++ b/catch/unit/printf/printfSpecifiersNonHost.cc @@ -22,7 +22,7 @@ THE SOFTWARE. #include #include -#include + /** * @addtogroup printf diff --git a/catch/unit/stream/hipStreamGetDevice.cc b/catch/unit/stream/hipStreamGetDevice.cc index 1fe87eaeeb..fd89069f2f 100644 --- a/catch/unit/stream/hipStreamGetDevice.cc +++ b/catch/unit/stream/hipStreamGetDevice.cc @@ -20,6 +20,7 @@ THE SOFTWARE. 
#include #include #include + #define NUMBER_OF_THREADS 10 static bool thread_results[NUMBER_OF_THREADS]; diff --git a/utils/coverage/device_api_list.txt b/utils/coverage/device_api_list.txt index 7588950886..73adb4a3d2 100644 --- a/utils/coverage/device_api_list.txt +++ b/utils/coverage/device_api_list.txt @@ -731,3 +731,9 @@ Device float16 functions [ __hisnan2 __hneg2 ] + +OpenGL Interop [ + hipGLGetDevices + hipGraphicsGLRegisterBuffer + hipGraphicsGLRegisterImage +] \ No newline at end of file diff --git a/utils/coverage/hipAPICoverageUtils.cpp b/utils/coverage/hipAPICoverageUtils.cpp index 30242fbcb9..ea55351046 100644 --- a/utils/coverage/hipAPICoverageUtils.cpp +++ b/utils/coverage/hipAPICoverageUtils.cpp @@ -47,7 +47,8 @@ void findAPICallInFile(HipAPI& hip_api, std::string test_module_file) { std::string api_member{"." + hip_api.getName() + "("}; std::string api_newline{" " + hip_api.getName() + "("}; std::string api_templated{" " + hip_api.getName() + "<"}; - std::string api_kernel_def_macro{"_KERNEL_DEF(" + hip_api.getName() + ")"}; + std::string api_kernel_def_macro{"_KERNEL_DEF(" + hip_api.getName()}; + std::string api_test_def_macro{"_TEST_DEF(" + hip_api.getName()}; std::string api_restriction{hip_api.getFileRestriction()}; bool found_restriction{false}; @@ -68,7 +69,8 @@ void findAPICallInFile(HipAPI& hip_api, std::string test_module_file) { (line.find(api_newline) != std::string::npos) || (line.find(hip_api.getName() + "(") == 0) || (line.find(api_templated) != std::string::npos) || - (line.find(api_kernel_def_macro) != std::string::npos)) { + (line.find(api_kernel_def_macro) != std::string::npos) || + (line.find(api_test_def_macro) != std::string::npos)) { if (api_restriction == "" || found_restriction) { hip_api.addFileOccurrence(FileOccurrence(test_module_file, line_number)); } @@ -199,6 +201,7 @@ std::vector extractHipAPIs(std::string& hip_api_header_file, of code shall not be considered. 
*/ std::string hip_api_prefix{"hip"}; + std::string hip_api_prefix_builtin{"__hip"}; std::string group_definition{"@defgroup"}; std::string add_group_definition{"@addtogroup"}; std::string start_of_api_groups{"HIP API"}; @@ -289,7 +292,11 @@ std::vector extractHipAPIs(std::string& hip_api_header_file, Remove all spaces if they exist in the parsed string, e.g., hipError_t hipDeviceSetLimit ( enum hipLimit_t limit, size_t value );. */ - std::string api_name{api_name_no_brackets.substr(api_name_no_brackets.rfind(hip_api_prefix))}; + auto api_name_pos = api_name_no_brackets.rfind(hip_api_prefix_builtin); + if (api_name_pos == std::string::npos) { + api_name_pos = api_name_no_brackets.rfind(hip_api_prefix); + } + std::string api_name{api_name_no_brackets.substr(api_name_pos)}; api_name.erase(std::remove(api_name.begin(), api_name.end(), ' '), api_name.end()); if (!api_group_names_tracker.empty()) { diff --git a/utils/coverage/mainCoverage.cpp b/utils/coverage/mainCoverage.cpp index cc35cab075..89f7b61652 100644 --- a/utils/coverage/mainCoverage.cpp +++ b/utils/coverage/mainCoverage.cpp @@ -47,6 +47,7 @@ int main(int argc, char** argv) { std::cout << "Number of detected HIP APIs from " << hip_api_header_file << ": " << hip_apis.size() << std::endl; + api_group_names.push_back("Runtime Compilation"); std::vector hip_rtc_apis{extractHipAPIs(hip_rtc_header_file, api_group_names, true)}; std::cout << "Number of detected HIP APIs from " << hip_rtc_header_file << ": " << hip_rtc_apis.size() << std::endl; From 1e15bfca2317bc7893c9e4a134ced61485bbe3ef Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Fri, 29 Dec 2023 14:29:03 +0000 Subject: [PATCH 60/71] EXSWHTEC-383 - Implement tests for hipGraphAddNode API #456 Change-Id: I91762d23c4a05acc68463402e76cee4e26d27b57 --- catch/unit/graph/CMakeLists.txt | 1 + catch/unit/graph/hipGraphAddNode.cc | 553 ++++++++++++++++++++++++++++ 2 files changed, 554 insertions(+) create mode 100644 catch/unit/graph/hipGraphAddNode.cc diff --git 
a/catch/unit/graph/CMakeLists.txt b/catch/unit/graph/CMakeLists.txt index d6519375a4..6a077cc80e 100644 --- a/catch/unit/graph/CMakeLists.txt +++ b/catch/unit/graph/CMakeLists.txt @@ -157,6 +157,7 @@ set(TEST_SRC hipDeviceSetGraphMemAttribute.cc hipDeviceGetGraphMemAttribute.cc hipDeviceGraphMemTrim.cc + hipGraphAddNode.cc ) add_custom_target(add_Kernel.code COMMAND ${CMAKE_CXX_COMPILER} --genco ${OFFLOAD_ARCH_STR} ${CMAKE_CURRENT_SOURCE_DIR}/add_Kernel.cpp -o ${CMAKE_CURRENT_BINARY_DIR}/../graph/add_Kernel.code -I${HIP_PATH}/include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include --rocm-path=${ROCM_PATH}) diff --git a/catch/unit/graph/hipGraphAddNode.cc b/catch/unit/graph/hipGraphAddNode.cc new file mode 100644 index 0000000000..645fd0716f --- /dev/null +++ b/catch/unit/graph/hipGraphAddNode.cc @@ -0,0 +1,553 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "graph_memset_node_test_common.hh"
+#include "graph_tests_common.hh"
+
+#pragma clang diagnostic ignored "-Wunused-parameter"
+
+/**
+ * @addtogroup hipGraphAddNode hipGraphAddNode
+ * @{
+ * @ingroup GraphTest
+ * `hipGraphAddNode(hipGraphNode_t *pGraphNode, hipGraph_t graph, const hipGraphNode_t
+ * *pDependencies, size_t numDependencies, hipGraphNodeParams *nodeParams)` - Creates a node and
+ * adds it to a graph
+ */
+
+// Number of int elements processed by the host callback and by the kernel below.
+static constexpr size_t N = 1024;
+
+// Host-node callback: fills the N-element int array passed as user data with 0, 1, ..., N - 1.
+static void callbackfunc(void* A_h) {
+  int* A = reinterpret_cast<int*>(A_h);
+  // The index has the same type as N, which avoids a signed/unsigned comparison.
+  for (size_t i = 0; i < N; i++) {
+    A[i] = static_cast<int>(i);
+  }
+}
+
+// Kernel: squares in place each of the N elements of A_d. The loop does not depend on the thread
+// index, so every launched thread walks the whole array.
+static void __global__ vector_square(int* A_d) {
+  for (size_t i = 0; i < N; i++) {
+    A_d[i] = A_d[i] * A_d[i];
+  }
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Verify that all elements of destination memory are set to the correct value.
+ * The test is repeated for all valid element sizes(1, 2, 4), and several allocations of different
+ * height and width, both on host and device.
+ * Test source
+ * ------------------------
+ *  - unit/graph/hipGraphAddNode.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 6.0
+ */
+TEMPLATE_TEST_CASE("Unit_hipGraphAddNodeTypeMemset_Positive_Basic", "", uint8_t, uint16_t,
+                   uint32_t) {
+  // Performs one memset by building, launching and tearing down a single-node graph whose only
+  // node is created through hipGraphAddNode from the given memset parameters.
+  const auto run_memset_graph = [](hipMemsetParams* params) {
+    hipGraph_t memset_graph = nullptr;
+    HIP_CHECK(hipGraphCreate(&memset_graph, 0));
+
+    // Forward the caller's memset description field by field into the generic node params.
+    hipGraphNodeParams memset_node_params = {};
+    memset_node_params.type = hipGraphNodeTypeMemset;
+    memset_node_params.memset.dst = params->dst;
+    memset_node_params.memset.value = params->value;
+    memset_node_params.memset.elementSize = params->elementSize;
+    memset_node_params.memset.pitch = params->pitch;
+    memset_node_params.memset.width = params->width;
+    memset_node_params.memset.height = params->height;
+
+    hipGraphNode_t memset_node = nullptr;
+    HIP_CHECK(hipGraphAddNode(&memset_node, memset_graph, nullptr, 0, &memset_node_params));
+
+    hipGraphExec_t exec = nullptr;
+    HIP_CHECK(hipGraphInstantiate(&exec, memset_graph, nullptr, nullptr, 0));
+
+    // Run the graph to completion before releasing it.
+    HIP_CHECK(hipGraphLaunch(exec, hipStreamPerThread));
+    HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
+
+    HIP_CHECK(hipGraphExecDestroy(exec));
+    HIP_CHECK(hipGraphDestroy(memset_graph));
+
+    return hipSuccess;
+  };
+
+  GraphMemsetNodeCommonPositive(run_memset_graph);
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Verify that kernel node added with hipGraphAddNode executes correctly and does the square of
+ * values in the device array. The result is copied to host and verified.
+ * Test source
+ * ------------------------
+ *  - unit/graph/hipGraphAddNode.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 6.0
+ */
+TEST_CASE("Unit_hipGraphAddNodeTypeKernel_Positive_Basic") {
+  constexpr size_t allocation_size = N * sizeof(int);
+  hipGraph_t graph;
+  hipGraphExec_t graphExec;
+
+  int* A_d{nullptr};
+  int *A_h{nullptr}, *B_h{nullptr};
+  HipTest::initArrays(&A_d, nullptr, nullptr, &A_h, &B_h, nullptr, N, false);
+
+  HIP_CHECK(hipGraphCreate(&graph, 0));
+
+  hipGraphNode_t memcpyH2D_A, memcpyD2H_B;
+  hipStream_t streamForGraph;
+  HIP_CHECK(hipStreamCreate(&streamForGraph));
+
+  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, graph, nullptr, 0, A_d, A_h, allocation_size,
+                                    hipMemcpyHostToDevice));
+
+  // Kernel node under test, created through the generic hipGraphAddNode entry point.
+  hipGraphNode_t node;
+  hipGraphNodeParams node_params = {};
+  node_params.type = hipGraphNodeTypeKernel;
+  void* kernel_args[] = {&A_d};
+  node_params.kernel.func = reinterpret_cast<void*>(vector_square);
+  node_params.kernel.gridDim = dim3(1);
+  node_params.kernel.blockDim = dim3(1);
+  node_params.kernel.sharedMemBytes = 0;
+  node_params.kernel.kernelParams = reinterpret_cast<void**>(kernel_args);
+  node_params.kernel.extra = nullptr;
+  HIP_CHECK(hipGraphAddNode(&node, graph, nullptr, 0, &node_params));
+
+
+  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyD2H_B, graph, nullptr, 0, B_h, A_d, allocation_size,
+                                    hipMemcpyDeviceToHost));
+
+  // Order the nodes: upload A_h -> square on device -> download into B_h.
+  HIP_CHECK(hipGraphAddDependencies(graph, &memcpyH2D_A, &node, 1));
+  HIP_CHECK(hipGraphAddDependencies(graph, &node, &memcpyD2H_B, 1));
+
+  // Instantiate and launch the graph
+  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
+  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
+  HIP_CHECK(hipStreamSynchronize(streamForGraph));
+
+  // Verify execution result; asserting on the comparison itself reports the mismatching values.
+  for (size_t i = 0; i < N; i++) {
+    REQUIRE(B_h[i] == A_h[i] * A_h[i]);
+  }
+
+  HipTest::freeArrays(A_d, nullptr, nullptr, A_h, B_h, nullptr, false);
+  HIP_CHECK(hipGraphExecDestroy(graphExec));
+  HIP_CHECK(hipGraphDestroy(graph));
+  HIP_CHECK(hipStreamDestroy(streamForGraph));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Verify that host node added with hipGraphAddNode executes correctly and sets values of host
+ * array. The result is verified.
+ * Test source
+ * ------------------------
+ *  - unit/graph/hipGraphAddNode.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 6.0
+ */
+TEST_CASE("Unit_hipGraphAddNodeTypeHost_Positive_Basic") {
+  constexpr size_t allocation_size = N * sizeof(int);
+  hipGraph_t graph;
+  hipGraphExec_t graphExec;
+  int* A_h = static_cast<int*>(malloc(allocation_size));
+  // Stop here instead of writing through a null pointer if the host allocation failed.
+  REQUIRE(A_h != nullptr);
+  std::fill_n(A_h, N, 0);
+
+  HIP_CHECK(hipGraphCreate(&graph, 0));
+  hipStream_t streamForGraph;
+  HIP_CHECK(hipStreamCreate(&streamForGraph));
+
+  // Host node under test: callbackfunc receives A_h as its user data.
+  hipGraphNode_t node;
+  hipGraphNodeParams node_params = {};
+  node_params.type = hipGraphNodeTypeHost;
+  node_params.host.fn = callbackfunc;
+  node_params.host.userData = A_h;
+  HIP_CHECK(hipGraphAddNode(&node, graph, nullptr, 0, &node_params));
+
+  // Instantiate and launch the graph
+  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
+  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
+  HIP_CHECK(hipStreamSynchronize(streamForGraph));
+
+  // Verify execution result: the callback writes each element's own index.
+  for (size_t i = 0; i < N; i++) {
+    REQUIRE(A_h[i] == static_cast<int>(i));
+  }
+
+  free(A_h);
+  HIP_CHECK(hipGraphExecDestroy(graphExec));
+  HIP_CHECK(hipGraphDestroy(graph));
+  HIP_CHECK(hipStreamDestroy(streamForGraph));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Verify that when graph is created and childgraph node is added with hipGraphAddNode, the
+ * childgraph executes correctly.
+ * Test source
+ * ------------------------
+ *  - unit/graph/hipGraphAddNode.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 6.0
+ */
+TEST_CASE("Unit_hipGraphAddNodeTypeChildGraph_Positive_Basic") {
+  constexpr size_t allocation_size = N * sizeof(int);
+  hipGraph_t graph, childgraph;
+  hipGraphExec_t graphExec;
+
+  int *A_d{nullptr}, *B_d{nullptr}, *C_d{nullptr};
+  int *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr};
+  HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
+
+  HIP_CHECK(hipGraphCreate(&graph, 0));
+
+  // Reference pattern: B_h[i] == i. The child graph round-trips it through B_d into A_h.
+  for (size_t i = 0; i < N; i++) {
+    B_h[i] = i;
+  }
+
+  hipGraphNode_t memcpyH2D_A, memcpyH2D_B, childGraphNode1, memcpyH2D_C;
+  hipStream_t streamForGraph;
+  HIP_CHECK(hipStreamCreate(&streamForGraph));
+  HIP_CHECK(hipGraphCreate(&childgraph, 0));
+  // Child graph: B_h -> B_d, then B_d -> A_h (ordered by the dependency added further below).
+  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_B, childgraph, nullptr, 0, B_d, B_h, allocation_size,
+                                    hipMemcpyHostToDevice));
+  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_A, childgraph, nullptr, 0, A_h, B_d, allocation_size,
+                                    hipMemcpyDeviceToHost));
+  // Parent graph: C_h -> C_d and C_d -> A_h, added without any dependency between them.
+  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_C, graph, nullptr, 0, C_d, C_h, allocation_size,
+                                    hipMemcpyHostToDevice));
+  // NOTE(review): this call reuses memcpyH2D_C, so the handle of the node added just above is
+  // overwritten and can no longer be referenced.
+  HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2D_C, graph, nullptr, 0, A_h, C_d, allocation_size,
+                                    hipMemcpyDeviceToHost));
+
+  // Child graph node under test, embedded into the parent graph through hipGraphAddNode.
+  hipGraphNodeParams node_params = {};
+  node_params.type = hipGraphNodeTypeGraph;
+  node_params.graph.graph = childgraph;
+  HIP_CHECK(hipGraphAddNode(&childGraphNode1, graph, nullptr, 0, &node_params));
+
+  HIP_CHECK(hipGraphAddDependencies(childgraph, &memcpyH2D_B, &memcpyH2D_A, 1));
+
+  // Instantiate and launch the childgraph
+  // NOTE(review): only childgraph is instantiated and launched; the parent graph that holds
+  // childGraphNode1 is never launched in this test - confirm this is the intended coverage.
+  HIP_CHECK(hipGraphInstantiate(&graphExec, childgraph, nullptr, nullptr, 0));
+  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
+  HIP_CHECK(hipStreamSynchronize(streamForGraph));
+
+  // Verify execution result
+  for (size_t i = 0; i < N; i++) {
+    if (B_h[i] != A_h[i]) {
+      REQUIRE(false);
+    }
+  }
+
+  HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
+  HIP_CHECK(hipGraphExecDestroy(graphExec));
+  HIP_CHECK(hipGraphDestroy(childgraph));
+  HIP_CHECK(hipGraphDestroy(graph));
+  HIP_CHECK(hipStreamDestroy(streamForGraph));
+}
+
+
+// Performs a 3D memcpy by building a one-node graph: the memcpy node is created through
+// hipGraphAddNode from the parameters assembled by GetMemcpy3DParms, then the graph is
+// instantiated, launched on hipStreamPerThread and synchronized before being destroyed.
+// The stream argument is not used by the body. Returns hipSuccess once every HIP_CHECK has run.
+static hipError_t MemcpyType3DWrapper(PtrVariant dst_ptr, hipPos dst_pos, PtrVariant src_ptr,
+                                      hipPos src_pos, hipExtent extent, hipMemcpyKind kind,
+                                      hipStream_t stream = nullptr) {
+  auto parms = GetMemcpy3DParms(dst_ptr, dst_pos, src_ptr, src_pos, extent, kind);
+
+  hipGraph_t graph = nullptr;
+  HIP_CHECK(hipGraphCreate(&graph, 0));
+  hipGraphNode_t node = nullptr;
+
+  hipGraphNodeParams node_params = {};
+  node_params.type = hipGraphNodeTypeMemcpy;
+  // Zero the whole memcpy member before filling in the copy parameters.
+  memset(&node_params.memcpy, 0, sizeof(hipMemcpyNodeParams));
+  node_params.memcpy.copyParams = parms;
+  HIP_CHECK(hipGraphAddNode(&node, graph, nullptr, 0, &node_params));
+
+  hipGraphExec_t graph_exec = nullptr;
+  HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
+  HIP_CHECK(hipGraphLaunch(graph_exec, hipStreamPerThread));
+  HIP_CHECK(hipStreamSynchronize(hipStreamPerThread));
+
+  HIP_CHECK(hipGraphExecDestroy(graph_exec));
+  HIP_CHECK(hipGraphDestroy(graph));
+
+  return hipSuccess;
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Verify basic API behavior. A Memcpy node is created using hipGraphAddNode with parameters
+ * set according to the test run, after which the graph is run and the memcpy results are verified.
+ * The test is run for all possible memcpy directions, with both the corresponding memcpy
+ * kind and hipMemcpyDefault, as well as half page and full page allocation sizes.
+ * Test source
+ * ------------------------
+ *  - unit/graph/hipGraphAddNode.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 6.0
+ */
+TEST_CASE("Unit_hipGraphAddNodeTypeMemcpy_Positive_Basic") {
+  constexpr bool async = false;
+
+  // Each section hands the graph-based MemcpyType3DWrapper to one of the shared Memcpy3D*Shell
+  // helpers, one section per copy direction / memcpy kind combination.
+  SECTION("Device to host") { Memcpy3DDeviceToHostShell(MemcpyType3DWrapper); }
+
+  SECTION("Device to host with default kind") {
+    Memcpy3DDeviceToHostShell(MemcpyType3DWrapper);
+  }
+
+  SECTION("Host to device") { Memcpy3DHostToDeviceShell(MemcpyType3DWrapper); }
+
+  SECTION("Host to device with default kind") {
+    Memcpy3DHostToDeviceShell(MemcpyType3DWrapper);
+  }
+
+  SECTION("Host to host") { Memcpy3DHostToHostShell(MemcpyType3DWrapper); }
+
+  SECTION("Host to host with default kind") { Memcpy3DHostToHostShell(MemcpyType3DWrapper); }
+
+  SECTION("Device to device") {
+    SECTION("Peer access enabled") {
+      Memcpy3DDeviceToDeviceShell(MemcpyType3DWrapper);
+    }
+    SECTION("Peer access disabled") {
+      Memcpy3DDeviceToDeviceShell(MemcpyType3DWrapper);
+    }
+  }
+
+  SECTION("Device to device with default kind") {
+    SECTION("Peer access enabled") {
+      Memcpy3DDeviceToDeviceShell(MemcpyType3DWrapper);
+    }
+    SECTION("Peer access disabled") {
+      Memcpy3DDeviceToDeviceShell(MemcpyType3DWrapper);
+    }
+  }
+
+  SECTION("Array from/to Host") { Memcpy3DArrayHostShell(MemcpyType3DWrapper); }
+
+#if HT_NVIDIA  // Disabled on AMD due to defect - EXSWHTEC-220
+  SECTION("Array from/to Device") { Memcpy3DArrayDeviceShell(MemcpyType3DWrapper); }
+#endif
+}
+
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Verify basic API functionality where one event record node is added to graph with
+ * hipGraphAddNode and its correct behavior is verified.
+ * Test source
+ * ------------------------
+ *  - unit/graph/hipGraphAddNode.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 6.0
+ */
+TEST_CASE("Unit_hipGraphAddNodeTypeEventRecord_Positive_Basic") {
+  // Resources used by the test: a launch stream, the graph under test and the event to record.
+  hipStream_t launch_stream;
+  HIP_CHECK(hipStreamCreate(&launch_stream));
+  hipGraph_t graph;
+  HIP_CHECK(hipGraphCreate(&graph, 0));
+  hipEvent_t event;
+  HIP_CHECK(hipEventCreate(&event));
+
+  // The only node in the graph records `event`.
+  hipGraphNodeParams record_params = {};
+  record_params.type = hipGraphNodeTypeEventRecord;
+  record_params.eventRecord.event = event;
+  hipGraphNode_t record_node;
+  HIP_CHECK(hipGraphAddNode(&record_node, graph, nullptr, 0, &record_params));
+
+  // Instantiate and launch the graph
+  hipGraphExec_t graph_exec;
+  HIP_CHECK(hipGraphInstantiate(&graph_exec, graph, nullptr, nullptr, 0));
+  HIP_CHECK(hipGraphLaunch(graph_exec, launch_stream));
+
+  // Block until the event recorded by the graph has completed.
+  HIP_CHECK(hipEventSynchronize(event));
+
+  HIP_CHECK(hipGraphExecDestroy(graph_exec));
+  HIP_CHECK(hipGraphDestroy(graph));
+  HIP_CHECK(hipEventDestroy(event));
+  HIP_CHECK(hipStreamDestroy(launch_stream));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Verify basic API functionality where one event record and one event wait nodes are added to
+ * graph with hipGraphAddNode and their correct behavior is verified.
+ * Test source
+ * ------------------------
+ *  - unit/graph/hipGraphAddNode.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 6.0
+ */
+TEST_CASE("Unit_hipGraphAddNodeTypeEventWait_Positive_Basic") {
+  hipGraph_t graph;
+  hipStream_t streamForGraph;
+  hipGraphExec_t graphExec;
+  HIP_CHECK(hipStreamCreate(&streamForGraph));
+  HIP_CHECK(hipGraphCreate(&graph, 0));
+  hipEvent_t event;
+  HIP_CHECK(hipEventCreate(&event));
+  hipGraphNode_t event_rec_node, event_wait_node;
+
+  // Create an event record node in graph
+  hipGraphNodeParams rec_node_params = {};
+  rec_node_params.type = hipGraphNodeTypeEventRecord;
+  rec_node_params.eventRecord.event = event;
+  HIP_CHECK(hipGraphAddNode(&event_rec_node, graph, nullptr, 0, &rec_node_params));
+
+  // Create an event wait node in graph that depends on the record node. The wait parameters
+  // must be written to wait_node_params, which is the structure handed to hipGraphAddNode.
+  hipGraphNodeParams wait_node_params = {};
+  wait_node_params.type = hipGraphNodeTypeWaitEvent;
+  wait_node_params.eventWait.event = event;
+  HIP_CHECK(hipGraphAddNode(&event_wait_node, graph, &event_rec_node, 1, &wait_node_params));
+
+  // Instantiate and launch the graph
+  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
+  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
+
+  HIP_CHECK(hipStreamSynchronize(streamForGraph));
+  HIP_CHECK(hipGraphExecDestroy(graphExec));
+  HIP_CHECK(hipGraphDestroy(graph));
+  HIP_CHECK(hipEventDestroy(event));
+  HIP_CHECK(hipStreamDestroy(streamForGraph));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test to verify basic API functionality when memalloc and memfree nodes are added with
+ * hipGraphAddNode. Verify that memory is allocated correctly and graph behaves as expected when
+ * free node is added to the same graph.
+ * Test source
+ * ------------------------
+ *  - unit/graph/hipGraphAddNode.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 6.0
+ */
+TEST_CASE("Unit_hipGraphAddNodeTypeMemAlloc_Positive_Basic") {
+  constexpr size_t allocation_size = N * sizeof(int);
+  hipGraph_t graph;
+  hipStream_t streamForGraph;
+  hipGraphExec_t graphExec;
+  HIP_CHECK(hipStreamCreate(&streamForGraph));
+  HIP_CHECK(hipGraphCreate(&graph, 0));
+
+  // Allocation node: request allocation_size bytes of pinned-type memory located on device 0.
+  hipGraphNode_t alloc_node;
+  hipGraphNodeParams alloc_node_params = {};
+  alloc_node_params.type = hipGraphNodeTypeMemAlloc;
+  memset(&alloc_node_params.alloc, 0, sizeof(hipMemAllocNodeParams));
+  alloc_node_params.alloc.bytesize = allocation_size;
+  alloc_node_params.alloc.poolProps.allocType = hipMemAllocationTypePinned;
+  alloc_node_params.alloc.poolProps.location.id = 0;
+  alloc_node_params.alloc.poolProps.location.type = hipMemLocationTypeDevice;
+  HIP_CHECK(hipGraphAddNode(&alloc_node, graph, nullptr, 0, &alloc_node_params));
+
+  // The test expects the node's device pointer to be filled in when the node is added.
+  REQUIRE(alloc_node_params.alloc.dptr != nullptr);
+  int* A_d = reinterpret_cast<int*>(alloc_node_params.alloc.dptr);
+
+  // Free node: releases the pointer produced above and runs after the allocation node.
+  hipGraphNode_t free_node;
+  hipGraphNodeParams free_node_params = {};
+  free_node_params.type = hipGraphNodeTypeMemFree;
+  free_node_params.free.dptr = A_d;
+  HIP_CHECK(hipGraphAddNode(&free_node, graph, &alloc_node, 1, &free_node_params));
+
+  // Instantiate and launch the graph
+  HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
+  HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
+  HIP_CHECK(hipStreamSynchronize(streamForGraph));
+
+  HIP_CHECK(hipGraphExecDestroy(graphExec));
+  HIP_CHECK(hipGraphDestroy(graph));
+  HIP_CHECK(hipStreamDestroy(streamForGraph));
+  HIP_CHECK(hipDeviceGraphMemTrim(0));
+}
+
+/**
+ * Test Description
+ * ------------------------
+ *  - Test to verify hipGraphAddNode behavior with invalid arguments:
+ *    -# Nullptr graph
+ *    -# Nullptr graph node
+ *    -# Invalid numDependencies for null list of dependencies
+ *    -# Node in
dependency is from different graph
+ *    -# Invalid numNodes
+ *    -# Duplicate node in dependencies
+ *    -# Nullptr params
+ *    -# params type is invalid
+ * Test source
+ * ------------------------
+ *  - unit/graph/hipGraphAddNode.cc
+ * Test requirements
+ * ------------------------
+ *  - HIP_VERSION >= 6.0
+ */
+TEST_CASE("Unit_hipGraphAddNode_Negative_Parameters") {
+  using namespace std::placeholders;
+  hipGraph_t graph = nullptr;
+  HIP_CHECK(hipGraphCreate(&graph, 0));
+
+  hipEvent_t event;
+  HIP_CHECK(hipEventCreate(&event));
+
+  // Valid event-record parameters; the individual sections below break one argument at a time.
+  hipGraphNode_t node;
+  hipGraphNodeParams node_params = {};
+  node_params.type = hipGraphNodeTypeEventRecord;
+  node_params.eventRecord.event = event;
+
+  // The shared negative checks drive the first four arguments; the node params stay bound.
+  GraphAddNodeCommonNegativeTests(std::bind(hipGraphAddNode, _1, _2, _3, _4, &node_params), graph);
+
+  SECTION("params == nullptr") {
+    HIP_CHECK_ERROR(hipGraphAddNode(&node, graph, nullptr, 0, nullptr), hipErrorInvalidValue);
+  }
+
+  SECTION("params type is invalid") {
+    // 0x20 is used by this test as a node type value the API is expected to reject.
+    node_params.type = static_cast<hipGraphNodeType>(0x20);
+    HIP_CHECK_ERROR(hipGraphAddNode(&node, graph, nullptr, 0, &node_params), hipErrorInvalidValue);
+  }
+
+  HIP_CHECK(hipGraphDestroy(graph));
+  HIP_CHECK(hipEventDestroy(event));
+}
From 34da562ff508f71424e158be131ea69aed35c80d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 19:21:21 +0100 Subject: [PATCH 61/71] EXSWHTEC-118 - Implement tests for hipFuncGetAttribute #61 Change-Id: Ie817859ef51701a3758dd44736e947c5b20c2d46 --- catch/hipTestMain/config/config_amd_linux | 2 + catch/hipTestMain/config/config_amd_windows | 3 + catch/unit/module/CMakeLists.txt | 4 + catch/unit/module/hipFuncGetAttribute.cc | 96 +++++++++++++++++++ .../hipModuleLaunchCooperativeKernel.cc | 16 ++-- 5 files changed, 113 insertions(+), 8 deletions(-) create mode 100644 catch/unit/module/hipFuncGetAttribute.cc diff --git a/catch/hipTestMain/config/config_amd_linux b/catch/hipTestMain/config/config_amd_linux
index 66107df9e1..441b3dc9ed 100644 --- a/catch/hipTestMain/config/config_amd_linux +++ b/catch/hipTestMain/config/config_amd_linux @@ -136,6 +136,8 @@ "=== Below 2 tests are disable due to defect EXSWHTEC-369 ===", "Unit_Device_ilogbf_Accuracy_Positive", "Unit_Device_ilogb_Accuracy_Positive", + "NOTE: The following test is disabled due to defect - EXSWHTEC-245", + "Unit_hipFuncGetAttribute_Negative_Parameters", "Unit_hipMemAddressFree_negative", "Unit_hipMemAddressReserve_AlignmentTest", "Unit_hipMemAddressReserve_Negative", diff --git a/catch/hipTestMain/config/config_amd_windows b/catch/hipTestMain/config/config_amd_windows index 5631444f2b..dacd9280c5 100644 --- a/catch/hipTestMain/config/config_amd_windows +++ b/catch/hipTestMain/config/config_amd_windows @@ -229,6 +229,9 @@ "=== Below 2 tests are disable due to defect EXSWHTEC-369 ===", "Unit_Device_ilogbf_Accuracy_Positive", "Unit_Device_ilogb_Accuracy_Positive", + "NOTE: The following test is disabled due to defect - EXSWHTEC-245", + "Unit_hipFuncGetAttribute_Negative_Parameters", + "Unit_hipMemAddressFree_negative", "Unit_hipMemAddressReserve_AlignmentTest", "Unit_hipGraphAddMemcpyNode_Negative_Parameters", "Unit_hipMemCreate_ChkWithKerLaunch", diff --git a/catch/unit/module/CMakeLists.txt b/catch/unit/module/CMakeLists.txt index 76ca9e9ec6..6ae01bfbc5 100644 --- a/catch/unit/module/CMakeLists.txt +++ b/catch/unit/module/CMakeLists.txt @@ -31,6 +31,7 @@ set(TEST_SRC hipModuleGetTexRef.cc hipModuleLaunchCooperativeKernel.cc hipModuleLaunchCooperativeKernelMultiDevice.cc + hipFuncGetAttribute.cc ) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/get_function_module.code @@ -63,6 +64,9 @@ add_custom_target(get_tex_ref_module ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/get # Note to pass arch use format like -DOFFLOAD_ARCH_STR="--offload-arch=gfx900 --offload-arch=gfx906" # having space at the start/end of OFFLOAD_ARCH_STR can cause build failures +add_custom_command(OUTPUT 
${CMAKE_CURRENT_BINARY_DIR}/get_function_module.code + COMMAND ${CMAKE_CXX_COMPILER} --genco --std=c++17 ${CMAKE_CURRENT_SOURCE_DIR}/get_function_module.cc -o get_function_module.code + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/get_function_module.cc) if(HIP_PLATFORM MATCHES "amd") set(TEST_SRC diff --git a/catch/unit/module/hipFuncGetAttribute.cc b/catch/unit/module/hipFuncGetAttribute.cc new file mode 100644 index 0000000000..c55b5179d3 --- /dev/null +++ b/catch/unit/module/hipFuncGetAttribute.cc @@ -0,0 +1,96 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include "hip_module_common.hh"
+
+#include
+#include
+#include
+
+// Returns the module loaded from "get_function_module.code". The module guard is a
+// function-local static, so the module is loaded once and shared by all callers.
+static hipModule_t GetModule() {
+  // Presumably forces runtime initialization before the module is loaded - TODO confirm.
+  HIP_CHECK(hipFree(nullptr));
+  static const auto mg = ModuleGuard::LoadModule("get_function_module.code");
+  return mg.module();
+}
+
+TEST_CASE("Unit_hipFuncGetAttribute_Positive_Basic") {
+  hipFunction_t kernel = GetKernel(GetModule(), "GlobalKernel");
+
+  int value;
+
+  SECTION("binaryVersion") {
+    HIP_CHECK(hipFuncGetAttribute(&value, HIP_FUNC_ATTRIBUTE_BINARY_VERSION, kernel));
+#if HT_NVIDIA
+    const auto major = GetDeviceAttribute(hipDeviceAttributeComputeCapabilityMajor, 0);
+    const auto minor = GetDeviceAttribute(hipDeviceAttributeComputeCapabilityMinor, 0);
+    REQUIRE(value == major * 10 + minor);
+#elif HT_AMD
+    REQUIRE(value > 0);
+#endif
+  }
+
+  SECTION("cacheModeCA") {
+    HIP_CHECK(hipFuncGetAttribute(&value, HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA, kernel));
+    REQUIRE((value == 0 || value == 1));
+  }
+
+  SECTION("maxThreadsPerBlock") {
+    HIP_CHECK(hipFuncGetAttribute(&value, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, kernel));
+    REQUIRE(value == GetDeviceAttribute(hipDeviceAttributeMaxThreadsPerBlock, 0));
+  }
+
+  SECTION("numRegs") {
+    HIP_CHECK(hipFuncGetAttribute(&value, HIP_FUNC_ATTRIBUTE_NUM_REGS, kernel));
+    REQUIRE(value >= 0);
+  }
+
+  SECTION("ptxVersion") {
+    HIP_CHECK(hipFuncGetAttribute(&value, HIP_FUNC_ATTRIBUTE_PTX_VERSION, kernel));
+    REQUIRE(value > 0);
+  }
+
+  SECTION("sharedSizeBytes") {
+    HIP_CHECK(hipFuncGetAttribute(&value, HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, kernel));
+    REQUIRE(value <= GetDeviceAttribute(hipDeviceAttributeMaxSharedMemoryPerBlock, 0));
+  }
+}
+
+TEST_CASE("Unit_hipFuncGetAttribute_Negative_Parameters") {
+  hipFunction_t kernel = GetKernel(GetModule(), "GlobalKernel");
+
+  int value;
+
+  SECTION("value == nullptr") {
+    HIP_CHECK_ERROR(hipFuncGetAttribute(nullptr, HIP_FUNC_ATTRIBUTE_BINARY_VERSION, kernel),
+                    hipErrorInvalidValue);
+  }
+
+  SECTION("invalid attribute") {
+    // -1 is used as an attribute value the API is expected to reject.
+    HIP_CHECK_ERROR(hipFuncGetAttribute(&value, static_cast<hipFunction_attribute>(-1), kernel),
+                    hipErrorInvalidValue);
+  }
+
+  SECTION("hfunc == nullptr") {
+    HIP_CHECK_ERROR(hipFuncGetAttribute(&value, HIP_FUNC_ATTRIBUTE_BINARY_VERSION, nullptr),
+                    hipErrorInvalidResourceHandle);
+  }
+}
\ No newline at end of file
diff --git a/catch/unit/module/hipModuleLaunchCooperativeKernel.cc b/catch/unit/module/hipModuleLaunchCooperativeKernel.cc index 0ca6a31293..cf92152bce 100644 --- a/catch/unit/module/hipModuleLaunchCooperativeKernel.cc +++ b/catch/unit/module/hipModuleLaunchCooperativeKernel.cc @@ -97,17 +97,17 @@ TEST_CASE("Unit_hipModuleLaunchCooperativeKernel_Positive_Parameters") { hipFunction_t f = GetKernel(mg.module(), "NOPKernel"); SECTION("blockDim.x == maxBlockDimX") { - const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX); + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0); HIP_CHECK(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, x, 1, 1, 0, nullptr, nullptr)); } SECTION("blockDim.y == maxBlockDimY") { - const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY); + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0); HIP_CHECK(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, y, 1, 1, 0, nullptr, nullptr)); } SECTION("blockDim.z == maxBlockDimZ") { - const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ); + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0); HIP_CHECK(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, z, 1, 1, 0, nullptr, nullptr)); } } @@ -168,25 +168,25 @@ TEST_CASE("Unit_hipModuleLaunchCooperativeKernel_Negative_Parameters") { } SECTION("blockDim.x > maxBlockDimX") { - const unsigned int x = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimX) + 1u; + const unsigned int x = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimX, 0) + 1u; HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, x, 1, 1, 0, nullptr, nullptr), hipErrorInvalidValue); } SECTION("blockDim.y >
maxBlockDimY") { - const unsigned int y = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimY) + 1u; + const unsigned int y = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimY, 0) + 1u; HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, y, 1, 0, nullptr, nullptr), hipErrorInvalidValue); } SECTION("blockDim.z > maxBlockDimZ") { - const unsigned int z = GetDeviceAttribute(0, hipDeviceAttributeMaxBlockDimZ) + 1u; + const unsigned int z = GetDeviceAttribute(hipDeviceAttributeMaxBlockDimZ, 0) + 1u; HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, 1, z, 0, nullptr, nullptr), hipErrorInvalidValue); } SECTION("blockDim.x * blockDim.y * blockDim.z > maxThreadsPerBlock") { - const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxThreadsPerBlock); + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxThreadsPerBlock, 0); const unsigned int dim = std::ceil(std::cbrt(max)); HIP_CHECK_ERROR( hipModuleLaunchCooperativeKernel(f, 1, 1, 1, dim, dim, dim, 0, nullptr, nullptr), @@ -195,7 +195,7 @@ TEST_CASE("Unit_hipModuleLaunchCooperativeKernel_Negative_Parameters") { #if HT_AMD // Disabled due to defect EXSWHTEC-351 SECTION("sharedMemBytes > maxSharedMemoryPerBlock") { - const unsigned int max = GetDeviceAttribute(0, hipDeviceAttributeMaxSharedMemoryPerBlock) + 1u; + const unsigned int max = GetDeviceAttribute(hipDeviceAttributeMaxSharedMemoryPerBlock, 0) + 1u; HIP_CHECK_ERROR(hipModuleLaunchCooperativeKernel(f, 1, 1, 1, 1, 1, 1, max, nullptr, nullptr), hipErrorInvalidValue); } From abf39d2dcda4f675465031f0ecb7eb4c9ad24821 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 17:13:17 +0100 Subject: [PATCH 62/71] EXSWHTEC-299 - Extend tests for atomic arithmetic operations #286 Change-Id: I221332c33be92ec152a2cd2ede34379aaa73d996 --- catch/unit/atomics/CMakeLists.txt | 1 + catch/unit/atomics/__hip_atomic_fetch_add.cc | 132 
+++++++++++++++++++ catch/unit/atomics/arithmetic_common.hh | 99 ++++++++++---- 3 files changed, 209 insertions(+), 23 deletions(-) create mode 100644 catch/unit/atomics/__hip_atomic_fetch_add.cc diff --git a/catch/unit/atomics/CMakeLists.txt b/catch/unit/atomics/CMakeLists.txt index 1ec472bffc..5974902732 100644 --- a/catch/unit/atomics/CMakeLists.txt +++ b/catch/unit/atomics/CMakeLists.txt @@ -48,6 +48,7 @@ set(TEST_SRC atomicDec.cc atomicCAS.cc atomicCAS_system.cc + __hip_atomic_fetch_add.cc atomicExch.cc atomicExch_system.cc __hip_atomic_fetch_and.cc diff --git a/catch/unit/atomics/__hip_atomic_fetch_add.cc b/catch/unit/atomics/__hip_atomic_fetch_add.cc new file mode 100644 index 0000000000..075b2b858e --- /dev/null +++ b/catch/unit/atomics/__hip_atomic_fetch_add.cc @@ -0,0 +1,132 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "arithmetic_common.hh" + +#include + +/** + * @addtogroup __hip_atomic_fetch_add __hip_atomic_fetch_add + * @{ + * @ingroup AtomicsTest + * ________________________ + * Test cases from other modules: + * - @ref Unit_AtomicBuiltins_Negative_Parameters_RTC + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * addition on a target memory location. Each thread will add the same value to the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory are validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads add to a single, compile-time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of __hip_atomic_fetch_add + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - WAVEFRONT memory scope. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_add.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_add_Positive_Wavefront", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * addition on a target memory location. Each thread will add the same value to the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory is validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of __hip_atomic_fetch_add + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - WORKGROUP memory scope. 
+ * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_fetch_add.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_add_Positive_Workgroup", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} \ No newline at end of file diff --git a/catch/unit/atomics/arithmetic_common.hh b/catch/unit/atomics/arithmetic_common.hh index cc701a06a0..11a5d771ff 100644 --- a/catch/unit/atomics/arithmetic_common.hh +++ b/catch/unit/atomics/arithmetic_common.hh @@ -40,7 +40,8 @@ enum class AtomicOperation { kUnsafeAdd, kSafeAdd, kCASAdd, - kCASAddSystem + kCASAddSystem, + kBuiltinAdd }; // Constants that are passed as operands to the atomic operations @@ -88,6 +89,9 @@ __device__ TestType CASAtomicAddSystem(TestType* address, TestType val) { // Performs an atomic operation on parameter `mem` based on the `operation` enumerator. template +// Performs an atomic operation on parameter `mem` based on the `operation` enumerator. +// `memory_scope` is forwarded to the builtin operations and is by default device-wide. 
+template __device__ TestType PerformAtomicOperation(TestType* const mem) { const auto val = GetTestValue(); @@ -111,6 +115,8 @@ __device__ TestType PerformAtomicOperation(TestType* const mem) { return CASAtomicAdd(mem, val); } else if constexpr (operation == AtomicOperation::kCASAddSystem) { return CASAtomicAddSystem(mem, val); + } else if constexpr (operation == AtomicOperation::kBuiltinAdd) { + return __hip_atomic_fetch_add(mem, val, __ATOMIC_RELAXED, memory_scope); } } @@ -119,7 +125,8 @@ __device__ TestType PerformAtomicOperation(TestType* const mem) { // same memory location `global_mem`. // If `use_shared_mem` is true, `global_mem` is copied to shared memory first, the atomic // operations are executed on shared memory, and the result is copied back to `global_mem`. -template +template __global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) { __shared__ TestType shared_mem; @@ -132,7 +139,7 @@ __global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) __syncthreads(); } - old_vals[tid] = PerformAtomicOperation(mem); + old_vals[tid] = PerformAtomicOperation(mem); if constexpr (use_shared_mem) { __syncthreads(); @@ -148,6 +155,15 @@ __host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned return reinterpret_cast(byte_ptr + idx * pitch); } +// Executes arbitrary load-store operations on the range specified by `begin_addr` and `end_addr` +__device__ void GenerateMemoryTraffic(uint8_t* const begin_addr, uint8_t* const end_addr) { + for (volatile uint8_t* addr = begin_addr; addr != end_addr; ++addr) { + uint8_t val = *addr; + val ^= 0xAB; + *addr = val; + } +} + // This kernel executes the atomic operation specified by the enumerator `operation`. Results of the // atomic operations are stored in `old_vals`. `global_mem` is an array with `width` number of // elements. 
Each thread performs the atomic operation on the element that corresponds to its thread @@ -157,6 +173,9 @@ __host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned // that are scattered over different cache lines. // If `use_shared_mem` is true, `global_mem` is copied to shared memory first, the atomic operations // are executed on shared memory, and the result is copied back to `global_mem`. +// If `pitch` is greater than sizeof(TestType), random memory operations are performed in the empty +// space between consecutive atomic operations so that we can test that the atomic operations +// behave correctly even with some interference. // // For example, given that sizeof(TestType) is 1, `width` is 3, and `pitch` is 4: // @@ -165,10 +184,12 @@ __host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned // | pitch | pitch | pitch | // // In this scenario, the atomic operations will target the elements denoted with `x` (addresses 0, -// 4, 8). -template +// 4, 8). 
Random memory traffic will be generated on the addresses in between (1, 2, 3, 5, 6, 7, 9, +// 10, 11) +template __global__ void TestKernel(TestType* const global_mem, TestType* const old_vals, - const unsigned int width, const unsigned pitch) { + const unsigned int width, const unsigned int pitch) { extern __shared__ uint8_t shared_mem[]; const auto tid = cg::this_grid().thread_rank(); @@ -183,8 +204,18 @@ __global__ void TestKernel(TestType* const global_mem, TestType* const old_vals, __syncthreads(); } - old_vals[tid] = - PerformAtomicOperation(PitchedOffset(mem, pitch, tid % width)); + const auto n = cooperative_groups::this_grid().size() - width; + + TestType* atomic_addr = PitchedOffset(mem, pitch, tid % width); + + if (tid < n) { + old_vals[tid] = PerformAtomicOperation( + PitchedOffset(mem, pitch, tid % width)); + } else { + uint8_t* const begin_addr = reinterpret_cast(atomic_addr + 1); + uint8_t* const end_addr = reinterpret_cast(atomic_addr) + pitch; + GenerateMemoryTraffic(begin_addr, end_addr); + } if constexpr (use_shared_mem) { __syncthreads(); @@ -201,7 +232,7 @@ struct TestParams { return blocks.x * blocks.y * blocks.z * threads.x * threads.y * threads.z; } - auto HostIterationsPerThread() const { + auto HostIterationsPerThread() const { // number of iterations per host thread return std::max(num_devices * kernel_count * ThreadCount() / 20, width); } @@ -234,7 +265,8 @@ std::tuple, std::vector> TestKernelHostRef(const if constexpr (operation == AtomicOperation::kAdd || operation == AtomicOperation::kAddSystem || operation == AtomicOperation::kUnsafeAdd || operation == AtomicOperation::kSafeAdd || operation == AtomicOperation::kCASAdd || - operation == AtomicOperation::kCASAddSystem) { + operation == AtomicOperation::kCASAddSystem || + operation == AtomicOperation::kBuiltinAdd) { res = res + val; } else if constexpr (operation == AtomicOperation::kSub || operation == AtomicOperation::kSubSystem) { @@ -248,7 +280,7 @@ std::tuple, std::vector> 
TestKernelHostRef(const for (auto i = 0u; i < p.num_devices; ++i) { for (auto j = 0u; j < p.kernel_count; ++j) { - for (auto tid = 0u; tid < p.ThreadCount(); ++tid) { + for (auto tid = 0u; tid < p.ThreadCount() - p.width; ++tid) { perform_op(tid); } } @@ -283,15 +315,16 @@ void Verify(const TestParams& p, std::vector& res_vals, std::vector +template void LaunchKernel(const TestParams& p, hipStream_t stream, TestType* const mem_ptr, TestType* const old_vals) { const auto shared_mem_size = use_shared_mem ? p.width * p.pitch : 0u; if (p.width == 1 && p.pitch == sizeof(TestType)) - TestKernel + TestKernel <<>>(mem_ptr, old_vals); else - TestKernel + TestKernel <<>>(mem_ptr, old_vals, p.width, p.pitch); } @@ -303,7 +336,8 @@ void HostAtomicOperation(const unsigned int iterations, TestType* mem, TestType* for (auto i = 0u; i < iterations; ++i) { if constexpr (operation == AtomicOperation::kAddSystem || - operation == AtomicOperation::kCASAddSystem) { + operation == AtomicOperation::kCASAddSystem || + operation == AtomicOperation::kBuiltinAdd) { old_vals[i] = __atomic_fetch_add(PitchedOffset(mem, pitch, i % width), val, __ATOMIC_RELAXED); } else if constexpr (operation == AtomicOperation::kSubSystem) { old_vals[i] = __atomic_fetch_sub(PitchedOffset(mem, pitch, i % width), val, __ATOMIC_RELAXED); @@ -339,7 +373,8 @@ void PerformHostAtomicOperation(const TestParams& p, TestType* mem, TestType* co // 2. Launch kernels based on TestParams::num_devices and TestParams::kernel_count // 3. Launch host threads based on TestParams::host_thread_count // 4. Verify the results -template +template void TestCore(const TestParams& p) { const unsigned int flags = p.alloc_type == LinearAllocs::mallocAndRegister ? 
hipHostRegisterMapped : 0u; @@ -371,7 +406,8 @@ void TestCore(const TestParams& p) { for (auto j = 0u; j < p.kernel_count; ++j) { const auto& stream = streams[i * p.kernel_count + j].stream(); const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount(); - LaunchKernel(p, stream, mem_dev.ptr(), old_vals); + LaunchKernel(p, stream, mem_dev.ptr(), + old_vals); } } @@ -397,23 +433,40 @@ inline dim3 GenerateBlockDimensions() { } // Configures and creates the TestCore for a single device, and a single kernel launch -template +template void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) { TestParams params; params.num_devices = 1; params.kernel_count = 1; - params.threads = GenerateThreadDimensions(); + if constexpr (operation == AtomicOperation::kBuiltinAdd && + memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) { + params.threads = 1; + } else if constexpr (operation == AtomicOperation::kBuiltinAdd && + memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + params.threads = dim3(warp_size); + } else { + params.threads = GenerateThreadDimensions(); + } params.width = width; params.pitch = pitch; SECTION("Global memory") { - params.blocks = GenerateBlockDimensions(); + if constexpr (operation == AtomicOperation::kBuiltinAdd && + (memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD || + memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT || + memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP)) { + params.blocks = dim3(1); + } else { + params.blocks = GenerateBlockDimensions(); + } using LA = LinearAllocs; for (const auto alloc_type : {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { params.alloc_type = alloc_type; DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { - TestCore(params); + TestCore(params); } } } @@ -421,7 +474,7 @@ void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int p 
SECTION("Shared memory") { params.blocks = dim3(1); params.alloc_type = LinearAllocs::hipMalloc; - TestCore(params); + TestCore(params); } } @@ -493,7 +546,7 @@ void MultipleDeviceMultipleKernelAndHostTest(const unsigned int num_devices, for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { params.alloc_type = alloc_type; DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { - TestCore(params); + TestCore(params); } } } \ No newline at end of file From 8f8e30e1c6357c0833d7b5c4af88ae5707446c98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 17:19:20 +0100 Subject: [PATCH 63/71] EXSWHTEC-298 - Extend tests for atomic CAS operations #287 Change-Id: Ieb3e7effc1d3f767c77c0cd7c0b20c391c4665af --- catch/unit/atomics/CMakeLists.txt | 1 + .../__hip_atomic_compare_exchange_strong.cc | 129 ++++++++++++++++++ catch/unit/atomics/arithmetic_common.hh | 20 ++- 3 files changed, 144 insertions(+), 6 deletions(-) create mode 100644 catch/unit/atomics/__hip_atomic_compare_exchange_strong.cc diff --git a/catch/unit/atomics/CMakeLists.txt b/catch/unit/atomics/CMakeLists.txt index 5974902732..ab155fae4a 100644 --- a/catch/unit/atomics/CMakeLists.txt +++ b/catch/unit/atomics/CMakeLists.txt @@ -49,6 +49,7 @@ set(TEST_SRC atomicCAS.cc atomicCAS_system.cc __hip_atomic_fetch_add.cc + __hip_atomic_compare_exchange_strong.cc atomicExch.cc atomicExch_system.cc __hip_atomic_fetch_and.cc diff --git a/catch/unit/atomics/__hip_atomic_compare_exchange_strong.cc b/catch/unit/atomics/__hip_atomic_compare_exchange_strong.cc new file mode 100644 index 0000000000..69fd72ec51 --- /dev/null +++ b/catch/unit/atomics/__hip_atomic_compare_exchange_strong.cc @@ -0,0 +1,129 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" + +#include + +/** + * @addtogroup __hip_atomic_compare_exchange_strong __hip_atomic_compare_exchange_strong + * @{ + * @ingroup AtomicsTest + */ + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * addition on a target memory location. Each thread will add the same value to the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory is validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. 
+ * + * - The test is run for: + * - All overloads of __hip_atomic_compare_exchange_strong + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - WAVEFRONT memory scope. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_compare_exchange_strong.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_compare_exchange_strong_Positive_Wavefront", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * addition on a target memory location. Each thread will add the same value to the memory location, + * storing the return value into a separate output array slot corresponding to it. Once complete, + * the output array and target memory is validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads add to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. 
+ * + * - The test is run for: + * - All overloads of __hip_atomic_compare_exchange_strong + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - WORKGROUP memory scope. + * Test source + * ------------------------ + * - unit/atomics/__hip_atomic_compare_exchange_strong.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit___hip_atomic_compare_exchange_strong_Positive_Workgroup", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} \ No newline at end of file diff --git a/catch/unit/atomics/arithmetic_common.hh b/catch/unit/atomics/arithmetic_common.hh index 11a5d771ff..d8f31e5a6e 100644 --- a/catch/unit/atomics/arithmetic_common.hh +++ b/catch/unit/atomics/arithmetic_common.hh @@ -41,7 +41,8 @@ enum class AtomicOperation { kSafeAdd, kCASAdd, kCASAddSystem, - kBuiltinAdd + kBuiltinAdd, + kBuiltinCAS }; // Constants that are passed as operands to the atomic operations @@ -117,6 +118,8 @@ __device__ TestType PerformAtomicOperation(TestType* const mem) { return CASAtomicAddSystem(mem, val); } else if constexpr (operation == AtomicOperation::kBuiltinAdd) { return __hip_atomic_fetch_add(mem, val, __ATOMIC_RELAXED, memory_scope); + } else if constexpr (operation == AtomicOperation::kBuiltinCAS) { + return BuiltinCASAtomicAdd(mem, val); } } @@ -266,7 +269,8 @@ std::tuple, 
std::vector> TestKernelHostRef(const operation == AtomicOperation::kUnsafeAdd || operation == AtomicOperation::kSafeAdd || operation == AtomicOperation::kCASAdd || operation == AtomicOperation::kCASAddSystem || - operation == AtomicOperation::kBuiltinAdd) { + operation == AtomicOperation::kBuiltinAdd || + operation == AtomicOperation::kBuiltinCAS) { res = res + val; } else if constexpr (operation == AtomicOperation::kSub || operation == AtomicOperation::kSubSystem) { @@ -337,7 +341,8 @@ void HostAtomicOperation(const unsigned int iterations, TestType* mem, TestType* for (auto i = 0u; i < iterations; ++i) { if constexpr (operation == AtomicOperation::kAddSystem || operation == AtomicOperation::kCASAddSystem || - operation == AtomicOperation::kBuiltinAdd) { + operation == AtomicOperation::kBuiltinAdd || + operation == AtomicOperation::kBuiltinCAS) { old_vals[i] = __atomic_fetch_add(PitchedOffset(mem, pitch, i % width), val, __ATOMIC_RELAXED); } else if constexpr (operation == AtomicOperation::kSubSystem) { old_vals[i] = __atomic_fetch_sub(PitchedOffset(mem, pitch, i % width), val, __ATOMIC_RELAXED); @@ -438,10 +443,12 @@ void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int p TestParams params; params.num_devices = 1; params.kernel_count = 1; - if constexpr (operation == AtomicOperation::kBuiltinAdd && + if constexpr ((operation == AtomicOperation::kBuiltinAdd || + operation == AtomicOperation::kBuiltinCAS) && memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) { params.threads = 1; - } else if constexpr (operation == AtomicOperation::kBuiltinAdd && + } else if constexpr ((operation == AtomicOperation::kBuiltinAdd || + operation == AtomicOperation::kBuiltinCAS) && memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) { int warp_size = 0; HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); @@ -453,7 +460,8 @@ void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int p params.pitch = pitch; SECTION("Global 
memory") { - if constexpr (operation == AtomicOperation::kBuiltinAdd && + if constexpr ((operation == AtomicOperation::kBuiltinAdd || + operation == AtomicOperation::kBuiltinCAS) && (memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD || memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT || memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP)) { From 6b9857cf177a5535c5d233bbd21e19b603aeda2f Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Mon, 5 Feb 2024 20:11:52 +0530 Subject: [PATCH 64/71] EXSWHTEC-335 - Introduce float16 support to numerical accuracy test base #409 Change-Id: I2b8ba06b4a078b72469615d2e075d18d58de9652 --- catch/unit/math/Float16.hh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/catch/unit/math/Float16.hh b/catch/unit/math/Float16.hh index 79d2064cd2..636941d33d 100644 --- a/catch/unit/math/Float16.hh +++ b/catch/unit/math/Float16.hh @@ -33,9 +33,7 @@ class Float16 { __host__ __device__ Float16(__half2 x) : x_{__low2half(x)} {} __host__ __device__ Float16(float x) : x_{__float2half(x)} {} - __host__ __device__ bool operator==(Float16 other) const { - return static_cast<__half_raw>(x_).data == static_cast<__half_raw>(other.x_).data; - } + __host__ __device__ bool operator==(Float16 other) const { return __heq(x_, other.x_); } __host__ __device__ bool operator!=(Float16 other) const { return !(*this == other); } __host__ __device__ operator __half() const { return x_; } From a464ed491b7fbf7694ef31c70c0953ca585c6abf Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Mon, 5 Feb 2024 20:19:48 +0530 Subject: [PATCH 65/71] EXSWHTEC-330 - Implement tests for half-precision math functions #386 Change-Id: Iaa704c728aa430e701cd02c0175fbdeb9da686b0 --- catch/unit/math/CMakeLists.txt | 5 + catch/unit/math/half_precision_math.cc | 580 ++++++++++++++++++ .../half_precision_math_negative_kernels.cc | 72 +++ 3 files changed, 657 insertions(+) create mode 100644 catch/unit/math/half_precision_math.cc create mode 100644 
catch/unit/math/half_precision_math_negative_kernels.cc diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt index 3cb30e1f0c..4e344c45c0 100644 --- a/catch/unit/math/CMakeLists.txt +++ b/catch/unit/math/CMakeLists.txt @@ -33,6 +33,7 @@ set(TEST_SRC casting_float_funcs.cc casting_int_funcs.cc casting_half2_funcs.cc + half_precision_math.cc ) if(HIP_PLATFORM MATCHES "nvidia") @@ -122,3 +123,7 @@ add_test(NAME Unit_Device_casting_half2_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} casting_half2_negative_kernels.cc 53) +add_test(NAME Unit_Half_Precision_Math_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + half_precision_math_negative_kernels.cc 60) diff --git a/catch/unit/math/half_precision_math.cc b/catch/unit/math/half_precision_math.cc new file mode 100644 index 0000000000..a1524b1f7e --- /dev/null +++ b/catch/unit/math/half_precision_math.cc @@ -0,0 +1,580 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "half_precision_common.hh" + +/** + * @addtogroup HalfPrecisionMath HalfPrecisionMath + * @{ + * @ingroup MathTest + */ + + +MATH_UNARY_HP_KERNEL_DEF(hcos); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `hcos(x)` for all possible inputs. The results are + * compared against reference function `float std::cos(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(hcos, static_cast(std::cos), + ULPValidatorBuilderFactory(2)); + +MATH_UNARY_HP_KERNEL_DEF(h2cos); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `h2cos(x)` for all possible inputs. The results are + * compared against reference function `float std::cos(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(h2cos, static_cast(std::cos), + ULPValidatorBuilderFactory(2)); + + +MATH_UNARY_HP_KERNEL_DEF(hsin); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `hsin(x)` for all possible inputs. The results are + * compared against reference function `float std::sin(float)`. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(hsin, static_cast(std::sin), + ULPValidatorBuilderFactory(2)); + +MATH_UNARY_HP_KERNEL_DEF(h2sin); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `h2sin(x)` for all possible inputs. The results are + * compared against reference function `float std::sin(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(h2sin, static_cast(std::sin), + ULPValidatorBuilderFactory(2)); + + +MATH_UNARY_HP_KERNEL_DEF(hexp); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `hexp(x)` for all possible inputs. The results are + * compared against reference function `float std::exp(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(hexp, static_cast(std::exp), + ULPValidatorBuilderFactory(2)); + +MATH_UNARY_HP_KERNEL_DEF(h2exp); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `h2exp(x)` for all possible inputs. The results are + * compared against reference function `float std::exp(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(h2exp, static_cast(std::exp), + ULPValidatorBuilderFactory(2)); + + +MATH_UNARY_HP_KERNEL_DEF(hexp10); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `hexp10(x)` for all possible inputs. 
The results are + * compared against reference function `float exp10f(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(hexp10, static_cast(exp10f), + ULPValidatorBuilderFactory(2)); + +MATH_UNARY_HP_KERNEL_DEF(h2exp10); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `h2exp10(x)` for all possible inputs. The results are + * compared against reference function `float exp10f(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(h2exp10, static_cast(exp10f), + ULPValidatorBuilderFactory(2)); + + +MATH_UNARY_HP_KERNEL_DEF(hexp2); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `hexp2(x)` for all possible inputs. The results are + * compared against reference function `float std::exp2(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(hexp2, static_cast(std::exp2), + ULPValidatorBuilderFactory(2)); + +MATH_UNARY_HP_KERNEL_DEF(h2exp2); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `h2exp2(x)` for all possible inputs. The results are + * compared against reference function `float std::exp2(float)`.
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(h2exp2, static_cast(std::exp2), + ULPValidatorBuilderFactory(2)); + + +MATH_UNARY_HP_KERNEL_DEF(hlog); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `hlog(x)` for all possible inputs. The results are + * compared against reference function `float std::log(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(hlog, static_cast(std::log), + ULPValidatorBuilderFactory(1)); + +MATH_UNARY_HP_KERNEL_DEF(h2log); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `h2log(x)` for all possible inputs. The results are + * compared against reference function `float std::log(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(h2log, static_cast(std::log), + ULPValidatorBuilderFactory(1)); + + +MATH_UNARY_HP_KERNEL_DEF(hlog10); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `hlog10(x)` for all possible inputs. The results are + * compared against reference function `float std::log10(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(hlog10, static_cast(std::log10), + ULPValidatorBuilderFactory(2)); + +MATH_UNARY_HP_KERNEL_DEF(h2log10); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `h2log10(x)` for all possible inputs. 
The results are + * compared against reference function `float std::log10(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(h2log10, static_cast(std::log10), + ULPValidatorBuilderFactory(2)); + + +MATH_UNARY_HP_KERNEL_DEF(hlog2); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `hlog2(x)` for all possible inputs. The results are + * compared against reference function `float std::log2(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(hlog2, static_cast(std::log2), + ULPValidatorBuilderFactory(1)); + +MATH_UNARY_HP_KERNEL_DEF(h2log2); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `h2log2(x)` for all possible inputs. The results are + * compared against reference function `float std::log2(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(h2log2, static_cast(std::log2), + ULPValidatorBuilderFactory(1)); + + +MATH_UNARY_HP_KERNEL_DEF(hsqrt); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `hsqrt(x)` for all possible inputs. The results are + * compared against reference function `float std::sqrt(float)`. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(hsqrt, static_cast(std::sqrt), + ULPValidatorBuilderFactory(1)); + +MATH_UNARY_HP_KERNEL_DEF(h2sqrt); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `h2sqrt(x)` for all possible inputs. The results are + * compared against reference function `float std::sqrt(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(h2sqrt, static_cast(std::sqrt), + ULPValidatorBuilderFactory(1)); + + +MATH_UNARY_HP_KERNEL_DEF(hceil); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `hceil(x)` for all possible inputs. The results are + * compared against reference function `float std::ceil(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(hceil, static_cast(std::ceil), + EqValidatorBuilderFactory()); + +MATH_UNARY_HP_KERNEL_DEF(h2ceil); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `h2ceil(x)` for all possible inputs. The results are + * compared against reference function `float std::ceil(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(h2ceil, static_cast(std::ceil), + EqValidatorBuilderFactory()); + + +MATH_UNARY_HP_KERNEL_DEF(hfloor); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `hfloor(x)` for all possible inputs. 
The results are + * compared against reference function `float std::floor(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(hfloor, static_cast(std::floor), + EqValidatorBuilderFactory()); + +MATH_UNARY_HP_KERNEL_DEF(h2floor); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `h2floor(x)` for all possible inputs. The results are + * compared against reference function `float std::floor(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(h2floor, static_cast(std::floor), + EqValidatorBuilderFactory()); + + +MATH_UNARY_HP_KERNEL_DEF(htrunc); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `htrunc(x)` for all possible inputs. The results are + * compared against reference function `float std::trunc(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(htrunc, static_cast(std::trunc), + EqValidatorBuilderFactory()); + +MATH_UNARY_HP_KERNEL_DEF(h2trunc); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `h2trunc(x)` for all possible inputs. The results are + * compared against reference function `float std::trunc(float)`. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(h2trunc, static_cast(std::trunc), + EqValidatorBuilderFactory()); + + +static float hrcp_ref(float x) { return 1.0f / x; } + +MATH_UNARY_HP_KERNEL_DEF(hrcp); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `hrcp(x)` for all possible inputs. The results are compared against reference function `hrcp_ref` (`1.0f / x` evaluated in `float`). + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(hrcp, hrcp_ref, EqValidatorBuilderFactory()); + +MATH_UNARY_HP_KERNEL_DEF(h2rcp); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `h2rcp(x)` for all possible inputs. The results are compared against reference function `hrcp_ref` (`1.0f / x` evaluated in `float`). + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(h2rcp, hrcp_ref, EqValidatorBuilderFactory()); + + +static float hrsqrt_ref(float x) { return 1.0f / std::sqrt(x); } + +MATH_UNARY_HP_KERNEL_DEF(hrsqrt); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `hrsqrt(x)` for all possible inputs. The results are compared against reference function `hrsqrt_ref` (`1.0f / std::sqrt(x)` evaluated in `float`). + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(hrsqrt, hrsqrt_ref, EqValidatorBuilderFactory()); + +MATH_UNARY_HP_KERNEL_DEF(h2rsqrt); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `h2rsqrt(x)` for all possible inputs. The results are compared against reference function `hrsqrt_ref` (`1.0f / std::sqrt(x)` evaluated in `float`).
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(h2rsqrt, hrsqrt_ref, EqValidatorBuilderFactory()); + + +MATH_UNARY_HP_KERNEL_DEF(hrint); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `hrint(x)` for all possible inputs. The results are + * compared against reference function `float std::rint(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(hrint, static_cast(std::rint), + EqValidatorBuilderFactory()); + +MATH_UNARY_HP_KERNEL_DEF(h2rint); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `h2rint(x)` for all possible inputs. The results are + * compared against reference function `float std::rint(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_math.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(h2rint, static_cast(std::rint), + EqValidatorBuilderFactory()); \ No newline at end of file diff --git a/catch/unit/math/half_precision_math_negative_kernels.cc b/catch/unit/math/half_precision_math_negative_kernels.cc new file mode 100644 index 0000000000..bf0338974d --- /dev/null +++ b/catch/unit/math/half_precision_math_negative_kernels.cc @@ -0,0 +1,72 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + + +#define UNARY_HALF_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half* x) { __half result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { __half result = func_name(x); } + +UNARY_HALF_NEGATIVE_KERNELS(hcos) +UNARY_HALF_NEGATIVE_KERNELS(hsin) +UNARY_HALF_NEGATIVE_KERNELS(hexp) +UNARY_HALF_NEGATIVE_KERNELS(hexp10) +UNARY_HALF_NEGATIVE_KERNELS(hexp2) +UNARY_HALF_NEGATIVE_KERNELS(hlog) +UNARY_HALF_NEGATIVE_KERNELS(hlog10) +UNARY_HALF_NEGATIVE_KERNELS(hlog2) +UNARY_HALF_NEGATIVE_KERNELS(hsqrt) +UNARY_HALF_NEGATIVE_KERNELS(hceil) +UNARY_HALF_NEGATIVE_KERNELS(hfloor) +UNARY_HALF_NEGATIVE_KERNELS(htrunc) +UNARY_HALF_NEGATIVE_KERNELS(hrcp) +UNARY_HALF_NEGATIVE_KERNELS(hrsqrt) +UNARY_HALF_NEGATIVE_KERNELS(hrint) + + +#define UNARY_HALF2_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half2* x) { __half2 result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { __half2 result = func_name(x); } + +UNARY_HALF2_NEGATIVE_KERNELS(h2cos) +UNARY_HALF2_NEGATIVE_KERNELS(h2sin) +UNARY_HALF2_NEGATIVE_KERNELS(h2exp) +UNARY_HALF2_NEGATIVE_KERNELS(h2exp10) +UNARY_HALF2_NEGATIVE_KERNELS(h2exp2) +UNARY_HALF2_NEGATIVE_KERNELS(h2log) +UNARY_HALF2_NEGATIVE_KERNELS(h2log10) +UNARY_HALF2_NEGATIVE_KERNELS(h2log2) +UNARY_HALF2_NEGATIVE_KERNELS(h2sqrt) +UNARY_HALF2_NEGATIVE_KERNELS(h2ceil) +UNARY_HALF2_NEGATIVE_KERNELS(h2floor) +UNARY_HALF2_NEGATIVE_KERNELS(h2trunc) +UNARY_HALF2_NEGATIVE_KERNELS(h2rcp) +UNARY_HALF2_NEGATIVE_KERNELS(h2rsqrt) +UNARY_HALF2_NEGATIVE_KERNELS(h2rint) From 6ae8cb9e7c58e90c50f6bd058a019499fdf68548 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Tue, 6 Feb 2024 16:59:51 +0530 Subject: [PATCH 66/71] EXSWHTEC-329 - Implement tests for half-precision arithmetic functions #410 Change-Id: I754e1aa8a84d775340b9037edb90e75431849bb0 --- catch/unit/math/CMakeLists.txt | 5 + 
catch/unit/math/half_precision_arithmetic.cc | 441 ++++++++++++++++++ ...f_precision_arithmetic_negative_kernels.cc | 124 +++++ 3 files changed, 570 insertions(+) create mode 100644 catch/unit/math/half_precision_arithmetic.cc create mode 100644 catch/unit/math/half_precision_arithmetic_negative_kernels.cc diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt index 4e344c45c0..e3490dbb51 100644 --- a/catch/unit/math/CMakeLists.txt +++ b/catch/unit/math/CMakeLists.txt @@ -34,6 +34,7 @@ set(TEST_SRC casting_int_funcs.cc casting_half2_funcs.cc half_precision_math.cc + half_precision_arithmetic.cc ) if(HIP_PLATFORM MATCHES "nvidia") @@ -127,3 +128,7 @@ add_test(NAME Unit_Half_Precision_Math_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} half_precision_math_negative_kernels.cc 60) +add_test(NAME Unit_Half_Precision_Arithmetic_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + half_precision_arithmetic_negative_kernels.cc 88) diff --git a/catch/unit/math/half_precision_arithmetic.cc b/catch/unit/math/half_precision_arithmetic.cc new file mode 100644 index 0000000000..b909fb04af --- /dev/null +++ b/catch/unit/math/half_precision_arithmetic.cc @@ -0,0 +1,441 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "half_precision_common.hh" + +/** + * @addtogroup HalfPrecisionArithmetic HalfPrecisionArithmetic + * @{ + * @ingroup MathTest + */ + + +MATH_UNARY_HP_KERNEL_DEF(__habs); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__habs(x)` for all possible inputs. The results are + * compared against reference function `float std::abs(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(__habs, static_cast(std::abs), + EqValidatorBuilderFactory()); + +MATH_UNARY_HP_KERNEL_DEF(__habs2); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__habs2(x)` for all possible inputs. The results are + * compared against reference function `float std::abs(float)`. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(__habs2, static_cast(std::abs), + EqValidatorBuilderFactory()); + + +static float __hneg_ref(float x) { return -x; } + +MATH_UNARY_HP_KERNEL_DEF(__hneg); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hneg(x)` for all possible inputs. The error bounds are + * IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(__hneg, __hneg_ref, EqValidatorBuilderFactory()); + +MATH_UNARY_HP_KERNEL_DEF(__hneg2); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hneg2(x)` for all possible inputs. The error bounds are + * IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(__hneg2, __hneg_ref, EqValidatorBuilderFactory()); + + +// Wrapper to avoid ambiguity error with __hadd(int, int) +__device__ __half __hadd_wrapper(__half x1, __half x2) { return __hadd(x1, x2); } + +static float __hadd_ref(float x1, float x2) { return x1 + x2; } + +MATH_BINARY_HP_KERNEL_DEF(__hadd_wrapper); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hadd(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hadd_wrapper, __hadd_ref, EqValidatorBuilderFactory()); + +MATH_BINARY_HP_KERNEL_DEF(__hadd2); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hadd2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hadd2, __hadd_ref, EqValidatorBuilderFactory()); + + +static float __hadd_sat_ref(float x1, float x2) { return std::clamp(x1 + x2, 0.0f, 1.0f); } + +MATH_BINARY_HP_KERNEL_DEF(__hadd_sat); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hadd_sat(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hadd_sat, __hadd_sat_ref, EqValidatorBuilderFactory()); + +MATH_BINARY_HP_KERNEL_DEF(__hadd2_sat); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hadd2_sat(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hadd2_sat, __hadd_sat_ref, EqValidatorBuilderFactory()); + + +static float __hsub_ref(float x1, float x2) { return x1 - x2; } + +MATH_BINARY_HP_KERNEL_DEF(__hsub); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hsub(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hsub, __hsub_ref, EqValidatorBuilderFactory()); + +MATH_BINARY_HP_KERNEL_DEF(__hsub2); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hsub2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hsub2, __hsub_ref, EqValidatorBuilderFactory()); + + +static float __hsub_sat_ref(float x1, float x2) { return std::clamp(x1 - x2, 0.0f, 1.0f); } + +MATH_BINARY_HP_KERNEL_DEF(__hsub_sat); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hsub_sat(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hsub_sat, __hsub_sat_ref, EqValidatorBuilderFactory()); + +MATH_BINARY_HP_KERNEL_DEF(__hsub2_sat); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hsub2_sat(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hsub2_sat, __hsub_sat_ref, EqValidatorBuilderFactory()); + + +static float __hmul_ref(float x1, float x2) { return x1 * x2; } + +MATH_BINARY_HP_KERNEL_DEF(__hmul); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hmul(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hmul, __hmul_ref, EqValidatorBuilderFactory()); + +MATH_BINARY_HP_KERNEL_DEF(__hmul2); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hmul2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hmul2, __hmul_ref, EqValidatorBuilderFactory()); + + +static float __hmul_sat_ref(float x1, float x2) { return std::clamp(x1 * x2, 0.0f, 1.0f); } + +MATH_BINARY_HP_KERNEL_DEF(__hmul_sat); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hmul_sat(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hmul_sat, __hmul_sat_ref, EqValidatorBuilderFactory()); + +MATH_BINARY_HP_KERNEL_DEF(__hmul2_sat); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hmul2_sat(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hmul2_sat, __hmul_sat_ref, EqValidatorBuilderFactory()); + + +static float __hdiv_ref(float x1, float x2) { return x1 / x2; } + +MATH_BINARY_HP_KERNEL_DEF(__hdiv); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hdiv(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hdiv, __hdiv_ref, EqValidatorBuilderFactory()); + +MATH_BINARY_HP_KERNEL_DEF(__h2div); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__h2div(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__h2div, __hdiv_ref, EqValidatorBuilderFactory()); + + +MATH_TERNARY_HP_KERNEL_DEF(__hfma); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hfma(x,y,z)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_TERNARY_HP_TEST_DEF_IMPL(__hfma, static_cast(std::fma), + EqValidatorBuilderFactory()); + +MATH_TERNARY_HP_KERNEL_DEF(__hfma2); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hfma2(x,y,z)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_TERNARY_HP_TEST_DEF_IMPL(__hfma2, static_cast(std::fma), + EqValidatorBuilderFactory()); + + +static float __hfma_sat_ref(float x1, float x2, float x3) { + return std::clamp(std::fma(x1, x2, x3), 0.0f, 1.0f); +} + +MATH_TERNARY_HP_KERNEL_DEF(__hfma_sat); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hfma_sat(x,y,z)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_TERNARY_HP_TEST_DEF_IMPL(__hfma_sat, __hfma_sat_ref, EqValidatorBuilderFactory()); + +MATH_TERNARY_HP_KERNEL_DEF(__hfma2_sat); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hfma2_sat(x,y,z)` against a table of difficult values, + * followed by a large number of randomly generated values. The error bounds are IEEE-compliant. + * + * Test source + * ------------------------ + * - unit/math/half_precision_arithmetic.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_TERNARY_HP_TEST_DEF_IMPL(__hfma2_sat, __hfma_sat_ref, EqValidatorBuilderFactory()); \ No newline at end of file diff --git a/catch/unit/math/half_precision_arithmetic_negative_kernels.cc b/catch/unit/math/half_precision_arithmetic_negative_kernels.cc new file mode 100644 index 0000000000..855499816d --- /dev/null +++ b/catch/unit/math/half_precision_arithmetic_negative_kernels.cc @@ -0,0 +1,124 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include +/* Dummy: empty class with only a __device__ constructor and destructor. The kernels below pass it (or a __half pointer) in place of one __half operand. NOTE(review): the file name ends in _negative_kernels.cc, so each such call is presumably expected to be rejected by the compiler - confirm. */ +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* UNARY_HALF_NEGATIVE_KERNELS(f): defines f_kernel_v1 and f_kernel_v2, which call f(x) with a __half pointer and with a Dummy argument respectively. */ +#define UNARY_HALF_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half* x) { __half result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { __half result = func_name(x); } +/* BINARY_HALF_NEGATIVE_KERNELS(f): defines f_kernel_v1 to f_kernel_v4; each calls f(x, y) with exactly one operand replaced by a __half pointer (v1, v2) or by a Dummy (v3, v4). */ +#define BINARY_HALF_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half* x, __half y) { __half result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(__half x, __half* y) { __half result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, __half y) { __half result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(__half x, Dummy y) { __half result = func_name(x, y); } +/* TERNARY_HALF_NEGATIVE_KERNELS(f): defines f_kernel_v1 to f_kernel_v6; each calls f(x, y, z) with exactly one operand replaced by a __half pointer (v1 to v3) or by a Dummy (v4 to v6). */ +#define TERNARY_HALF_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half* x, __half y, __half z) { \ + __half result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v2(__half x, __half* y, __half z) { \ + __half result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v3(__half x, __half y, __half* z) { \ + __half result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v4(Dummy x, __half y, __half z) { \ + __half result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v5(__half x, Dummy y, __half z) { \ + __half result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v6(__half x, __half y, Dummy z) { \ + __half result = func_name(x, y, z); \ + } +/* Instantiate the kernels for 2 unary, 7 binary and 2 ternary __half intrinsics: 2*2 + 7*4 + 2*6 = 44 kernels. */ +UNARY_HALF_NEGATIVE_KERNELS(__habs) +UNARY_HALF_NEGATIVE_KERNELS(__hneg) + +BINARY_HALF_NEGATIVE_KERNELS(__hadd) +BINARY_HALF_NEGATIVE_KERNELS(__hadd_sat) +BINARY_HALF_NEGATIVE_KERNELS(__hsub) +BINARY_HALF_NEGATIVE_KERNELS(__hsub_sat) +BINARY_HALF_NEGATIVE_KERNELS(__hmul) +BINARY_HALF_NEGATIVE_KERNELS(__hmul_sat) +BINARY_HALF_NEGATIVE_KERNELS(__hdiv) + +TERNARY_HALF_NEGATIVE_KERNELS(__hfma) +TERNARY_HALF_NEGATIVE_KERNELS(__hfma_sat) + +/* The macros that follow repeat the same pattern for the __half2 intrinsics. */ 
+#define UNARY_HALF2_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half2* x) { __half2 result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { __half2 result = func_name(x); } +/* UNARY_HALF2_NEGATIVE_KERNELS (just above) defines f_kernel_v1 and f_kernel_v2, calling f(x) with a __half2 pointer and with a Dummy. BINARY_HALF2_NEGATIVE_KERNELS(f) defines f_kernel_v1 to f_kernel_v4; each calls f(x, y) with exactly one operand replaced by a __half2 pointer (v1, v2) or by a Dummy (v3, v4). */ +#define BINARY_HALF2_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half2* x, __half2 y) { \ + __half2 result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v2(__half2 x, __half2* y) { \ + __half2 result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v3(Dummy x, __half2 y) { __half2 result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(__half2 x, Dummy y) { __half2 result = func_name(x, y); } +/* TERNARY_HALF2_NEGATIVE_KERNELS(f): defines f_kernel_v1 to f_kernel_v6; each calls f(x, y, z) with exactly one operand replaced by a __half2 pointer (v1 to v3) or by a Dummy (v4 to v6). */ +#define TERNARY_HALF2_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half2* x, __half2 y, __half2 z) { \ + __half2 result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v2(__half2 x, __half2* y, __half2 z) { \ + __half2 result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v3(__half2 x, __half2 y, __half2* z) { \ + __half2 result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v4(Dummy x, __half2 y, __half2 z) { \ + __half2 result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v5(__half2 x, Dummy y, __half2 z) { \ + __half2 result = func_name(x, y, z); \ + } \ + __global__ void func_name##_kernel_v6(__half2 x, __half2 y, Dummy z) { \ + __half2 result = func_name(x, y, z); \ + } +/* Instantiate the kernels for 2 unary, 7 binary and 2 ternary __half2 intrinsics: 2*2 + 7*4 + 2*6 = 44 kernels. NOTE(review): with the 44 __half kernels defined earlier in this file the total is 88, which matches the trailing argument given for this file in catch/unit/math/CMakeLists.txt - presumably the expected compile-error count; confirm against compileAndCaptureOutput.py. */ +UNARY_HALF2_NEGATIVE_KERNELS(__habs2) +UNARY_HALF2_NEGATIVE_KERNELS(__hneg2) + +BINARY_HALF2_NEGATIVE_KERNELS(__hadd2) +BINARY_HALF2_NEGATIVE_KERNELS(__hadd2_sat) +BINARY_HALF2_NEGATIVE_KERNELS(__hsub2) +BINARY_HALF2_NEGATIVE_KERNELS(__hsub2_sat) +BINARY_HALF2_NEGATIVE_KERNELS(__hmul2) +BINARY_HALF2_NEGATIVE_KERNELS(__hmul2_sat) +BINARY_HALF2_NEGATIVE_KERNELS(__h2div) + +TERNARY_HALF2_NEGATIVE_KERNELS(__hfma2) +TERNARY_HALF2_NEGATIVE_KERNELS(__hfma2_sat) \ No newline at end of file From 
b9ea146b28f169a366c5f90763f7ac49e94a79b3 Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Tue, 6 Feb 2024 17:39:51 +0530 Subject: [PATCH 67/71] EXSWHTEC-331 - Implement tests for half-precision comparison functions #411 Change-Id: I378ec4922dc5d3807d5418d690972708549ec764 --- catch/hipTestMain/config/config_amd_linux | 11 + catch/hipTestMain/config/config_amd_windows | 11 + catch/unit/math/CMakeLists.txt | 7 +- catch/unit/math/half_precision_comparison.cc | 847 ++++++++++++++++++ ...f_precision_comparison_negative_kernels.cc | 120 +++ 5 files changed, 995 insertions(+), 1 deletion(-) create mode 100644 catch/unit/math/half_precision_comparison.cc create mode 100644 catch/unit/math/half_precision_comparison_negative_kernels.cc diff --git a/catch/hipTestMain/config/config_amd_linux b/catch/hipTestMain/config/config_amd_linux index 441b3dc9ed..0f6f626367 100644 --- a/catch/hipTestMain/config/config_amd_linux +++ b/catch/hipTestMain/config/config_amd_linux @@ -130,6 +130,17 @@ "Unit_deviceAllocation_InOneThread_AccessInAllThreads", "=== Below test is disabled due to defect EXSWHTEC-347 ===", "Unit_hipPointerSetAttribute_Positive_SyncMemops", + "=== Below tests are disabled due to defect EXSWHTEC-356 ===", + "Unit_Device___hisinf2_Accuracy_Positive", + "Unit_Device___hisnan2_Accuracy_Positive", + "Unit_Device___hbequ2_Accuracy_Positive", + "Unit_Device___hne_Accuracy_Positive", + "Unit_Device___hne2_Accuracy_Positive", + "Unit_Device___hbne2_Accuracy_Positive", + "Unit_Device___hbgeu2_Accuracy_Positive", + "Unit_Device___hbgtu2_Accuracy_Positive", + "Unit_Device___hbleu2_Accuracy_Positive", + "Unit_Device___hbltu2_Accuracy_Positive", "=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===", "Unit_hiprtc_stdheaders", "Unit_hipGraphAddMemcpyNode_Negative_Parameters", diff --git a/catch/hipTestMain/config/config_amd_windows b/catch/hipTestMain/config/config_amd_windows index dacd9280c5..1522525fea 100644 --- 
a/catch/hipTestMain/config/config_amd_windows +++ b/catch/hipTestMain/config/config_amd_windows @@ -455,6 +455,17 @@ "Unit_Assert_Positive_Basic_KernelFail", "Unit_StaticAssert_Positive_Basic", "Unit_StaticAssert_Negative_Basic", + "=== Below tests are disabled due to defect EXSWHTEC-356 ===", + "Unit_Device___hisinf2_Accuracy_Positive", + "Unit_Device___hisnan2_Accuracy_Positive", + "Unit_Device___hbequ2_Accuracy_Positive", + "Unit_Device___hne_Accuracy_Positive", + "Unit_Device___hne2_Accuracy_Positive", + "Unit_Device___hbne2_Accuracy_Positive", + "Unit_Device___hbgeu2_Accuracy_Positive", + "Unit_Device___hbgtu2_Accuracy_Positive", + "Unit_Device___hbleu2_Accuracy_Positive", + "Unit_Device___hbltu2_Accuracy_Positive", #endif "End of json" ] diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt index e3490dbb51..b67c251392 100644 --- a/catch/unit/math/CMakeLists.txt +++ b/catch/unit/math/CMakeLists.txt @@ -34,7 +34,8 @@ set(TEST_SRC casting_int_funcs.cc casting_half2_funcs.cc half_precision_math.cc - half_precision_arithmetic.cc + half_precision_arithmetic.cc + half_precision_comparison.cc ) if(HIP_PLATFORM MATCHES "nvidia") @@ -132,3 +133,7 @@ add_test(NAME Unit_Half_Precision_Arithmetic_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} half_precision_arithmetic_negative_kernels.cc 88) +add_test(NAME Unit_Half_Precision_Comparison_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + half_precision_comparison_negative_kernels.cc 168) diff --git a/catch/unit/math/half_precision_comparison.cc b/catch/unit/math/half_precision_comparison.cc new file mode 100644 index 0000000000..c736054e6d --- /dev/null +++ b/catch/unit/math/half_precision_comparison.cc @@ -0,0 +1,847 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "half_precision_common.hh" + +/** + * @addtogroup HalfPrecisionComparison HalfPrecisionComparison + * @{ + * @ingroup MathTest + */ + +/********** Unary Functions **********/ + +#define MATH_BOOL_UNARY_HP_TEST_DEF(func_name, ref_func) \ + __global__ void func_name##_kernel(bool* const ys, const size_t num_xs, Float16* const xs) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(xs[i]); \ + } \ + } \ + \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + UnaryHalfPrecisionTest(func_name##_kernel, ref_func, EqValidatorBuilderFactory()); \ + } + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hisinf(x)` for all possible inputs. The results are + * compared against reference function `bool std::isinf(float)`. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BOOL_UNARY_HP_TEST_DEF(__hisinf, static_cast(std::isinf)) + +static float __hisinf2_ref(float x) { return static_cast(std::isinf(x)); } + +MATH_UNARY_HP_KERNEL_DEF(__hisinf2) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hisinf2(x)` for all possible inputs. The results are + * compared against reference function `float std::isinf(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(__hisinf2, __hisinf2_ref, EqValidatorBuilderFactory()); + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hisnan(x)` for all possible inputs. The results are + * compared against reference function `bool std::isnan(float)`. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BOOL_UNARY_HP_TEST_DEF(__hisnan, static_cast(std::isnan)) + +static float __hisnan2_ref(float x) { return static_cast(std::isnan(x)); } + +MATH_UNARY_HP_KERNEL_DEF(__hisnan2) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hisnan2(x)` for all possible inputs. The results are + * compared against reference function `float std::isnan(float)`. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_UNARY_HP_TEST_DEF_IMPL(__hisnan2, __hisnan2_ref, EqValidatorBuilderFactory()); + +/********** Binary Functions **********/ + +#define MATH_COMPARISON_HP_TEST_DEF(func_name, ref_func, T, RT, nan_value) \ + __global__ void func_name##_kernel(T* const ys, const size_t num_xs, Float16* const x1s, \ + Float16* const x2s) { \ + const auto tid = cg::this_grid().thread_rank(); \ + const auto stride = cg::this_grid().size(); \ + \ + for (auto i = tid; i < num_xs; i += stride) { \ + ys[i] = func_name(x1s[i], x2s[i]); \ + } \ + } \ + \ + TEST_CASE("Unit_Device_" #func_name "_Accuracy_Positive") { \ + BinaryFloatingPointTest(func_name##_kernel, ref_func, \ + EqValidatorBuilderFactory()); \ + } + + +template static T __heq_ref(float x1, float x2) { + if (std::isnan(x1) || std::isnan(x2)) { + return static_cast(nan_value); + } + return x1 == x2; +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__heq(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'equal + * to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__heq, __heq_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbeq2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'equal + * to' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbeq2, __heq_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hequ(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'equal + * to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hequ, __heq_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbequ2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against result + * of 'equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbequ2, __heq_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__heq2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'equal + * to' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__heq2, __heq_ref, Float16, float, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hequ2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'equal + * to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hequ2, __heq_ref, Float16, float, true) + + +template static T __hne_ref(float x1, float x2) { + if (std::isnan(x1) || std::isnan(x2)) { + return static_cast(nan_value); + } + return x1 != x2; +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hne(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'not + * equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hne, __hne_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbne2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'not + * equal to' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbne2, __hne_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hneu(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'not + * equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hneu, __hne_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbneu2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against result + * of 'not equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbneu2, __hne_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hne2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'not + * equal to' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hne2, __hne_ref, Float16, float, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hneu2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'not + * equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hneu2, __hne_ref, Float16, float, true) + + +template static T __hge_ref(float x1, float x2) { + if (std::isnan(x1) || std::isnan(x2)) { + return static_cast(nan_value); + } + return x1 >= x2; +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hge(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hge, __hge_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbge2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than equal to' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbge2, __hge_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hgeu(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hgeu, __hge_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbgeu2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against result + * of 'greater than equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbgeu2, __hge_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hge2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than equal to' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hge2, __hge_ref, Float16, float, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hgeu2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hgeu2, __hge_ref, Float16, float, true) + + +template static T __hgt_ref(float x1, float x2) { + if (std::isnan(x1) || std::isnan(x2)) { + return static_cast(nan_value); + } + return x1 > x2; +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hgt(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hgt, __hgt_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbgt2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbgt2, __hgt_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hgtu(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hgtu, __hgt_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbgtu2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against result + * of 'greater than' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbgtu2, __hgt_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hgt2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hgt2, __hgt_ref, Float16, float, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hgtu2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of + * 'greater than' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hgtu2, __hgt_ref, Float16, float, true) + + +template static T __hle_ref(float x1, float x2) { + if (std::isnan(x1) || std::isnan(x2)) { + return static_cast(nan_value); + } + return x1 <= x2; +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hle(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hle, __hle_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hble2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than equal to' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hble2, __hle_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hleu(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hleu, __hle_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbleu2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against result + * of 'less than equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbleu2, __hle_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hle2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than equal to' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hle2, __hle_ref, Float16, float, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hleu2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than equal to' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hleu2, __hle_ref, Float16, float, true) + + +template static T __hlt_ref(float x1, float x2) { + if (std::isnan(x1) || std::isnan(x2)) { + return static_cast(nan_value); + } + return x1 < x2; +} + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hlt(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hlt, __hlt_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hblt2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hblt2, __hlt_ref, bool, bool, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hltu(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hltu, __hlt_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hbltu2(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against result + * of 'less than' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hbltu2, __hlt_ref, bool, bool, true) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hlt2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than' relational operator for float operands. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hlt2, __hlt_ref, Float16, float, false) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hltu2(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against result of 'less + * than' relational operator for float operands. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_COMPARISON_HP_TEST_DEF(__hltu2, __hlt_ref, Float16, float, true) + +MATH_BINARY_HP_KERNEL_DEF(__hmax) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hmax(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. The results are compared against reference + * function `float std::fmax(float, float)` + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hmax, static_cast(std::fmax), + EqValidatorBuilderFactory()) + +MATH_BINARY_HP_KERNEL_DEF(__hmin) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hmin(x,y)` against a table of difficult values, followed + * by a large number of randomly generated values. 
The results are compared against reference + * function `float std::fmin(float, float)` + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hmin, static_cast(std::fmin), + EqValidatorBuilderFactory()) + +static float __hmax_nan_ref(float x1, float x2) { + if (std::isnan(x1)) + return x1; + else if (std::isnan(x2)) + return x2; + else + return std::fmax(x1, x2); +} + +MATH_BINARY_HP_KERNEL_DEF(__hmax_nan) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hmax_nan(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against + * reference function `float std::fmax(float, float)` with modified result when an operand is nan. + * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hmax_nan, __hmax_nan_ref, EqValidatorBuilderFactory()) + +static float __hmin_nan_ref(float x1, float x2) { + if (std::isnan(x1)) + return x1; + else if (std::isnan(x2)) + return x2; + else + return std::fmin(x1, x2); +} + +MATH_BINARY_HP_KERNEL_DEF(__hmin_nan) + +/** + * Test Description + * ------------------------ + * - Tests the numerical accuracy of `__hmin_nan(x,y)` against a table of difficult values, + * followed by a large number of randomly generated values. The results are compared against + * reference function `float std::fmin(float, float)` with modified result when an operand is nan. 
+ * + * Test source + * ------------------------ + * - unit/math/half_precision_comparison.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +MATH_BINARY_HP_TEST_DEF_IMPL(__hmin_nan, __hmin_nan_ref, EqValidatorBuilderFactory()) \ No newline at end of file diff --git a/catch/unit/math/half_precision_comparison_negative_kernels.cc b/catch/unit/math/half_precision_comparison_negative_kernels.cc new file mode 100644 index 0000000000..a045af211c --- /dev/null +++ b/catch/unit/math/half_precision_comparison_negative_kernels.cc @@ -0,0 +1,120 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + + +#define UNARY_BOOL_HALF_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half* x) { bool result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { bool result = func_name(x); } + +#define BINARY_BOOL_HALF_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half* x, __half y) { bool result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(__half x, __half* y) { bool result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, __half y) { bool result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(__half x, Dummy y) { bool result = func_name(x, y); } + + +#define BINARY_HALF_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half* x, __half y) { __half result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(__half x, __half* y) { __half result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, __half y) { __half result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(__half x, Dummy y) { __half result = func_name(x, y); } + + +UNARY_BOOL_HALF_NEGATIVE_KERNELS(__hisinf) +UNARY_BOOL_HALF_NEGATIVE_KERNELS(__hisnan) + +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__heq) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hequ) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hne) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hneu) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hge) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hgeu) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hgt) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hgtu) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hle) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hleu) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hlt) +BINARY_BOOL_HALF_NEGATIVE_KERNELS(__hltu) + +BINARY_HALF_NEGATIVE_KERNELS(__hmax) +BINARY_HALF_NEGATIVE_KERNELS(__hmax_nan) +BINARY_HALF_NEGATIVE_KERNELS(__hmin) +BINARY_HALF_NEGATIVE_KERNELS(__hmin_nan) + + 
+#define UNARY_HALF2_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half2* x) { __half2 result = func_name(x); } \ + __global__ void func_name##_kernel_v2(Dummy x) { __half2 result = func_name(x); } + +#define BINARY_HALF2_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half2* x, __half2 y) { \ + __half2 result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v2(__half2 x, __half2* y) { \ + __half2 result = func_name(x, y); \ + } \ + __global__ void func_name##_kernel_v3(Dummy x, __half2 y) { __half2 result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(__half2 x, Dummy y) { __half2 result = func_name(x, y); } + +#define BINARY_BOOL_HALF2_NEGATIVE_KERNELS(func_name) \ + __global__ void func_name##_kernel_v1(__half2* x, __half2 y) { bool result = func_name(x, y); } \ + __global__ void func_name##_kernel_v2(__half2 x, __half2* y) { bool result = func_name(x, y); } \ + __global__ void func_name##_kernel_v3(Dummy x, __half2 y) { bool result = func_name(x, y); } \ + __global__ void func_name##_kernel_v4(__half2 x, Dummy y) { bool result = func_name(x, y); } + +UNARY_HALF2_NEGATIVE_KERNELS(__hisinf2) +UNARY_HALF2_NEGATIVE_KERNELS(__hisnan2) + +BINARY_HALF2_NEGATIVE_KERNELS(__heq2) +BINARY_HALF2_NEGATIVE_KERNELS(__hequ2) +BINARY_HALF2_NEGATIVE_KERNELS(__hne2) +BINARY_HALF2_NEGATIVE_KERNELS(__hneu2) +BINARY_HALF2_NEGATIVE_KERNELS(__hge2) +BINARY_HALF2_NEGATIVE_KERNELS(__hgeu2) +BINARY_HALF2_NEGATIVE_KERNELS(__hgt2) +BINARY_HALF2_NEGATIVE_KERNELS(__hgtu2) +BINARY_HALF2_NEGATIVE_KERNELS(__hle2) +BINARY_HALF2_NEGATIVE_KERNELS(__hleu2) +BINARY_HALF2_NEGATIVE_KERNELS(__hlt2) +BINARY_HALF2_NEGATIVE_KERNELS(__hltu2) + +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbeq2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbequ2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbne2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbneu2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbge2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbgeu2) 
+BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbgt2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbgtu2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hble2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbleu2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hblt2) +BINARY_BOOL_HALF2_NEGATIVE_KERNELS(__hbltu2) \ No newline at end of file From f5c3cdaaeb17e864fba713770f381ab654d87994 Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Tue, 6 Feb 2024 20:22:07 +0530 Subject: [PATCH 68/71] EXSWHTEC-332 - Implement tests for half precision type to/from integer casting intrinsics #417 Change-Id: Icee61126274f49f3af362e35c435a9d764378f93 --- catch/unit/math/CMakeLists.txt | 11 + catch/unit/math/casting_half2int_funcs.cc | 440 +++++++++++++++++ .../math/casting_half2int_negative_kernels.cc | 59 +++ catch/unit/math/casting_int2half_funcs.cc | 448 ++++++++++++++++++ .../math/casting_int2half_negative_kernels.cc | 59 +++ 5 files changed, 1017 insertions(+) create mode 100644 catch/unit/math/casting_half2int_funcs.cc create mode 100644 catch/unit/math/casting_half2int_negative_kernels.cc create mode 100644 catch/unit/math/casting_int2half_funcs.cc create mode 100644 catch/unit/math/casting_int2half_negative_kernels.cc diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt index b67c251392..d3d06e09f4 100644 --- a/catch/unit/math/CMakeLists.txt +++ b/catch/unit/math/CMakeLists.txt @@ -36,6 +36,8 @@ set(TEST_SRC half_precision_math.cc half_precision_arithmetic.cc half_precision_comparison.cc + casting_half2int_funcs.cc + casting_int2half_funcs.cc ) if(HIP_PLATFORM MATCHES "nvidia") @@ -137,3 +139,12 @@ add_test(NAME Unit_Half_Precision_Comparison_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} half_precision_comparison_negative_kernels.cc 168) +add_test(NAME Unit_Device_casting_half2int_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} 
${HIP_PLATFORM} ${HIP_PATH} + casting_half2int_negative_kernels.cc 78) + +add_test(NAME Unit_Device_casting_int2half_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + casting_int2half_negative_kernels.cc 78) diff --git a/catch/unit/math/casting_half2int_funcs.cc b/catch/unit/math/casting_half2int_funcs.cc new file mode 100644 index 0000000000..77c32fcb9c --- /dev/null +++ b/catch/unit/math/casting_half2int_funcs.cc @@ -0,0 +1,440 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "half_precision_common.hh" +#include "casting_common.hh" + +/** + * @addtogroup HalfPrecisionCastingIntTypes HalfPrecisionCastingIntTypes + * @{ + * @ingroup MathTest + */ + +#define CAST_HALF2INT_RN_TEST_DEF(kern_name, T) \ + CAST_KERNEL_DEF(kern_name, T, Float16) \ + CAST_F2I_RZ_REF_DEF(kern_name, T, Float16) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive") { \ + T (*ref)(Float16) = kern_name##_ref; \ + CastUnaryHalfPrecisionTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2int_rn` for all possible inputs. The results are compared against + * reference function which performs __half cast to int. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2int_rn, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2int_rz` for all possible inputs. The results are compared against + * reference function which performs __half cast to int. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2int_rz, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2int_rd` for all possible inputs. The results are compared against + * reference function which performs __half cast to int. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2int_rd, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2int_ru` for all possible inputs. 
The results are compared against + * reference function which performs __half cast to int. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2int_ru, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2uint_rn` for all possible inputs. The results are compared against + * reference function which performs __half cast to unsigned int. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2uint_rn, unsigned int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2uint_rz` for all possible inputs. The results are compared against + * reference function which performs __half cast to unsigned int. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2uint_rz, unsigned int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2uint_rd` for all possible inputs. The results are compared against + * reference function which performs __half cast to unsigned int. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2uint_rd, unsigned int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2uint_ru` for all possible inputs. The results are compared against + * reference function which performs __half cast to unsigned int. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2uint_ru, unsigned int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2short_rn` for all possible inputs. The results are compared + * against reference function which performs __half cast to short. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2short_rn, short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2short_rz` for all possible inputs. The results are compared + * against reference function which performs __half cast to short. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2short_rz, short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2short_rd` for all possible inputs. The results are compared + * against reference function which performs __half cast to short. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2short_rd, short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2short_ru` for all possible inputs. The results are compared + * against reference function which performs __half cast to short. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2short_ru, short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ushort_rn` for all possible inputs. The results are compared + * against reference function which performs __half cast to unsigned short. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ushort_rn, unsigned short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ushort_rz` for all possible inputs. The results are compared + * against reference function which performs __half cast to unsigned short. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ushort_rz, unsigned short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ushort_rd` for all possible inputs. The results are compared + * against reference function which performs __half cast to unsigned short. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ushort_rd, unsigned short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ushort_ru` for all possible inputs. The results are compared + * against reference function which performs __half cast to unsigned short. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ushort_ru, unsigned short) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ll_rn` for all possible inputs. The results are compared against + * reference function which performs __half cast to long long. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ll_rn, long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ll_rz` for all possible inputs. The results are compared against + * reference function which performs __half cast to long long. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ll_rz, long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ll_rd` for all possible inputs. The results are compared against + * reference function which performs __half cast to long long. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ll_rd, long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ll_ru` for all possible inputs. The results are compared against + * reference function which performs __half cast to long long. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ll_ru, long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ull_rn` for all possible inputs. The results are compared against + * reference function which performs __half cast to unsigned long long. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ull_rn, unsigned long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ull_rz` for all possible inputs. The results are compared against + * reference function which performs __half cast to unsigned long long. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ull_rz, unsigned long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ull_rd` for all possible inputs. The results are compared against + * reference function which performs __half cast to unsigned long long. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ull_rd, unsigned long long) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half2ull_ru` for all possible inputs. The results are compared against + * reference function which performs __half cast to unsigned long long. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_HALF2INT_RN_TEST_DEF(__half2ull_ru, unsigned long long) + +CAST_KERNEL_DEF(__half_as_short, short, Float16) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half_as_short` for all possible inputs. The results are compared + * against reference function which performs copy of __half value to short variable. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___half_as_short_Accuracy_Positive") { + short (*ref)(Float16) = type2_as_type1_ref; + CastUnaryHalfPrecisionTest(__half_as_short_kernel, ref, EqValidatorBuilderFactory()); +} + +CAST_KERNEL_DEF(__half_as_ushort, unsigned short, Float16) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__half_as_ushort` for all possible inputs. The results are compared + * against reference function which performs copy of __half value to unsigned short variable. + * + * Test source + * ------------------------ + * - unit/math/casting_half2int_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___half_as_ushort_Accuracy_Positive") { + unsigned short (*ref)(Float16) = type2_as_type1_ref; + CastUnaryHalfPrecisionTest(__half_as_ushort_kernel, ref, + EqValidatorBuilderFactory()); +} \ No newline at end of file diff --git a/catch/unit/math/casting_half2int_negative_kernels.cc b/catch/unit/math/casting_half2int_negative_kernels.cc new file mode 100644 index 0000000000..6b7d75040f --- /dev/null +++ b/catch/unit/math/casting_half2int_negative_kernels.cc @@ -0,0 +1,59 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +/* Ill-formed on purpose: v1 pointer arg, v2 unrelated-class arg, v3 unrelated-class result. */ +#define NEGATIVE_KERNELS_SHELL(func_name, T) \ + __global__ void func_name##_kernel_v1(T* result, __half* x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v2(T* result, Dummy x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v3(Dummy* result, __half x) { *result = func_name(x); } + +NEGATIVE_KERNELS_SHELL(__half2int_rn, int) +NEGATIVE_KERNELS_SHELL(__half2int_rz, int) +NEGATIVE_KERNELS_SHELL(__half2int_rd, int) +NEGATIVE_KERNELS_SHELL(__half2int_ru, int) +NEGATIVE_KERNELS_SHELL(__half2uint_rn, unsigned int) +NEGATIVE_KERNELS_SHELL(__half2uint_rz, unsigned int) +NEGATIVE_KERNELS_SHELL(__half2uint_rd, unsigned int) +NEGATIVE_KERNELS_SHELL(__half2uint_ru, unsigned int) +NEGATIVE_KERNELS_SHELL(__half2short_rn, short) +NEGATIVE_KERNELS_SHELL(__half2short_rz, short) +NEGATIVE_KERNELS_SHELL(__half2short_rd, short) +NEGATIVE_KERNELS_SHELL(__half2short_ru, short) +NEGATIVE_KERNELS_SHELL(__half_as_short, short) +NEGATIVE_KERNELS_SHELL(__half2ushort_rn, unsigned short) +NEGATIVE_KERNELS_SHELL(__half2ushort_rz, unsigned short) +NEGATIVE_KERNELS_SHELL(__half2ushort_rd, unsigned short) +NEGATIVE_KERNELS_SHELL(__half2ushort_ru, unsigned short) +NEGATIVE_KERNELS_SHELL(__half_as_ushort, unsigned short) +NEGATIVE_KERNELS_SHELL(__half2ll_rn, long long) +NEGATIVE_KERNELS_SHELL(__half2ll_rz, long long) +NEGATIVE_KERNELS_SHELL(__half2ll_rd, long long) +NEGATIVE_KERNELS_SHELL(__half2ll_ru, long long) +NEGATIVE_KERNELS_SHELL(__half2ull_rn, unsigned long long) +NEGATIVE_KERNELS_SHELL(__half2ull_rz, unsigned long long) +NEGATIVE_KERNELS_SHELL(__half2ull_rd, unsigned long long) +NEGATIVE_KERNELS_SHELL(__half2ull_ru, unsigned long long) \ No newline at end of file diff --git a/catch/unit/math/casting_int2half_funcs.cc b/catch/unit/math/casting_int2half_funcs.cc new file mode 100644 index 0000000000..d0d404ebc9 --- /dev/null +++
b/catch/unit/math/casting_int2half_funcs.cc @@ -0,0 +1,448 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "half_precision_common.hh" +#include "casting_common.hh" + +/** + * @addtogroup HalfPrecisionCastingIntTypes HalfPrecisionCastingIntTypes + * @{ + * @ingroup MathTest + */ + +#define CAST_INT2HALF_RN_TEST_DEF(kern_name, T) \ + CAST_KERNEL_DEF(kern_name, Float16, T) \ + CAST_REF_DEF(kern_name, Float16, T) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive") { \ + Float16 (*ref)(T) = kern_name##_ref; \ + CastIntRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2half_rn` for all possible inputs. The results are compared against + * reference function which performs int cast to __half. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__int2half_rn, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2half_rz` for all possible inputs. The results are compared against + * reference function which performs int cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__int2half_rz, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2half_rd` for all possible inputs. The results are compared against + * reference function which performs int cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__int2half_rd, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__int2half_ru` for all possible inputs. The results are compared against + * reference function which performs int cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__int2half_ru, int) + +/** + * Test Description + * ------------------------ + * - Tests that checks `__uint2half_rn` for all possible inputs. The results are compared against + * reference function which performs unsigned int cast to __half. 
+ * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__uint2half_rn, unsigned int) + +/** + * Test Description + * ------------------------ + * - Test that checks `__uint2half_rz` for all possible inputs. The results are compared against + * a reference function which performs unsigned int cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__uint2half_rz, unsigned int) + +/** + * Test Description + * ------------------------ + * - Test that checks `__uint2half_rd` for all possible inputs. The results are compared against + * a reference function which performs unsigned int cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__uint2half_rd, unsigned int) + +/** + * Test Description + * ------------------------ + * - Test that checks `__uint2half_ru` for all possible inputs. The results are compared against + * a reference function which performs unsigned int cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__uint2half_ru, unsigned int) + +/** + * Test Description + * ------------------------ + * - Test that checks `__short2half_rn` for all possible inputs. The results are compared + * against a reference function which performs short cast to __half.
+ * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__short2half_rn, short) + +/** + * Test Description + * ------------------------ + * - Test that checks `__short2half_rz` for all possible inputs. The results are compared + * against a reference function which performs short cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__short2half_rz, short) + +/** + * Test Description + * ------------------------ + * - Test that checks `__short2half_rd` for all possible inputs. The results are compared + * against a reference function which performs short cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__short2half_rd, short) + +/** + * Test Description + * ------------------------ + * - Test that checks `__short2half_ru` for all possible inputs. The results are compared + * against a reference function which performs short cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__short2half_ru, short) + +/** + * Test Description + * ------------------------ + * - Test that checks `__ushort2half_rn` for all possible inputs. The results are compared + * against a reference function which performs unsigned short cast to __half.
+ * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__ushort2half_rn, unsigned short) + +/** + * Test Description + * ------------------------ + * - Test that checks `__ushort2half_rz` for all possible inputs. The results are compared + * against a reference function which performs unsigned short cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__ushort2half_rz, unsigned short) + +/** + * Test Description + * ------------------------ + * - Test that checks `__ushort2half_rd` for all possible inputs. The results are compared + * against a reference function which performs unsigned short cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__ushort2half_rd, unsigned short) + +/** + * Test Description + * ------------------------ + * - Test that checks `__ushort2half_ru` for all possible inputs. The results are compared + * against a reference function which performs unsigned short cast to __half.
+ * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_INT2HALF_RN_TEST_DEF(__ushort2half_ru, unsigned short) + +#define CAST_LL2HALF_TEST_DEF(kern_name, T) \ + CAST_KERNEL_DEF(kern_name, Float16, T) \ + CAST_REF_DEF(kern_name, Float16, T) /* NOTE(review): shared by the _rn/_rz/_rd/_ru variants; CAST_REF_DEF receives no rounding direction - confirm the reference matches each mode */ \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive") { \ + Float16 (*ref)(T) = kern_name##_ref; \ + CastIntBruteForceTest(kern_name##_kernel, ref, EqValidatorBuilderFactory()); \ + } + +/** + * Test Description + * ------------------------ + * - Test that checks `__ll2half_rn` against a large number of randomly generated values. The + * results are compared against a reference function which performs long long cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2HALF_TEST_DEF(__ll2half_rn, long long) + +/** + * Test Description + * ------------------------ + * - Test that checks `__ll2half_rz` against a large number of randomly generated values. The + * results are compared against a reference function which performs long long cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2HALF_TEST_DEF(__ll2half_rz, long long) + +/** + * Test Description + * ------------------------ + * - Test that checks `__ll2half_rd` against a large number of randomly generated values. The + * results are compared against a reference function which performs long long cast to __half.
+ * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2HALF_TEST_DEF(__ll2half_rd, long long) + +/** + * Test Description + * ------------------------ + * - Test that checks `__ll2half_ru` against a large number of randomly generated values. The + * results are compared against a reference function which performs long long cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2HALF_TEST_DEF(__ll2half_ru, long long) + +/** + * Test Description + * ------------------------ + * - Test that checks `__ull2half_rn` against a large number of randomly generated values. The + * results are compared against a reference function which performs unsigned long long cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2HALF_TEST_DEF(__ull2half_rn, unsigned long long) + +/** + * Test Description + * ------------------------ + * - Test that checks `__ull2half_rz` against a large number of randomly generated values. The + * results are compared against a reference function which performs unsigned long long cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2HALF_TEST_DEF(__ull2half_rz, unsigned long long) + +/** + * Test Description + * ------------------------ + * - Test that checks `__ull2half_rd` against a large number of randomly generated values. The + * results are compared against a reference function which performs unsigned long long cast to __half.
+ * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2HALF_TEST_DEF(__ull2half_rd, unsigned long long) + +/** + * Test Description + * ------------------------ + * - Test that checks `__ull2half_ru` against a large number of randomly generated values. The + * results are compared against a reference function which performs unsigned long long cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_LL2HALF_TEST_DEF(__ull2half_ru, unsigned long long) + +CAST_KERNEL_DEF(__short_as_half, Float16, short) + +/** + * Test Description + * ------------------------ + * - Test that checks `__short_as_half` for all possible inputs. The results are compared + * against a reference function which performs copy of short value to __half variable. + * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___short_as_half_Accuracy_Positive") { + Float16 (*ref)(short) = type2_as_type1_ref; + CastIntBruteForceTest(__short_as_half_kernel, ref, EqValidatorBuilderFactory()); +} + +CAST_KERNEL_DEF(__ushort_as_half, Float16, unsigned short) + +/** + * Test Description + * ------------------------ + * - Test that checks `__ushort_as_half` for all possible inputs. The results are compared + * against a reference function which performs copy of unsigned short value to __half variable.
+ * + * Test source + * ------------------------ + * - unit/math/casting_int2half_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___ushort_as_half_Accuracy_Positive") { + Float16 (*ref)(unsigned short) = type2_as_type1_ref; + CastIntBruteForceTest(__ushort_as_half_kernel, ref, EqValidatorBuilderFactory()); +} \ No newline at end of file diff --git a/catch/unit/math/casting_int2half_negative_kernels.cc b/catch/unit/math/casting_int2half_negative_kernels.cc new file mode 100644 index 0000000000..a23da476e6 --- /dev/null +++ b/catch/unit/math/casting_int2half_negative_kernels.cc @@ -0,0 +1,59 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include + +class Dummy { /* unrelated class type, used below as a deliberately mismatched argument / result type */ + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +/* Per intrinsic, defines three kernels with mismatched types: v1 passes a T* argument, v2 passes a Dummy argument, v3 stores the result into a Dummy. The 26 expansions below give 78 kernels, matching the 78 passed to compileAndCaptureOutput.py in CMakeLists.txt (presumably the expected compile-error count - TODO confirm). */ +#define NEGATIVE_KERNELS_SHELL(func_name, T) \ + __global__ void func_name##_kernel_v1(__half* result, T* x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v2(__half* result, Dummy x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v3(Dummy* result, T x) { *result = func_name(x); } + +NEGATIVE_KERNELS_SHELL(__int2half_rn, int) +NEGATIVE_KERNELS_SHELL(__int2half_rz, int) +NEGATIVE_KERNELS_SHELL(__int2half_rd, int) +NEGATIVE_KERNELS_SHELL(__int2half_ru, int) +NEGATIVE_KERNELS_SHELL(__uint2half_rn, unsigned int) +NEGATIVE_KERNELS_SHELL(__uint2half_rz, unsigned int) +NEGATIVE_KERNELS_SHELL(__uint2half_rd, unsigned int) +NEGATIVE_KERNELS_SHELL(__uint2half_ru, unsigned int) +NEGATIVE_KERNELS_SHELL(__short2half_rn, short) +NEGATIVE_KERNELS_SHELL(__short2half_rz, short) +NEGATIVE_KERNELS_SHELL(__short2half_rd, short) +NEGATIVE_KERNELS_SHELL(__short2half_ru, short) +NEGATIVE_KERNELS_SHELL(__short_as_half, short) +NEGATIVE_KERNELS_SHELL(__ushort2half_rn, unsigned short) +NEGATIVE_KERNELS_SHELL(__ushort2half_rz, unsigned short) +NEGATIVE_KERNELS_SHELL(__ushort2half_rd, unsigned short) +NEGATIVE_KERNELS_SHELL(__ushort2half_ru, unsigned short) +NEGATIVE_KERNELS_SHELL(__ushort_as_half, unsigned short) +NEGATIVE_KERNELS_SHELL(__ll2half_rn, long long) +NEGATIVE_KERNELS_SHELL(__ll2half_rz, long long) +NEGATIVE_KERNELS_SHELL(__ll2half_rd, long long) +NEGATIVE_KERNELS_SHELL(__ll2half_ru, long long) +NEGATIVE_KERNELS_SHELL(__ull2half_rn, unsigned long long) +NEGATIVE_KERNELS_SHELL(__ull2half_rz, unsigned long long) +NEGATIVE_KERNELS_SHELL(__ull2half_rd, unsigned long long) +NEGATIVE_KERNELS_SHELL(__ull2half_ru, unsigned long long) \ No newline at end of file From 84c48e0e3cde020af8a7c7bfc82d3798bfd97d9d Mon Sep 17 00:00:00 2001 From: Nives Vukovic Date: Tue, 6 Feb 2024 20:28:32 +0530 Subject: [PATCH 69/71] EXSWHTEC-335 - Implement tests
for half precision type to/from float casting intrinsics #418 Change-Id: I7fa0c5d0683f9c633f126ebe241f925155dd0e7d --- catch/unit/math/CMakeLists.txt | 5 + catch/unit/math/casting_half_float_funcs.cc | 247 ++++++++++++++++++ .../casting_half_float_negative_kernels.cc | 45 ++++ 3 files changed, 297 insertions(+) create mode 100644 catch/unit/math/casting_half_float_funcs.cc create mode 100644 catch/unit/math/casting_half_float_negative_kernels.cc diff --git a/catch/unit/math/CMakeLists.txt b/catch/unit/math/CMakeLists.txt index d3d06e09f4..e646a7da83 100644 --- a/catch/unit/math/CMakeLists.txt +++ b/catch/unit/math/CMakeLists.txt @@ -38,6 +38,7 @@ set(TEST_SRC half_precision_comparison.cc casting_half2int_funcs.cc casting_int2half_funcs.cc + casting_half_float_funcs.cc ) if(HIP_PLATFORM MATCHES "nvidia") @@ -148,3 +149,7 @@ add_test(NAME Unit_Device_casting_int2half_Negative COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} casting_int2half_negative_kernels.cc 78) +add_test(NAME Unit_Device_casting_half_float_Negative + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + casting_half_float_negative_kernels.cc 18) diff --git a/catch/unit/math/casting_half_float_funcs.cc b/catch/unit/math/casting_half_float_funcs.cc new file mode 100644 index 0000000000..23d6de2e8b --- /dev/null +++ b/catch/unit/math/casting_half_float_funcs.cc @@ -0,0 +1,247 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "half_precision_common.hh" +#include "casting_common.hh" + +/** + * @addtogroup HalfPrecisionCastingFloat HalfPrecisionCastingFloat + * @{ + * @ingroup MathTest + */ + +#define CAST_FLOAT2HALF_TEST_DEF(kern_name, round_dir) \ + CAST_KERNEL_DEF(kern_name, Float16, float) \ + CAST_RND_REF_DEF(kern_name, Float16, float, round_dir) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Limited_Positive") { \ + Float16 (*ref)(float) = kern_name##_ref; \ + UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ + std::numeric_limits::min(), 0.f); /* NOTE(review): for a floating-point type numeric_limits min() is the smallest positive normal value, not the most negative one - confirm lowest() was not intended here and in CAST_FLOAT2HALF_RN_TEST_DEF */ \ + UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ + 0.0001f, std::numeric_limits::max()); \ + } + +#define CAST_FLOAT2HALF_RN_TEST_DEF(kern_name) \ + CAST_KERNEL_DEF(kern_name, Float16, float) \ + CAST_REF_DEF(kern_name, Float16, float) \ + \ + TEST_CASE("Unit_Device_" #kern_name "_Accuracy_Positive") { \ + Float16 (*ref)(float) = kern_name##_ref; \ + UnarySinglePrecisionRangeTest(kern_name##_kernel, ref, EqValidatorBuilderFactory(), \ + std::numeric_limits::min(), \ + std::numeric_limits::max()); \ + } + +/** + * Test Description + * ------------------------ + * - Test that checks `__float2half_rd` for all possible inputs apart from very small positive + * values. Rounding behaviour is not correct for host functions for this range. The results are + * compared against a reference function which performs float cast to __half with FE_DOWNWARD rounding + * mode. + * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2HALF_TEST_DEF(__float2half_rd, FE_DOWNWARD) + +/** + * Test Description + * ------------------------ + * - Test that checks `__float2half_rn` for all possible inputs. The results are compared against + * a reference function which performs float cast to __half.
+ * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2HALF_RN_TEST_DEF(__float2half_rn) + +/** + * Test Description + * ------------------------ + * - Test that checks `__float2half` for all possible inputs. The results are compared against + * a reference function which performs float cast to __half. + * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2HALF_RN_TEST_DEF(__float2half) + +/** + * Test Description + * ------------------------ + * - Test that checks `__float2half_ru` for all possible inputs apart from very small positive + * values. Rounding behaviour is not correct for host functions for this range. The results are + * compared against a reference function which performs float cast to __half with FE_UPWARD rounding + * mode. + * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2HALF_TEST_DEF(__float2half_ru, FE_UPWARD) + +/** + * Test Description + * ------------------------ + * - Test that checks `__float2half_rz` for all possible inputs apart from very small positive + * values. Rounding behaviour is not correct for host functions for this range. The results are + * compared against a reference function which performs float cast to __half with FE_TOWARDZERO rounding + * mode. + * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +CAST_FLOAT2HALF_TEST_DEF(__float2half_rz, FE_TOWARDZERO) + +/** + * Test Description + * ------------------------ + * - Sanity test that checks `__float2half_rd` for very small positive values.
+ * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float2half_rd_SmallVals_Sanity_Positive") { + const float input[] = {0.8859e-06f, 1.5454e-07f, 6.5955e-08f, 2.7955e-08f, + 3.7956e-09f, 4.8995e-10f, 5.7997e-15f, 6.2117e-20f, + 7.4999e-25f, 8.9999e-30f, 9.0001e-35f}; /* 11 positive inputs, decreasing from ~8.9e-07 to ~9.0e-35 */ + const Float16 reference[] = {8.34465e-07, 1.19209e-07, 5.96046e-08, 0, 0, 0, 0, 0, 0, 0, 0}; /* reference[i] is the expected result for input[i]; inputs 3..10 are expected to become 0 */ + LinearAllocGuard input_dev{LinearAllocs::hipMalloc, sizeof(float)}; + LinearAllocGuard out(LinearAllocs::hipMallocManaged, sizeof(Float16)); + + + for (int i = 0; i < 11; ++i) { /* 11 == element count of input[] and reference[] */ + HIP_CHECK(hipMemcpy(input_dev.ptr(), input + i, sizeof(float), hipMemcpyHostToDevice)); /* the device buffer holds one float: upload input[i] only */ + + __float2half_rd_kernel<<<1, 1>>>(out.ptr(), 1, input_dev.ptr()); /* single thread; the literal 1 is presumably the element count - confirm against CAST_KERNEL_DEF */ + HIP_CHECK(hipDeviceSynchronize()); /* finish the kernel before the managed result is read on the host */ + REQUIRE(out.ptr()[0] == reference[i]); + } +} + +/** + * Test Description + * ------------------------ + * - Sanity test that checks `__float2half_ru` for very small positive values.
+ * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float2half_ru_SmallVals_Sanity_Positive") { + const float input[] = {0.8859e-06f, 1.5454e-07f, 6.5955e-08f, 2.7955e-08f, + 3.7956e-09f, 4.8995e-10f, 5.7997e-15f, 6.2117e-20f, + 7.4999e-25f, 8.9999e-30f, 9.0001e-35f}; /* 11 positive inputs, decreasing from ~8.9e-07 to ~9.0e-35 */ + const Float16 reference[] = {8.9407e-07, 1.78814e-07, 1.19209e-07, 5.96046e-08, + 5.96046e-08, 5.96046e-08, 5.96046e-08, 5.96046e-08, + 5.96046e-08, 5.96046e-08, 5.96046e-08}; /* reference[i] is the expected result for input[i]; inputs 3..10 are all expected to become 5.96046e-08 */ + LinearAllocGuard input_dev{LinearAllocs::hipMalloc, sizeof(float)}; + LinearAllocGuard out(LinearAllocs::hipMallocManaged, sizeof(Float16)); + + + for (int i = 0; i < 11; ++i) { /* 11 == element count of input[] and reference[] */ + HIP_CHECK(hipMemcpy(input_dev.ptr(), input + i, sizeof(float), hipMemcpyHostToDevice)); /* the device buffer holds one float: upload input[i] only */ + + __float2half_ru_kernel<<<1, 1>>>(out.ptr(), 1, input_dev.ptr()); /* single thread; the literal 1 is presumably the element count - confirm against CAST_KERNEL_DEF */ + HIP_CHECK(hipDeviceSynchronize()); /* finish the kernel before the managed result is read on the host */ + REQUIRE(out.ptr()[0] == reference[i]); + } +} + +/** + * Test Description + * ------------------------ + * - Sanity test that checks `__float2half_rz` for very small positive values.
+ * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___float2half_rz_SmallVals_Sanity_Positive") { + const float input[] = {0.8859e-06f, 1.5454e-07f, 6.5955e-08f, 2.7955e-08f, + 3.7956e-09f, 4.8995e-10f, 5.7997e-15f, 6.2117e-20f, + 7.4999e-25f, 8.9999e-30f, 9.0001e-35f}; /* 11 positive inputs, decreasing from ~8.9e-07 to ~9.0e-35 */ + const Float16 reference[] = {8.34465e-07, 1.19209e-07, 5.96046e-08, 0, 0, 0, 0, 0, 0, 0, 0}; /* reference[i] is the expected result for input[i]; inputs 3..10 are expected to become 0 */ + LinearAllocGuard input_dev{LinearAllocs::hipMalloc, sizeof(float)}; + LinearAllocGuard out(LinearAllocs::hipMallocManaged, sizeof(Float16)); + + + for (int i = 0; i < 11; ++i) { /* 11 == element count of input[] and reference[] */ + HIP_CHECK(hipMemcpy(input_dev.ptr(), input + i, sizeof(float), hipMemcpyHostToDevice)); /* the device buffer holds one float: upload input[i] only */ + + __float2half_rz_kernel<<<1, 1>>>(out.ptr(), 1, input_dev.ptr()); /* single thread; the literal 1 is presumably the element count - confirm against CAST_KERNEL_DEF */ + HIP_CHECK(hipDeviceSynchronize()); /* finish the kernel before the managed result is read on the host */ + REQUIRE(out.ptr()[0] == reference[i]); + } +} + +CAST_KERNEL_DEF(__half2float, float, Float16) +CAST_REF_DEF(__half2float, float, Float16) + +/** + * Test Description + * ------------------------ + * - Test that checks `__half2float` for all possible inputs. The results are compared against + * a reference function which performs __half cast to float. + * + * Test source + * ------------------------ + * - unit/math/casting_half_float_funcs.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___half2float_Accuracy_Positive") { + float (*ref)(Float16) = __half2float_ref; + UnaryHalfPrecisionTest(__half2float_kernel, ref, EqValidatorBuilderFactory()); +} \ No newline at end of file diff --git a/catch/unit/math/casting_half_float_negative_kernels.cc b/catch/unit/math/casting_half_float_negative_kernels.cc new file mode 100644 index 0000000000..9d849e6f5e --- /dev/null +++ b/catch/unit/math/casting_half_float_negative_kernels.cc @@ -0,0 +1,45 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include +#include + +class Dummy { /* unrelated class type, used below as a deliberately mismatched argument / result type */ + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; +/* Each shell defines three kernels with mismatched types: v1 passes a pointer argument, v2 passes a Dummy argument, v3 stores the result into a Dummy. The 6 expansions below give 18 kernels, matching the 18 passed to compileAndCaptureOutput.py in CMakeLists.txt (presumably the expected compile-error count - TODO confirm). */ +#define NEGATIVE_F2H_KERNELS_SHELL(func_name) \ + __global__ void func_name##_kernel_v1(__half* result, float* x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v2(__half* result, Dummy x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v3(Dummy* result, float x) { *result = func_name(x); } + +#define NEGATIVE_H2F_KERNELS_SHELL(func_name) \ + __global__ void func_name##_kernel_v1(float* result, __half* x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v2(float* result, Dummy x) { *result = func_name(x); } \ + __global__ void func_name##_kernel_v3(Dummy* result, __half x) { *result = func_name(x); } + +NEGATIVE_F2H_KERNELS_SHELL(__float2half_rd) +NEGATIVE_F2H_KERNELS_SHELL(__float2half_rn) +NEGATIVE_F2H_KERNELS_SHELL(__float2half_ru) +NEGATIVE_F2H_KERNELS_SHELL(__float2half_rz) +NEGATIVE_F2H_KERNELS_SHELL(__float2half) + +NEGATIVE_H2F_KERNELS_SHELL(__half2float) \ No newline at end of file From 9f0265304b303a72f27e9634e1a2890f164ca2c7 Mon Sep 17 00:00:00 2001 From: Mirza Halilcevic Date: Tue, 6 Feb 2024 20:39:04 +0530 Subject: [PATCH 70/71] EXSWHTEC-334 - Extend tests for warp shlf_up and shfl_down functions to support half-precision types #419 Change-Id: Ib10102dc36b7ff90c5cc4b486a297bb4f3eb8d68 --- catch/unit/warp/CMakeLists.txt | 2 ++ catch/unit/warp/warp_shfl_down.cc | 2 +- catch/unit/warp/warp_shfl_up.cc | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/catch/unit/warp/CMakeLists.txt b/catch/unit/warp/CMakeLists.txt index 87bb5b0ab1..df5fa974a7 100644 --- a/catch/unit/warp/CMakeLists.txt +++ b/catch/unit/warp/CMakeLists.txt @@ -1,5 +1,7 @@ # Common Tests - Test independent of all platforms set(TEST_SRC + warp_shfl_up.cc + warp_shfl_down.cc warp_shfl_xor.cc warp_shfl.cc warp_shfl_up.cc diff --git a/catch/unit/warp/warp_shfl_down.cc b/catch/unit/warp/warp_shfl_down.cc index
d42a19c7d8..58ad8528e7 100644 --- a/catch/unit/warp/warp_shfl_down.cc +++ b/catch/unit/warp/warp_shfl_down.cc @@ -100,7 +100,7 @@ template class WarpShflDown : public WarpShflTest, * - Device supports warp shuffle */ TEMPLATE_TEST_CASE("Unit_Warp_Shfl_Down_Positive_Basic", "", int, unsigned int, long, unsigned long, - long long, unsigned long long, float, double) { + long long, unsigned long long, float, double, __half, __half2) { int device; hipDeviceProp_t device_properties; HIP_CHECK(hipGetDevice(&device)); diff --git a/catch/unit/warp/warp_shfl_up.cc b/catch/unit/warp/warp_shfl_up.cc index 201289f363..5c55c8a9b9 100644 --- a/catch/unit/warp/warp_shfl_up.cc +++ b/catch/unit/warp/warp_shfl_up.cc @@ -99,7 +99,7 @@ template class WarpShflUp : public WarpShflTest, T> { * - Device supports warp shuffle */ TEMPLATE_TEST_CASE("Unit_Warp_Shfl_Up_Positive_Basic", "", int, unsigned int, long, unsigned long, - long long, unsigned long long, float, double) { + long long, unsigned long long, float, double, __half, __half2) { int device; hipDeviceProp_t device_properties; HIP_CHECK(hipGetDevice(&device)); From 147601aff25f5054dac97b94f2942115aa4f894f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 14:45:18 +0100 Subject: [PATCH 71/71] EXSWHTEC-354 - Implement additional tests for integer intrinsics #430 Change-Id: Icd155c2d2121bd9d25e7ac6ece9599553956457c --- catch/hipTestMain/config/config_amd_linux | 5 + catch/hipTestMain/config/config_amd_windows | 5 + catch/unit/math/integer_intrinsics.cc | 474 ++++++++++++++++++++ 3 files changed, 484 insertions(+) diff --git a/catch/hipTestMain/config/config_amd_linux b/catch/hipTestMain/config/config_amd_linux index 0f6f626367..a02d218e60 100644 --- a/catch/hipTestMain/config/config_amd_linux +++ b/catch/hipTestMain/config/config_amd_linux @@ -141,6 +141,11 @@ "Unit_Device___hbgtu2_Accuracy_Positive", 
"Unit_Device___hbleu2_Accuracy_Positive", "Unit_Device___hbltu2_Accuracy_Positive", + "=== Below 4 tests are disable due to defect EXSWHTEC-355 ===", + "Unit_Device___hadd_Sanity_Positive", + "Unit_Device___uhadd_Sanity_Positive", + "Unit_Device___rhadd_Sanity_Positive", + "Unit_Device___urhadd_Sanity_Positive", "=== Patch which removes the typetraits implementation from std namespace in hiprtc is reverted ===", "Unit_hiprtc_stdheaders", "Unit_hipGraphAddMemcpyNode_Negative_Parameters", diff --git a/catch/hipTestMain/config/config_amd_windows b/catch/hipTestMain/config/config_amd_windows index 1522525fea..8dbe04c1b0 100644 --- a/catch/hipTestMain/config/config_amd_windows +++ b/catch/hipTestMain/config/config_amd_windows @@ -466,6 +466,11 @@ "Unit_Device___hbgtu2_Accuracy_Positive", "Unit_Device___hbleu2_Accuracy_Positive", "Unit_Device___hbltu2_Accuracy_Positive", + "=== Below 4 tests are disable due to defect EXSWHTEC-355 ===", + "Unit_Device___hadd_Sanity_Positive", + "Unit_Device___uhadd_Sanity_Positive", + "Unit_Device___rhadd_Sanity_Positive", + "Unit_Device___urhadd_Sanity_Positive", #endif "End of json" ] diff --git a/catch/unit/math/integer_intrinsics.cc b/catch/unit/math/integer_intrinsics.cc index d851577831..ee9cd760a8 100644 --- a/catch/unit/math/integer_intrinsics.cc +++ b/catch/unit/math/integer_intrinsics.cc @@ -317,4 +317,478 @@ TEST_CASE("Unit_Device___umul24_Sanity_Positive") { HIP_CHECK(hipDeviceSynchronize()); REQUIRE(y.ptr()[0] == x1 * x2); +} + +__global__ void __funnelshift_l_kernel(unsigned int* y, unsigned int lo, unsigned int hi, + unsigned int shift) { + y[0] = __funnelshift_l(lo, hi, shift); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__funnelshift_l(lo,hi,shift)`. 
+ * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___funnelshift_l_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + const unsigned int lo = 0xAAAAAAAA, hi = 0xBBBBBBBB; + const unsigned long long hi_lo = (static_cast(hi) << 32) | lo; /* hi:lo as one 64-bit value, used for the host-side reference */ + + for (unsigned int shift = 0; shift < 64; ++shift) { /* shifts 32..63 exercise the (shift & 31) wrap in the reference */ + __funnelshift_l_kernel<<<1, 1>>>(y.ptr(), lo, hi, shift); + HIP_CHECK(hipDeviceSynchronize()); /* wait for the kernel before y is read on the host */ + + INFO("shift: " << shift); + REQUIRE(y.ptr()[0] == static_cast((hi_lo << (shift & 31)) >> 32)); /* reference: hi:lo << (shift mod 32), then bits 63..32 */ + } +} + +__global__ void __funnelshift_lc_kernel(unsigned int* y, unsigned int lo, unsigned int hi, + unsigned int shift) { + y[0] = __funnelshift_lc(lo, hi, shift); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__funnelshift_lc(lo,hi,shift)`. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___funnelshift_lc_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + const unsigned int lo = 0xAAAAAAAA, hi = 0xBBBBBBBB; + const unsigned long long hi_lo = (static_cast(hi) << 32) | lo; /* hi:lo as one 64-bit value, used for the host-side reference */ + + for (unsigned int shift = 0; shift < 64; ++shift) { /* shifts above 32 exercise the clamp to 32 in the reference */ + __funnelshift_lc_kernel<<<1, 1>>>(y.ptr(), lo, hi, shift); + HIP_CHECK(hipDeviceSynchronize()); /* wait for the kernel before y is read on the host */ + + INFO("shift: " << shift); + REQUIRE(y.ptr()[0] == static_cast((hi_lo << std::min(shift, 32u)) >> 32)); /* reference: hi:lo << min(shift, 32), then bits 63..32 */ + } +} + +__global__ void __funnelshift_r_kernel(unsigned int* y, unsigned int lo, unsigned int hi, + unsigned int shift) { + y[0] = __funnelshift_r(lo, hi, shift); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__funnelshift_r(lo,hi,shift)`.
+ * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___funnelshift_r_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + const unsigned int lo = 0xAAAAAAAA, hi = 0xBBBBBBBB; + const unsigned long long hi_lo = (static_cast(hi) << 32) | lo; + + for (unsigned int shift = 0; shift < 64; ++shift) { + __funnelshift_r_kernel<<<1, 1>>>(y.ptr(), lo, hi, shift); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("shift: " << shift); + REQUIRE(y.ptr()[0] == static_cast(hi_lo >> (shift & 31))); + } +} + +__global__ void __funnelshift_rc_kernel(unsigned int* y, unsigned int lo, unsigned int hi, + unsigned int shift) { + y[0] = __funnelshift_rc(lo, hi, shift); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__funnelshift_rc(lo,hi,shift)`. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___funnelshift_rc_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + const unsigned int lo = 0xAAAAAAAA, hi = 0xBBBBBBBB; + const unsigned long long hi_lo = (static_cast(hi) << 32) | lo; + + for (unsigned int shift = 0; shift < 64; ++shift) { + __funnelshift_rc_kernel<<<1, 1>>>(y.ptr(), lo, hi, shift); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("shift: " << shift); + REQUIRE(y.ptr()[0] == static_cast(hi_lo >> std::min(shift, 32u))); + } +} + +__global__ void __hadd_kernel(int* y, int x1, int x2) { y[0] = __hadd(x1, x2); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__hadd(x,y)`. 
+ * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___hadd_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(int)); + + int x1 = GENERATE(0, -42, 42, 0xFFFFFFFF); + int x2 = GENERATE(0, -42, 42, 0xFFFFFFFF); + + __hadd_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE(y.ptr()[0] == static_cast((static_cast(x1) + x2) >> 1)); +} + +__global__ void __uhadd_kernel(unsigned int* y, unsigned int x1, unsigned int x2) { + y[0] = __uhadd(x1, x2); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__uhadd(x,y)`. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___uhadd_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + unsigned int x1 = GENERATE(0, 42, 0xFFFFFFFF); + unsigned int x2 = GENERATE(0, 42, 0xFFFFFFFF); + + __uhadd_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE(y.ptr()[0] == static_cast((static_cast(x1) + x2) >> 1)); +} + +__global__ void __rhadd_kernel(int* y, int x1, int x2) { y[0] = __rhadd(x1, x2); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__rhadd(x,y)`. 
+ * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___rhadd_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(int)); + + int x1 = GENERATE(0, -42, 42, 0xFFFFFFFF); + int x2 = GENERATE(0, -42, 42, 0xFFFFFFFF); + + __rhadd_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE(y.ptr()[0] == static_cast((static_cast(x1) + x2 + 1) >> 1)); +} + +__global__ void __urhadd_kernel(unsigned int* y, unsigned int x1, unsigned int x2) { + y[0] = __urhadd(x1, x2); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__urhadd(x,y)`. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___urhadd_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + unsigned int x1 = GENERATE(0, 42, 0xFFFFFFFF); + unsigned int x2 = GENERATE(0, 42, 0xFFFFFFFF); + + __urhadd_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE(y.ptr()[0] == + static_cast((static_cast(x1) + x2 + 1) >> 1)); +} + +__global__ void __mulhi_kernel(int* y, int x1, int x2) { y[0] = __mulhi(x1, x2); } + +/** + * Test Description + * ------------------------ + * - Sanity test for `__mulhi(x,y)`. 
+ * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___mulhi_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(int)); + + int x1 = GENERATE(0, -42, 42, 0xFFFFFFFF); + int x2 = GENERATE(0, -42, 42, 0xFFFFFFFF); + + __mulhi_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE(y.ptr()[0] == + static_cast((static_cast(x1) * static_cast(x2)) >> 32)); +} + +__global__ void __umulhi_kernel(unsigned int* y, unsigned int x1, unsigned int x2) { + y[0] = __umulhi(x1, x2); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__umulhi(x,y)`. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___umulhi_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + unsigned int x1 = GENERATE(0, 42, 0xFFFFFFFF); + unsigned int x2 = GENERATE(0, 42, 0xFFFFFFFF); + + __umulhi_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE(y.ptr()[0] == + static_cast((static_cast(x1) * x2) >> 32)); +} + +__global__ void __mul64hi_kernel(long long* y, long long x1, long long x2) { + y[0] = __mul64hi(x1, x2); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__mul64hi(x,y)`. 
+ * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___mul64hi_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(long long)); + + long long x1 = GENERATE(0, -42, 42, 0xFFFFFFFF); + long long x2 = GENERATE(0, -42, 42, 0xFFFFFFFF); + + __mul64hi_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE( + y.ptr()[0] == + static_cast((static_cast<__int128_t>(x1) * static_cast<__int128_t>(x2)) >> 64)); +} + +__global__ void __umul64hi_kernel(unsigned long long* y, unsigned long long x1, + unsigned long long x2) { + y[0] = __umul64hi(x1, x2); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__umul64hi(x,y)`. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___umul64hi_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, + sizeof(unsigned long long)); + + unsigned long long x1 = GENERATE(0, 42, 0xFFFFFFFF); + unsigned long long x2 = GENERATE(0, 42, 0xFFFFFFFF); + + __umul64hi_kernel<<<1, 1>>>(y.ptr(), x1, x2); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE(y.ptr()[0] == + static_cast( + (static_cast<__uint128_t>(x1) * static_cast<__uint128_t>(x2)) >> 64)); +} + +__global__ void __sad_kernel(unsigned int* y, int x1, int x2, unsigned int x3) { + y[0] = __sad(x1, x2, x3); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__sad(x,y,z)`. 
+ * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___sad_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + int x1 = GENERATE(0, -42, 42, 0xFFFFFFFF); + int x2 = GENERATE(0, -42, 42, 0xFFFFFFFF); + unsigned int x3 = GENERATE(0, 42, 0xFFFFFFFF); + + __sad_kernel<<<1, 1>>>(y.ptr(), x1, x2, x3); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE(y.ptr()[0] == (static_cast(std::abs(x1 - x2)) + x3)); +} + +__global__ void __usad_kernel(unsigned int* y, unsigned int x1, unsigned int x2, unsigned int x3) { + y[0] = __usad(x1, x2, x3); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__usad(x,y,z)`. + * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___usad_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + unsigned int x1 = GENERATE(0, 42, 0xFFFFFFFF); + unsigned int x2 = GENERATE(0, 42, 0xFFFFFFFF); + unsigned int x3 = GENERATE(0, 42, 0xFFFFFFFF); + + __usad_kernel<<<1, 1>>>(y.ptr(), x1, x2, x3); + HIP_CHECK(hipDeviceSynchronize()); + + INFO("x1: " << x1); + INFO("x2: " << x2); + REQUIRE(y.ptr()[0] == + (static_cast( + std::abs(static_cast(x1) - static_cast(x2))) + + x3)); +} + +__global__ void __byte_perm(unsigned int* y, unsigned int x1, unsigned int x2, unsigned int s) { + y[0] = __byte_perm(x1, x2, s); +} + +/** + * Test Description + * ------------------------ + * - Sanity test for `__byte_perm(x,y,s)`. 
+ * + * Test source + * ------------------------ + * - unit/math/integer_intrinsics.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_Device___byte_perm_Sanity_Positive") { + LinearAllocGuard y(LinearAllocs::hipMallocManaged, sizeof(unsigned int)); + + unsigned int bytes[] = {0x88, 0x99, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF}; + + unsigned int x1 = (bytes[3] << 24) | (bytes[2] << 16) | (bytes[1] << 8) | bytes[0]; + unsigned int x2 = (bytes[7] << 24) | (bytes[6] << 16) | (bytes[5] << 8) | bytes[4]; + + unsigned int s0 = GENERATE(0, 1); + unsigned int s1 = GENERATE(2, 3); + unsigned int s2 = GENERATE(4, 5); + unsigned int s3 = GENERATE(6, 7); + + unsigned int s = (s3 << 12) | (s2 << 8) | (s1 << 4) | s0; + + __byte_perm<<<1, 1>>>(y.ptr(), x1, x2, s); + HIP_CHECK(hipDeviceSynchronize()); + + unsigned int expected = (bytes[s3] << 24) | (bytes[s2] << 16) | (bytes[s1] << 8) | bytes[s0]; + REQUIRE(y.ptr()[0] == expected); } \ No newline at end of file