diff --git a/projects/hip-tests/catch/unit/atomics/CMakeLists.txt b/projects/hip-tests/catch/unit/atomics/CMakeLists.txt index fa77a87a99..f18abbf3e5 100644 --- a/projects/hip-tests/catch/unit/atomics/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/atomics/CMakeLists.txt @@ -25,6 +25,14 @@ set(TEST_SRC atomicOr_system.cc atomicXor.cc atomicXor_system.cc + atomicMin.cc + atomicMin_system.cc + atomicMax.cc + atomicMax_system.cc + safeAtomicMin.cc + unsafeAtomicMin.cc + safeAtomicMax.cc + unsafeAtomicMax.cc atomicExch.cc atomicExch_system.cc ) @@ -34,18 +42,20 @@ if(HIP_PLATFORM MATCHES "nvidia") set_source_files_properties(atomicAnd_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicOr_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicXor_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") + set_source_files_properties(atomicMin_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") + set_source_files_properties(atomicMax_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicExch_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") hip_add_exe_to_target(NAME AtomicsTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests LINKER_LIBS "nvrtc -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") - set(EXPECTED_ERRORS 36) # EXSWHTEC-278 + set(EXPECTED_ERRORS 42) # EXSWHTEC-278 elseif(HIP_PLATFORM MATCHES "amd") hip_add_exe_to_target(NAME AtomicsTest TEST_SRC ${TEST_SRC} TEST_TARGET_NAME build_tests LINKER_LIBS hiprtc) - set(EXPECTED_ERRORS 40) + set(EXPECTED_ERRORS 48) endif() add_test(NAME Unit_atomicAnd_Negative_Parameters @@ -63,6 +73,16 @@ add_test(NAME Unit_atomicXor_Negative_Parameters ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} atomicXor_negative_kernels.cc ${EXPECTED_ERRORS}) +add_test(NAME Unit_atomicMin_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + atomicMin_negative_kernels.cc ${EXPECTED_ERRORS}) + +add_test(NAME Unit_atomicMax_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + atomicMax_negative_kernels.cc ${EXPECTED_ERRORS}) + # SWDEV-435667: Below 2 tests failed in stress test on 01/12/23 #add_test(NAME Unit_atomicExch_Negative_Parameters # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py diff --git a/projects/hip-tests/catch/unit/atomics/atomicMax.cc b/projects/hip-tests/catch/unit/atomics/atomicMax.cc new file mode 100644 index 0000000000..e98ceaaf4c --- /dev/null +++ b/projects/hip-tests/catch/unit/atomics/atomicMax.cc @@ -0,0 +1,222 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "atomicMax_negative_kernels_rtc.hh" +#include "min_max_common.hh" + +#include + +/** + * @addtogroup atomicMax atomicMax + * @{ + * @ingroup AtomicsTest + * `atomicMax(TestType* address, TestType* val)` - + * calculates maximum between address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on the same address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_SameAddress", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on the scaterred addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Scattered_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Multi_Kernel_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax from multiple threads on the scaterred addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMax_Positive_Multi_Kernel_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Compiles atomicMax with invalid parameters. + * - Compiles the source with RTC. + * Test source + * ------------------------ + * - unit/atomics/atomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicMax_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = GENERATE(kAtomicMax_int, kAtomicMax_uint, kAtomicMax_ulong, + kAtomicMax_ulonglong, kAtomicMax_float, kAtomicMax_double); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicMax_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + // Please check the content of negative_kernels_rtc.hh + int expected_error_count{8}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/projects/hip-tests/catch/unit/atomics/atomicMax_negative_kernels.cc b/projects/hip-tests/catch/unit/atomics/atomicMax_negative_kernels.cc new file mode 100644 index 0000000000..2f9b6a6306 --- /dev/null +++ b/projects/hip-tests/catch/unit/atomics/atomicMax_negative_kernels.cc @@ -0,0 +1,219 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* int atomicMax(int* address, int val) */ +__global__ void atomicMax_int_v1(int* address, int* result) { *result = atomicMax(&address, 1234); } + +__global__ void atomicMax_int_v2(int* address, int* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_int_v3(int* address, int* result) { *result = atomicMax(1234, 1234); } + +__global__ void atomicMax_int_v4(Dummy* address, int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_int_v5(char* address, int* result) { *result = atomicMax(address, 1234); } + +__global__ void atomicMax_int_v6(short* address, int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_int_v7(long* address, int* result) { *result = atomicMax(address, 1234); } + +__global__ void atomicMax_int_v8(long long* address, int* result) { + *result = atomicMax(address, 1234); +} + +/* unsigned int atomicMax(unsigned int* address, unsigned int val) */ +__global__ void atomicMax_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicMax(&address, 1234); +} + +__global__ void atomicMax_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicMax(1234, 1234); +} + +__global__ void atomicMax_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_uint_v5(char* address, unsigned int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_uint_v6(short* address, unsigned int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_uint_v7(long* address, unsigned int* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_uint_v8(long long* address, unsigned int* result) { + *result = atomicMax(address, 1234); +} + +/* atomicMax(unsigned long* address, unsigned long val) */ +__global__ void atomicMax_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicMax(&address, 1234); +} + +__global__ void atomicMax_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicMax(1234, 1234); +} + +__global__ void atomicMax_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulong_v5(char* address, unsigned long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulong_v6(short* address, unsigned long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulong_v7(long* address, unsigned long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulong_v8(long long* address, unsigned long* result) { + *result = atomicMax(address, 1234); +} + +/* atomicMax(unsigned long long* address, unsigned long long val) */ +__global__ void atomicMax_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(&address, 1234); +} + +__global__ void atomicMax_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(1234, 1234); +} + +__global__ void atomicMax_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicMax(address, 1234); +} + +__global__ void atomicMax_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicMax(address, 1234); +} + +/* atomicMax(float* address, float val) */ +__global__ void atomicMax_float_v1(float* address, float* result) { + *result = atomicMax(&address, 1234.f); +} + +__global__ void atomicMax_float_v2(float* address, float* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_float_v3(float* address, float* result) { + *result = atomicMax(1234.f, 1234.f); +} + +__global__ void atomicMax_float_v4(Dummy* address, float* result) { + *result = atomicMax(address, 1234.f); +} + +__global__ void atomicMax_float_v5(char* address, float* result) { + *result = atomicMax(address, 1234.f); +} + +__global__ void atomicMax_float_v6(short* address, float* result) { + *result = atomicMax(address, 1234.f); +} + +__global__ void atomicMax_float_v7(long* address, float* result) { + *result = atomicMax(address, 1234.f); +} + +__global__ void atomicMax_float_v8(long long* address, float* result) { + *result = atomicMax(address, 1234); +} + +/* atomicMax(double* address, double val) */ +__global__ void atomicMax_double_v1(double* address, double* result) { + *result = atomicMax(&address, 1234.0); +} + +__global__ void atomicMax_double_v2(double* address, double* result) { + *result = atomicMax(address, address); +} + +__global__ void atomicMax_double_v3(double* address, double* result) { + *result = atomicMax(1234.0, 1234.0); +} + +__global__ void atomicMax_double_v4(Dummy* address, double* result) { + *result = atomicMax(address, 1234.0); +} + +__global__ void atomicMax_double_v5(char* address, double* result) { + *result = atomicMax(address, 1234.0); +} + +__global__ void atomicMax_double_v6(short* address, double* result) { + *result = atomicMax(address, 1234.0); +} + +__global__ void atomicMax_double_v7(long* address, double* result) { + *result = atomicMax(address, 1234.0); +} + +__global__ void atomicMax_double_v8(long long* address, double* result) { + *result = atomicMax(address, 1234.0); +} diff --git a/projects/hip-tests/catch/unit/atomics/atomicMax_negative_kernels_rtc.hh b/projects/hip-tests/catch/unit/atomics/atomicMax_negative_kernels_rtc.hh new file mode 100644 index 0000000000..885f9f5250 --- /dev/null +++ b/projects/hip-tests/catch/unit/atomics/atomicMax_negative_kernels_rtc.hh @@ -0,0 +1,273 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. +*/ + +static constexpr auto kAtomicMax_int{ + R"( + __global__ void atomicMax_int_v1(int* address, int* result) { + *result = atomicMax(&address, 1234); + } + + __global__ void atomicMax_int_v2(int* address, int* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_int_v3(int* address, int* result) { + *result = atomicMax(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_int_v4(Dummy* address, int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_int_v5(char* address, int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_int_v6(short* address, int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_int_v7(long* address, int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_int_v8(long long* address, int* result) { + *result = atomicMax(address, 1234); + } + )"}; + +static constexpr auto kAtomicMax_uint{ + R"( + __global__ void atomicMax_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicMax(&address, 1234); + } + + __global__ void atomicMax_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicMax(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_uint_v5(char* address, unsigned int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_uint_v6(short* address, unsigned int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_uint_v7(long* address, unsigned int* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_uint_v8(long long* address, unsigned int* result) { + *result = atomicMax(address, 1234); + } + )"}; + +static constexpr auto kAtomicMax_ulong{ + R"( + __global__ void atomicMax_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicMax(&address, 1234); + } + + __global__ void atomicMax_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicMax(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulong_v5(char* address, unsigned long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulong_v6(short* address, unsigned long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulong_v7(long* address, unsigned long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulong_v8(long long* address, unsigned long* result) { + *result = atomicMax(address, 1234); + } + )"}; + +static constexpr auto kAtomicMax_ulonglong{ + R"( + __global__ void atomicMax_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(&address, 1234); + } + + __global__ void atomicMax_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicMax(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicMax(address, 1234); + } + + __global__ void atomicMax_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicMax(address, 1234); + } + )"}; + +static constexpr auto kAtomicMax_float{ + R"( + __global__ void atomicMax_float_v1(float* address, float* result) { + *result = atomicMax(&address, 1234.f); + } + + __global__ void atomicMax_float_v2(float* address, float* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_float_v3(float* address, float* result) { + *result = atomicMax(1234.f, 1234.f); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_float_v4(Dummy* address, float* result) { + *result = atomicMax(address, 1234.f); + } + + __global__ void atomicMax_float_v5(char* address, float* result) { + *result = atomicMax(address, 1234.f); + } + + __global__ void atomicMax_float_v6(short* address, float* result) { + *result = atomicMax(address, 1234.f); + } + + __global__ void atomicMax_float_v7(long* address, float* result) { + *result = atomicMax(address, 1234.f); + } + + __global__ void atomicMax_float_v8(long long* address, float* result) { + *result = atomicMax(address, 1234); + } + )"}; + +static constexpr auto kAtomicMax_double{ + R"( + __global__ void atomicMax_double_v1(double* address, double* result) { + *result = atomicMax(&address, 1234.0); + } + + __global__ void atomicMax_double_v2(double* address, double* result) { + *result = atomicMax(address, address); + } + + __global__ void atomicMax_double_v3(double* address, double* result) { + *result = atomicMax(1234.0, 1234.0); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMax_double_v4(Dummy* address, double* result) { + *result = atomicMax(address, 1234.0); + } + + __global__ void atomicMax_double_v5(char* address, double* result) { + *result = atomicMax(address, 1234.0); + } + + __global__ void atomicMax_double_v6(short* address, double* result) { + *result = atomicMax(address, 1234.0); + } + + __global__ void atomicMax_double_v7(long* address, double* result) { + *result = atomicMax(address, 1234.0); + } + + __global__ void atomicMax_double_v8(long long* address, double* result) { + *result = atomicMax(address, 1234.0); + } + )"}; diff --git a/projects/hip-tests/catch/unit/atomics/atomicMax_system.cc b/projects/hip-tests/catch/unit/atomics/atomicMax_system.cc new file mode 100644 index 0000000000..b07b566616 --- /dev/null +++ b/projects/hip-tests/catch/unit/atomics/atomicMax_system.cc @@ -0,0 +1,124 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup atomicMax_system atomicMax_system + * @{ + * @ingroup AtomicsTest + * `atomicMax_system(TestType* address, TestType* val)` - + * performs system-wide atomic maximum between address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicMax_system from multiple threads on the same address. + * - Uses multiple devices and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMax_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { +#endif + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax_system from multiple threads on adjacent addresses. + * - Uses multiple devices and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMax_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { +#endif + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMax_system from multiple threads on scaterred addresses. + * - Uses multiple devices and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMax_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMax_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { +#endif + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, cache_line_size); + } + } +} diff --git a/projects/hip-tests/catch/unit/atomics/atomicMin.cc b/projects/hip-tests/catch/unit/atomics/atomicMin.cc new file mode 100644 index 0000000000..3d0f89412f --- /dev/null +++ b/projects/hip-tests/catch/unit/atomics/atomicMin.cc @@ -0,0 +1,222 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "atomicMin_negative_kernels_rtc.hh" +#include "min_max_common.hh" + +#include + +/** + * @addtogroup atomicMin atomicMin + * @{ + * @ingroup AtomicsTest + * `atomicMin(TestType* address, TestType* val)` - + * calculates minimum between address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on the same address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_SameAddress", "", int, unsigned int, unsigned long, + unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on the scaterred addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Scattered_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Multi_Kernel_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Multi_Kernel_Adjacent_Addresses", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin from multiple threads on the scaterred addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicMin_Positive_Multi_Kernel_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Compiles atomicMin with invalid parameters. + * - Compiles the source with RTC. + * Test source + * ------------------------ + * - unit/atomics/atomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicMin_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = GENERATE(kAtomicMin_int, kAtomicMin_uint, kAtomicMin_ulong, + kAtomicMin_ulonglong, kAtomicMin_float, kAtomicMin_double); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicMin_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + // Please check the content of negative_kernels_rtc.hh + int expected_error_count{8}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/projects/hip-tests/catch/unit/atomics/atomicMin_negative_kernels.cc b/projects/hip-tests/catch/unit/atomics/atomicMin_negative_kernels.cc new file mode 100644 index 0000000000..644b7aaf8b --- /dev/null +++ b/projects/hip-tests/catch/unit/atomics/atomicMin_negative_kernels.cc @@ -0,0 +1,219 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +/* int atomicMin(int* address, int val) */ +__global__ void atomicMin_int_v1(int* address, int* result) { *result = atomicMin(&address, 1234); } + +__global__ void atomicMin_int_v2(int* address, int* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_int_v3(int* address, int* result) { *result = atomicMin(1234, 1234); } + +__global__ void atomicMin_int_v4(Dummy* address, int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_int_v5(char* address, int* result) { *result = atomicMin(address, 1234); } + +__global__ void atomicMin_int_v6(short* address, int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_int_v7(long* address, int* result) { *result = atomicMin(address, 1234); } + +__global__ void atomicMin_int_v8(long long* address, int* result) { + *result = atomicMin(address, 1234); +} + +/* unsigned int atomicMin(unsigned int* address, unsigned int val) */ +__global__ void atomicMin_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicMin(&address, 1234); +} + +__global__ void atomicMin_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicMin(1234, 1234); +} + +__global__ void atomicMin_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_uint_v5(char* address, unsigned int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_uint_v6(short* address, unsigned int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_uint_v7(long* address, unsigned int* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_uint_v8(long long* address, unsigned int* result) { + *result = atomicMin(address, 1234); +} + +/* atomicMin(unsigned long* address, unsigned long val) */ +__global__ void atomicMin_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicMin(&address, 1234); +} + +__global__ void atomicMin_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicMin(1234, 1234); +} + +__global__ void atomicMin_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulong_v5(char* address, unsigned long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulong_v6(short* address, unsigned long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulong_v7(long* address, unsigned long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulong_v8(long long* address, unsigned long* result) { + *result = atomicMin(address, 1234); +} + +/* atomicMin(unsigned long long* address, unsigned long long val) */ +__global__ void atomicMin_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(&address, 1234); +} + +__global__ void atomicMin_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(1234, 1234); +} + +__global__ void atomicMin_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicMin(address, 1234); +} + +__global__ void atomicMin_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicMin(address, 1234); +} + +/* atomicMin(float* address, float val) */ +__global__ void atomicMin_float_v1(float* address, float* result) { + *result = atomicMin(&address, 1234.f); +} + +__global__ void atomicMin_float_v2(float* address, float* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_float_v3(float* address, float* result) { + *result = atomicMin(1234.f, 1234.f); +} + +__global__ void atomicMin_float_v4(Dummy* address, float* result) { + *result = atomicMin(address, 1234.f); +} + +__global__ void atomicMin_float_v5(char* address, float* result) { + *result = atomicMin(address, 1234.f); +} + +__global__ void atomicMin_float_v6(short* address, float* result) { + *result = atomicMin(address, 1234.f); +} + +__global__ void atomicMin_float_v7(long* address, float* result) { + *result = atomicMin(address, 1234.f); +} + +__global__ void atomicMin_float_v8(long long* address, float* result) { + *result = atomicMin(address, 1234); +} + +/* atomicMin(double* address, double val) */ +__global__ void atomicMin_double_v1(double* address, double* result) { + *result = atomicMin(&address, 1234.0); +} + +__global__ void atomicMin_double_v2(double* address, double* result) { + *result = atomicMin(address, address); +} + +__global__ void atomicMin_double_v3(double* address, double* result) { + *result = atomicMin(1234.0, 1234.0); +} + +__global__ void atomicMin_double_v4(Dummy* address, double* result) { + *result = atomicMin(address, 1234.0); +} + +__global__ void atomicMin_double_v5(char* address, double* result) { + *result = atomicMin(address, 1234.0); +} + +__global__ void atomicMin_double_v6(short* address, double* result) { + *result = atomicMin(address, 1234.0); +} + +__global__ void atomicMin_double_v7(long* address, double* result) { + *result = atomicMin(address, 1234.0); +} + +__global__ void atomicMin_double_v8(long long* address, double* result) { + *result = atomicMin(address, 1234.0); +} diff --git a/projects/hip-tests/catch/unit/atomics/atomicMin_negative_kernels_rtc.hh b/projects/hip-tests/catch/unit/atomics/atomicMin_negative_kernels_rtc.hh new file mode 100644 index 0000000000..cc1ae5c7af --- /dev/null +++ b/projects/hip-tests/catch/unit/atomics/atomicMin_negative_kernels_rtc.hh @@ -0,0 +1,273 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. +*/ + +static constexpr auto kAtomicMin_int{ + R"( + __global__ void atomicMin_int_v1(int* address, int* result) { + *result = atomicMin(&address, 1234); + } + + __global__ void atomicMin_int_v2(int* address, int* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_int_v3(int* address, int* result) { + *result = atomicMin(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_int_v4(Dummy* address, int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_int_v5(char* address, int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_int_v6(short* address, int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_int_v7(long* address, int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_int_v8(long long* address, int* result) { + *result = atomicMin(address, 1234); + } + )"}; + +static constexpr auto kAtomicMin_uint{ + R"( + __global__ void atomicMin_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicMin(&address, 1234); + } + + __global__ void atomicMin_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicMin(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_uint_v5(char* address, unsigned int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_uint_v6(short* address, unsigned int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_uint_v7(long* address, unsigned int* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_uint_v8(long long* address, unsigned int* result) { + *result = atomicMin(address, 1234); + } + )"}; + +static constexpr auto kAtomicMin_ulong{ + R"( + __global__ void atomicMin_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicMin(&address, 1234); + } + + __global__ void atomicMin_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicMin(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulong_v5(char* address, unsigned long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulong_v6(short* address, unsigned long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulong_v7(long* address, unsigned long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulong_v8(long long* address, unsigned long* result) { + *result = atomicMin(address, 1234); + } + )"}; + +static constexpr auto kAtomicMin_ulonglong{ + R"( + __global__ void atomicMin_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(&address, 1234); + } + + __global__ void atomicMin_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicMin(1234, 1234); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicMin(address, 1234); + } + + __global__ void atomicMin_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicMin(address, 1234); + } + )"}; + +static constexpr auto kAtomicMin_float{ + R"( + __global__ void atomicMin_float_v1(float* address, float* result) { + *result = atomicMin(&address, 1234.f); + } + + __global__ void atomicMin_float_v2(float* address, float* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_float_v3(float* address, float* result) { + *result = atomicMin(1234.f, 1234.f); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_float_v4(Dummy* address, float* result) { + *result = atomicMin(address, 1234.f); + } + + __global__ void atomicMin_float_v5(char* address, float* result) { + *result = atomicMin(address, 1234.f); + } + + __global__ void atomicMin_float_v6(short* address, float* result) { + *result = atomicMin(address, 1234.f); + } + + __global__ void atomicMin_float_v7(long* address, float* result) { + *result = atomicMin(address, 1234.f); + } + + __global__ void atomicMin_float_v8(long long* address, float* result) { + *result = atomicMin(address, 1234); + } + )"}; + +static constexpr auto kAtomicMin_double{ + R"( + __global__ void atomicMin_double_v1(double* address, double* result) { + *result = atomicMin(&address, 1234.0); + } + + __global__ void atomicMin_double_v2(double* address, double* result) { + *result = atomicMin(address, address); + } + + __global__ void atomicMin_double_v3(double* address, double* result) { + *result = atomicMin(1234.0, 1234.0); + } + + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicMin_double_v4(Dummy* address, double* result) { + *result = atomicMin(address, 1234.0); + } + + __global__ void atomicMin_double_v5(char* address, double* result) { + *result = atomicMin(address, 1234.0); + } + + __global__ void atomicMin_double_v6(short* address, double* result) { + *result = atomicMin(address, 1234.0); + } + + __global__ void atomicMin_double_v7(long* address, double* result) { + *result = atomicMin(address, 1234.0); + } + + __global__ void atomicMin_double_v8(long long* address, double* result) { + *result = atomicMin(address, 1234.0); + } + )"}; diff --git a/projects/hip-tests/catch/unit/atomics/atomicMin_system.cc b/projects/hip-tests/catch/unit/atomics/atomicMin_system.cc new file mode 100644 index 0000000000..7474a2e10d --- /dev/null +++ b/projects/hip-tests/catch/unit/atomics/atomicMin_system.cc @@ -0,0 +1,124 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup atomicMin_system atomicMin_system + * @{ + * @ingroup AtomicsTest + * `atomicMin_system(TestType* address, TestType* val)` - + * performs system-wide atomic minimum between address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs atomicMin_system from multiple threads on the same address. + * - Uses multiple devices and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMin_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Same_Address", "", int, unsigned int, + unsigned long, unsigned long long) { +#endif + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin_system from multiple threads on adjacent addresses. + * - Uses multiple devices and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMin_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Adjacent_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { +#endif + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs atomicMin_system from multiple threads on scaterred addresses. + * - Uses multiple devices and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/atomicMin_system.cc + * Test requirements + * ------------------------ + * - Multi-device + * - HIP_VERSION >= 5.2 + */ +#if HT_AMD +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long, float, double) { +#else +TEMPLATE_TEST_CASE("Unit_atomicMin_system_Positive_Peer_GPUs_Scattered_Addresses", "", int, + unsigned int, unsigned long, unsigned long long) { +#endif + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::MultipleDeviceMultipleKernelTest( + 2, 2, warp_size, cache_line_size); + } + } +} diff --git a/projects/hip-tests/catch/unit/atomics/min_max_common.hh b/projects/hip-tests/catch/unit/atomics/min_max_common.hh new file mode 100644 index 0000000000..c171c6f3c6 --- /dev/null +++ b/projects/hip-tests/catch/unit/atomics/min_max_common.hh @@ -0,0 +1,360 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +#include +#include +#include +#include + +namespace cg = cooperative_groups; + +namespace MinMax { +enum class AtomicOperation { + kMin = 0, + kMinSystem, + kMax, + kMaxSystem, + kSafeMin, + kUnsafeMin, + kSafeMax, + kUnsafeMax +}; + +constexpr auto kIntegerTestValue = 5; +constexpr auto kFloatingPointTestValue = 5.5; + +template +__host__ __device__ TestType GetTestValue() { + TestType test_value = + std::is_floating_point_v ? kFloatingPointTestValue : kIntegerTestValue; + + if constexpr (operation == AtomicOperation::kMin || operation == AtomicOperation::kMinSystem || + operation == AtomicOperation::kUnsafeMin || + operation == AtomicOperation::kSafeMin) { + return test_value - 2; + } + + return test_value + 2; +} + +template +__device__ TestType PerformAtomicOperation(TestType* const mem) { + const auto val = GetTestValue(); + + if constexpr (operation == AtomicOperation::kMin) { + return atomicMin(mem, val); + } else if constexpr (operation == AtomicOperation::kMinSystem) { + return atomicMin_system(mem, val); + } else if constexpr (operation == AtomicOperation::kMax) { + return atomicMax(mem, val); + } else if constexpr (operation == AtomicOperation::kMaxSystem) { + return atomicMax_system(mem, val); + } else if constexpr (operation == AtomicOperation::kUnsafeMin) { + return unsafeAtomicMin(mem, val); + } else if constexpr (operation == AtomicOperation::kSafeMin) { + return safeAtomicMin(mem, val); + } else if constexpr (operation == AtomicOperation::kUnsafeMax) { + return unsafeAtomicMax(mem, val); + } else if constexpr (operation == AtomicOperation::kSafeMax) { + return safeAtomicMax(mem, val); + } +} + +template +__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) { + __shared__ TestType shared_mem; + + const auto tid = cg::this_grid().thread_rank(); + + TestType* const mem = use_shared_mem ? &shared_mem : global_mem; + + if constexpr (use_shared_mem) { + if (tid == 0) mem[0] = global_mem[0]; + __syncthreads(); + } + + old_vals[tid] = PerformAtomicOperation(mem); + + if constexpr (use_shared_mem) { + __syncthreads(); + if (tid == 0) global_mem[0] = mem[0]; + } +} + +template +__host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned int pitch, + const unsigned int idx) { + const auto byte_ptr = reinterpret_cast(ptr); + return reinterpret_cast(byte_ptr + idx * pitch); +} + +template +__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals, + const unsigned int width, const unsigned pitch) { + extern __shared__ uint8_t shared_mem[]; + + const auto tid = cg::this_grid().thread_rank(); + + TestType* const mem = use_shared_mem ? reinterpret_cast(shared_mem) : global_mem; + + if constexpr (use_shared_mem) { + if (tid < width) { + const auto target = PitchedOffset(mem, pitch, tid); + *target = *PitchedOffset(global_mem, pitch, tid); + }; + __syncthreads(); + } + + old_vals[tid] = + PerformAtomicOperation(PitchedOffset(mem, pitch, tid % width)); + + if constexpr (use_shared_mem) { + __syncthreads(); + if (tid < width) { + const auto target = PitchedOffset(global_mem, pitch, tid); + *target = *PitchedOffset(mem, pitch, tid); + }; + } +} + +struct TestParams { + auto ThreadCount() const { + return blocks.x * blocks.y * blocks.z * threads.x * threads.y * threads.z; + } + + dim3 blocks; + dim3 threads; + unsigned int num_devices = 1u; + unsigned int kernel_count = 1u; + unsigned int width = 1u; + unsigned int pitch = 0u; + unsigned int host_thread_count = 0u; + LinearAllocs alloc_type; +}; + +template +std::tuple, std::vector> TestKernelHostRef(const TestParams& p) { + const auto val = GetTestValue(); + + const auto thread_count = p.num_devices * p.kernel_count * p.ThreadCount(); + + TestType test_value = + std::is_floating_point_v ? kFloatingPointTestValue : kIntegerTestValue; + + std::vector res_vals(p.width, test_value); + std::vector old_vals; + old_vals.reserve(thread_count); + + for (auto tid = 0u; tid < thread_count; ++tid) { + auto& res = res_vals[tid % p.width]; + old_vals.push_back(res); + + if constexpr (operation == AtomicOperation::kMin || operation == AtomicOperation::kMinSystem || + operation == AtomicOperation::kUnsafeMin || + operation == AtomicOperation::kSafeMin) { + res = std::min(res, val); + } else if constexpr (operation == AtomicOperation::kMax || + operation == AtomicOperation::kMaxSystem || + operation == AtomicOperation::kUnsafeMax || + operation == AtomicOperation::kSafeMax) { + res = std::max(res, val); + } + } + + return {res_vals, old_vals}; +} + +template +void Verify(const TestParams& p, std::vector& res_vals, std::vector& old_vals) { + auto [expected_res_vals, expected_old_vals] = TestKernelHostRef(p); + + for (auto i = 0u; i < res_vals.size(); ++i) { + INFO("Results index: " << i); + REQUIRE(expected_res_vals[i] == res_vals[i]); + } + + std::sort(begin(old_vals), end(old_vals)); + std::sort(begin(expected_old_vals), end(expected_old_vals)); + for (auto i = 0u; i < old_vals.size(); ++i) { + INFO("Old values index: " << i); + REQUIRE(expected_old_vals[i] == old_vals[i]); + } +} + +template +void LaunchKernel(const TestParams& p, hipStream_t stream, TestType* const mem_ptr, + TestType* const old_vals) { + const auto shared_mem_size = use_shared_mem ? p.width * p.pitch : 0u; + if (p.width == 1 && p.pitch == sizeof(TestType)) + TestKernel + <<>>(mem_ptr, old_vals); + else + TestKernel + <<>>(mem_ptr, old_vals, p.width, p.pitch); +} + +template +void TestCore(const TestParams& p) { + const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType); + std::vector> old_vals_devs; + std::vector streams; + for (auto i = 0; i < p.num_devices; ++i) { + HIP_CHECK(hipSetDevice(i)); + old_vals_devs.emplace_back(LinearAllocs::hipMalloc, old_vals_alloc_size); + for (auto j = 0; j < p.kernel_count; ++j) { + streams.emplace_back(Streams::created); + } + } + + const auto mem_alloc_size = p.width * p.pitch; + LinearAllocGuard mem_dev(p.alloc_type, mem_alloc_size); + + std::vector old_vals(p.num_devices * p.kernel_count * p.ThreadCount()); + std::vector res_vals(p.width); + + TestType* const mem_ptr = + p.alloc_type == LinearAllocs::hipMalloc ? mem_dev.ptr() : mem_dev.host_ptr(); + + TestType test_value = + std::is_floating_point_v ? kFloatingPointTestValue : kIntegerTestValue; + HIP_CHECK(hipMemset(mem_ptr, 0, mem_alloc_size)); + for (int i = 0; i < p.width * p.pitch / sizeof(TestType); ++i) { + HIP_CHECK(hipMemcpy(&mem_ptr[i], &test_value, sizeof(TestType), hipMemcpyHostToDevice)); + } + + for (auto i = 0u; i < p.num_devices; ++i) { + for (auto j = 0u; j < p.kernel_count; ++j) { + const auto& stream = streams[i * p.kernel_count + j].stream(); + const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount(); + LaunchKernel(p, stream, mem_dev.ptr(), old_vals); + } + } + + for (auto i = 0u; i < p.num_devices; ++i) { + const auto device_offset = i * p.kernel_count * p.ThreadCount(); + HIP_CHECK(hipMemcpy(old_vals.data() + device_offset, old_vals_devs[i].ptr(), + old_vals_alloc_size, hipMemcpyDeviceToHost)); + } + HIP_CHECK(hipMemcpy2D(res_vals.data(), sizeof(TestType), mem_ptr, p.pitch, sizeof(TestType), + p.width, hipMemcpyDeviceToHost)); + + Verify(p, res_vals, old_vals); +} + +template +void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) { + TestParams params; + params.num_devices = 1; + params.kernel_count = 1; + params.threads = GENERATE(dim3(1023)); + params.width = width; + params.pitch = pitch; + + SECTION("Global memory") { + params.blocks = GENERATE(dim3(3)); + using LA = LinearAllocs; + for (const auto alloc_type : + {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } + } + + SECTION("Shared memory") { + params.blocks = dim3(1); + params.alloc_type = LinearAllocs::hipMalloc; + TestCore(params); + } +} + +template +void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsigned int width, + const unsigned int pitch) { + int concurrent_kernels = 0; + HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, 0)); + if (!concurrent_kernels) { + HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution"); + return; + } + + TestParams params; + params.num_devices = 1; + params.kernel_count = kernel_count; + params.blocks = GENERATE(dim3(3)); + params.threads = GENERATE(dim3(1023)); + params.width = width; + params.pitch = pitch; + + using LA = LinearAllocs; + for (const auto alloc_type : + {LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } +} + +template +void MultipleDeviceMultipleKernelTest(const unsigned int num_devices, + const unsigned int kernel_count, const unsigned int width, + const unsigned int pitch) { + if (num_devices > 1) { + if (HipTest::getDeviceCount() < num_devices) { + std::string msg = std::to_string(num_devices) + " devices are required"; + HipTest::HIP_SKIP_TEST(msg.c_str()); + return; + } + } + + if (kernel_count > 1) { + for (auto i = 0u; i < num_devices; ++i) { + int concurrent_kernels = 0; + HIP_CHECK(hipDeviceGetAttribute(&concurrent_kernels, hipDeviceAttributeConcurrentKernels, i)); + if (!concurrent_kernels) { + HipTest::HIP_SKIP_TEST("Test requires support for concurrent kernel execution"); + return; + } + } + } + + TestParams params; + params.num_devices = num_devices; + params.kernel_count = kernel_count; + params.blocks = GENERATE(dim3(3)); + params.threads = GENERATE(dim3(1023)); + params.width = width; + params.pitch = pitch; + + using LA = LinearAllocs; + for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) { + params.alloc_type = alloc_type; + DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) { + TestCore(params); + } + } +} +} // namespace MinMax diff --git a/projects/hip-tests/catch/unit/atomics/safeAtomicMax.cc b/projects/hip-tests/catch/unit/atomics/safeAtomicMax.cc new file mode 100644 index 0000000000..581a4a566a --- /dev/null +++ b/projects/hip-tests/catch/unit/atomics/safeAtomicMax.cc @@ -0,0 +1,175 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup safeAtomicMax safeAtomicMax + * @{ + * @ingroup AtomicsTest + * `safeAtomicMax(TestType* address, TestType* val)` - + * calculates maximum between address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on the same address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_SameAddress", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Adjacent_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on the scattered addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Scattered_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Multi_Kernel_Same_Address", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMax from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} diff --git a/projects/hip-tests/catch/unit/atomics/safeAtomicMin.cc b/projects/hip-tests/catch/unit/atomics/safeAtomicMin.cc new file mode 100644 index 0000000000..810be72ca4 --- /dev/null +++ b/projects/hip-tests/catch/unit/atomics/safeAtomicMin.cc @@ -0,0 +1,175 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup safeAtomicMin safeAtomicMin + * @{ + * @ingroup AtomicsTest + * `safeAtomicMin(TestType* address, TestType* val)` - + * calculates minimum between address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on the same address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_SameAddress", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Adjacent_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on the scattered addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Scattered_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Multi_Kernel_Same_Address", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs safeAtomicMin from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/safeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_safeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} diff --git a/projects/hip-tests/catch/unit/atomics/unsafeAtomicMax.cc b/projects/hip-tests/catch/unit/atomics/unsafeAtomicMax.cc new file mode 100644 index 0000000000..2341e2d8c7 --- /dev/null +++ b/projects/hip-tests/catch/unit/atomics/unsafeAtomicMax.cc @@ -0,0 +1,175 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup unsafeAtomicMax unsafeAtomicMax + * @{ + * @ingroup AtomicsTest + * `unsafeAtomicMax(TestType* address, TestType* val)` - + * calculates maximum between address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on the same address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_SameAddress", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Adjacent_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on the scattered addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Scattered_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Multi_Kernel_Same_Address", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Multi_Kernel_Adjacent_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMax from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMax.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMax_Positive_Multi_Kernel_Scattered_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +} diff --git a/projects/hip-tests/catch/unit/atomics/unsafeAtomicMin.cc b/projects/hip-tests/catch/unit/atomics/unsafeAtomicMin.cc new file mode 100644 index 0000000000..60b827854a --- /dev/null +++ b/projects/hip-tests/catch/unit/atomics/unsafeAtomicMin.cc @@ -0,0 +1,175 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "min_max_common.hh" + +#include + +/** + * @addtogroup unsafeAtomicMin unsafeAtomicMin + * @{ + * @ingroup AtomicsTest + * `unsafeAtomicMin(TestType* address, TestType* val)` - + * calculates minimum between address and val, returns old value. + */ + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on the same address. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_SameAddress", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceSingleKernelTest( + 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Adjacent_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on the scattered addresses. + * - Uses only one device and launches one kernel. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Scattered_Addresses", "", float, double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceSingleKernelTest( + warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on the same address. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Multi_Kernel_Same_Address", "", float, double) { + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, 1, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on adjacent addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Multi_Kernel_Adjacent_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Adjacent address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, sizeof(TestType)); + } + } +} + +/** + * Test Description + * ------------------------ + * - Performs unsafeAtomicMin from multiple threads on the scattered addresses. + * - Uses only one device and launches multiple kernels. + * Test source + * ------------------------ + * - unit/atomics/unsafeAtomicMin.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_unsafeAtomicMin_Positive_Multi_Kernel_Scattered_Addresses", "", float, + double) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Scattered address " << current) { + MinMax::SingleDeviceMultipleKernelTest( + 2, warp_size, cache_line_size); + } + } +}