From af3497362eecdf2bcb5330d168a4d4d77c03dc82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirza=20Halil=C4=8Devi=C4=87?= <109971222+mirza-halilcevic@users.noreply.github.com> Date: Thu, 28 Dec 2023 15:55:53 +0100 Subject: [PATCH] EXSWHTEC-275 - Implement tests for atomic CAS operations #408 Change-Id: I77c6995f1f85dedce3b3afb907abb03a4a1f2b83 [ROCm/hip-tests commit: 7659470dbc88f6b08a029cdcfd304edb868fe718] --- .../catch/unit/atomics/CMakeLists.txt | 8 + .../catch/unit/atomics/arithmetic_common.hh | 39 ++- .../hip-tests/catch/unit/atomics/atomicCAS.cc | 172 +++++++++++ .../atomics/atomicCAS_negative_kernels.cc | 62 ++++ .../atomics/atomicCAS_negative_kernels_rtc.hh | 273 ++++++++++++++++++ .../catch/unit/atomics/atomicCAS_system.cc | 185 ++++++++++++ 6 files changed, 736 insertions(+), 3 deletions(-) create mode 100644 projects/hip-tests/catch/unit/atomics/atomicCAS.cc create mode 100644 projects/hip-tests/catch/unit/atomics/atomicCAS_negative_kernels.cc create mode 100644 projects/hip-tests/catch/unit/atomics/atomicCAS_negative_kernels_rtc.hh create mode 100644 projects/hip-tests/catch/unit/atomics/atomicCAS_system.cc diff --git a/projects/hip-tests/catch/unit/atomics/CMakeLists.txt b/projects/hip-tests/catch/unit/atomics/CMakeLists.txt index fecef54c85..1ec472bffc 100644 --- a/projects/hip-tests/catch/unit/atomics/CMakeLists.txt +++ b/projects/hip-tests/catch/unit/atomics/CMakeLists.txt @@ -46,6 +46,8 @@ set(TEST_SRC atomicSub_system.cc atomicInc.cc atomicDec.cc + atomicCAS.cc + atomicCAS_system.cc atomicExch.cc atomicExch_system.cc __hip_atomic_fetch_and.cc @@ -57,6 +59,7 @@ set(TEST_SRC if(HIP_PLATFORM MATCHES "nvidia") set_source_files_properties(atomicAdd_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicSub_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") + set_source_files_properties(atomicCAS_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicExch_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicAnd_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") set_source_files_properties(atomicOr_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80") @@ -123,6 +126,11 @@ add_test(NAME Unit_atomicDec_Negative_Parameters ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} atomicDec_negative_kernels.cc 8) +add_test(NAME Unit_atomicCAS_Negative_Parameters + COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py + ${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH} + atomicCAS_negative_kernels.cc 48) + # SWDEV-435667: Below 2 tests failed in stress test on 01/12/23 #add_test(NAME Unit_atomicExch_Negative_Parameters # COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py diff --git a/projects/hip-tests/catch/unit/atomics/arithmetic_common.hh b/projects/hip-tests/catch/unit/atomics/arithmetic_common.hh index 0be866390e..cc701a06a0 100644 --- a/projects/hip-tests/catch/unit/atomics/arithmetic_common.hh +++ b/projects/hip-tests/catch/unit/atomics/arithmetic_common.hh @@ -38,7 +38,9 @@ enum class AtomicOperation { kInc, kDec, kUnsafeAdd, - kSafeAdd + kSafeAdd, + kCASAdd, + kCASAddSystem }; // Constants that are passed as operands to the atomic operations @@ -59,6 +61,31 @@ __host__ __device__ TestType GetTestValue() { return std::is_floating_point_v ? kFloatingPointTestValue : kIntegerTestValue; } +// Implements an atomic addition via atomicCAS +template __device__ TestType CASAtomicAdd(TestType* address, TestType val) { + TestType old = *address, assumed; + + do { + assumed = old; + old = atomicCAS(address, assumed, val + assumed); + } while (assumed != old); + + return old; +} + +// Implements an atomic addition via atomicCAS_system +template +__device__ TestType CASAtomicAddSystem(TestType* address, TestType val) { + TestType old = *address, assumed; + + do { + assumed = old; + old = atomicCAS_system(address, assumed, val + assumed); + } while (assumed != old); + + return old; +} + // Performs an atomic operation on parameter `mem` based on the `operation` enumerator. template __device__ TestType PerformAtomicOperation(TestType* const mem) { @@ -80,6 +107,10 @@ __device__ TestType PerformAtomicOperation(TestType* const mem) { return unsafeAtomicAdd(mem, val); } else if constexpr (operation == AtomicOperation::kSafeAdd) { return safeAtomicAdd(mem, val); + } else if constexpr (operation == AtomicOperation::kCASAdd) { + return CASAtomicAdd(mem, val); + } else if constexpr (operation == AtomicOperation::kCASAddSystem) { + return CASAtomicAddSystem(mem, val); } } @@ -202,7 +233,8 @@ std::tuple, std::vector> TestKernelHostRef(const if constexpr (operation == AtomicOperation::kAdd || operation == AtomicOperation::kAddSystem || operation == AtomicOperation::kUnsafeAdd || - operation == AtomicOperation::kSafeAdd) { + operation == AtomicOperation::kSafeAdd || operation == AtomicOperation::kCASAdd || + operation == AtomicOperation::kCASAddSystem) { res = res + val; } else if constexpr (operation == AtomicOperation::kSub || operation == AtomicOperation::kSubSystem) { @@ -270,7 +302,8 @@ void HostAtomicOperation(const unsigned int iterations, TestType* mem, TestType* const auto val = GetTestValue(); for (auto i = 0u; i < iterations; ++i) { - if constexpr (operation == AtomicOperation::kAddSystem) { + if constexpr (operation == AtomicOperation::kAddSystem || + operation == AtomicOperation::kCASAddSystem) { old_vals[i] = __atomic_fetch_add(PitchedOffset(mem, pitch, i % width), val, __ATOMIC_RELAXED); } else if constexpr (operation == AtomicOperation::kSubSystem) { old_vals[i] = __atomic_fetch_sub(PitchedOffset(mem, pitch, i % width), val, __ATOMIC_RELAXED); diff --git a/projects/hip-tests/catch/unit/atomics/atomicCAS.cc b/projects/hip-tests/catch/unit/atomics/atomicCAS.cc new file mode 100644 index 0000000000..3be684306d --- /dev/null +++ b/projects/hip-tests/catch/unit/atomics/atomicCAS.cc @@ -0,0 +1,172 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" +#include "atomicCAS_negative_kernels_rtc.hh" + +#include + +/** + * @addtogroup atomicCAS atomicCAS + * @{ + * @ingroup AtomicsTest + */ + +#ifdef HT_NVIDIA +#define TYPES +#else +#define TYPES , float, double +#endif + +/** + * Test Description + * ------------------------ + * - Executes a single kernel on a single device wherein all threads will perform an atomic + * addition, implemented using an atomic CAS operation, on a target memory location. Each thread + * will add the same value to the memory location, storing the return value into a separate output + * array slot corresponding to it. Once complete, the output array and target memory is validated to + * contain all the expected values. Several memory access patterns are tested: + * -# All threads exchange to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicCAS + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Shared memory + * - Several grid and block dimension combinations (only one block is used for shared memory). + * Test source + * ------------------------ + * - unit/atomics/atomicCAS.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicCAS_Positive", "", int, unsigned int, unsigned long long TYPES) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceSingleKernelTest(1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceSingleKernelTest(warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on a single device wherein all threads will perform + * an atomic addition, implemented using an atomic CAS operation, on a target memory location. Each + * thread will add the same value to the memory location, storing the return value into a separate + * output array slot corresponding to it. Once complete, the output array and target memory is + * validated to contain all the expected values. Several memory access patterns are tested: + * -# All threads exchange to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicCAS + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicCAS.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicCAS_Positive_Multi_Kernel", "", int, unsigned int, + unsigned long long TYPES) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + SingleDeviceMultipleKernelTest(2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + SingleDeviceMultipleKernelTest(2, warp_size, + cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - RTCs kernels that pass combinations of arguments of invalid types for all overloads of + * atomicCAS. + * Test source + * ------------------------ + * - unit/atomics/atomicCAS.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEST_CASE("Unit_atomicCAS_Negative_Parameters_RTC") { + hiprtcProgram program{}; + + const auto program_source = GENERATE(kAtomicCAS_int, kAtomicCAS_uint, kAtomicCAS_ulong, + kAtomicCAS_ulonglong, kAtomicCAS_float, kAtomicCAS_double); + HIPRTC_CHECK( + hiprtcCreateProgram(&program, program_source, "atomicCAS_negative.cc", 0, nullptr, nullptr)); + hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)}; + + // Get the compile log and count compiler error messages + size_t log_size{}; + HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size)); + std::string log(log_size, ' '); + HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data())); + int error_count{0}; + + int expected_error_count{8}; + std::string error_message{"error:"}; + + size_t n_pos = log.find(error_message, 0); + while (n_pos != std::string::npos) { + ++error_count; + n_pos = log.find(error_message, n_pos + 1); + } + + HIPRTC_CHECK(hiprtcDestroyProgram(&program)); + HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION); + REQUIRE(error_count == expected_error_count); +} diff --git a/projects/hip-tests/catch/unit/atomics/atomicCAS_negative_kernels.cc b/projects/hip-tests/catch/unit/atomics/atomicCAS_negative_kernels.cc new file mode 100644 index 0000000000..b0390bb3fa --- /dev/null +++ b/projects/hip-tests/catch/unit/atomics/atomicCAS_negative_kernels.cc @@ -0,0 +1,62 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} +}; + +#define ATOMIC_CAS_NEGATIVE_KERNEL(type_name) \ + __global__ void atomicCAS_v1(type_name* address, type_name* result) { \ + *result = atomicCAS(&address, 12, 13); \ + } \ + __global__ void atomicCAS_v2(type_name* address, type_name* result) { \ + *result = atomicCAS(address, address, 13); \ + } \ + __global__ void atomicCAS_v3(type_name* address, type_name* result) { \ + *result = atomicCAS(address, 12, address); \ + } \ + __global__ void atomicCAS_v4(Dummy* address, type_name* result) { \ + *result = atomicCAS(address, 12, 13); \ + } \ + __global__ void atomicCAS_v5(char* address, type_name* result) { \ + *result = atomicCAS(address, 12, 13); \ + } \ + __global__ void atomicCAS_v6(short* address, type_name* result) { \ + *result = atomicCAS(address, 12, 13); \ + } \ + __global__ void atomicCAS_v7(long* address, type_name* result) { \ + *result = atomicCAS(address, 12, 13); \ + } \ + __global__ void atomicCAS_v8(long long* address, type_name* result) { \ + *result = atomicCAS(address, 12, 13); \ + } + +ATOMIC_CAS_NEGATIVE_KERNEL(int) +ATOMIC_CAS_NEGATIVE_KERNEL(unsigned int) +ATOMIC_CAS_NEGATIVE_KERNEL(unsigned long) +ATOMIC_CAS_NEGATIVE_KERNEL(unsigned long long) +ATOMIC_CAS_NEGATIVE_KERNEL(float) +ATOMIC_CAS_NEGATIVE_KERNEL(double) diff --git a/projects/hip-tests/catch/unit/atomics/atomicCAS_negative_kernels_rtc.hh b/projects/hip-tests/catch/unit/atomics/atomicCAS_negative_kernels_rtc.hh new file mode 100644 index 0000000000..952c4892fb --- /dev/null +++ b/projects/hip-tests/catch/unit/atomics/atomicCAS_negative_kernels_rtc.hh @@ -0,0 +1,273 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#pragma once + +/* +Negative kernels used for the atomics negative Test Cases that are using RTC. +*/ + +static constexpr auto kAtomicCAS_int{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_int_v1(int* address, int* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_int_v2(int* address, int* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_int_v3(int* address, int* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_int_v4(Dummy* address, int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_int_v5(char* address, int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_int_v6(short* address, int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_int_v7(long* address, int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_int_v8(long long* address, int* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; + +static constexpr auto kAtomicCAS_uint{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_uint_v1(unsigned int* address, unsigned int* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_uint_v2(unsigned int* address, unsigned int* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_uint_v3(unsigned int* address, unsigned int* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_uint_v4(Dummy* address, unsigned int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_uint_v5(char* address, unsigned int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_uint_v6(short* address, unsigned int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_uint_v7(long* address, unsigned int* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_uint_v8(long long* address, unsigned int* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; + +static constexpr auto kAtomicCAS_ulong{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_ulong_v1(unsigned long* address, unsigned long* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_ulong_v2(unsigned long* address, unsigned long* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_ulong_v3(unsigned long* address, unsigned long* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_ulong_v4(Dummy* address, unsigned long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulong_v5(char* address, unsigned long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulong_v6(short* address, unsigned long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulong_v7(long* address, unsigned long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulong_v8(long long* address, unsigned long* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; + +static constexpr auto kAtomicCAS_ulonglong{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_ulonglong_v1(unsigned long long* address, unsigned long long* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_ulonglong_v2(unsigned long long* address, unsigned long long* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_ulonglong_v3(unsigned long long* address, unsigned long long* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_ulonglong_v4(Dummy* address, unsigned long long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulonglong_v5(char* address, unsigned long long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulonglong_v6(short* address, unsigned long long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulonglong_v7(long* address, unsigned long long* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_ulonglong_v8(long long* address, unsigned long long* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; + +static constexpr auto kAtomicCAS_float{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_float_v1(float* address, float* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_float_v2(float* address, float* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_float_v3(float* address, float* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_float_v4(Dummy* address, float* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_float_v5(char* address, float* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_float_v6(short* address, float* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_float_v7(long* address, float* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_float_v8(long long* address, float* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; + +static constexpr auto kAtomicCAS_double{ + R"( + class Dummy { + public: + __device__ Dummy() {} + __device__ ~Dummy() {} + }; + + __global__ void atomicCAS_double_v1(double* address, double* result) { + *result = atomicCAS(&address, 12, 13); + } + + __global__ void atomicCAS_double_v2(double* address, double* result) { + *result = atomicCAS(address, address, 13); + } + + __global__ void atomicCAS_double_v3(double* address, double* result) { + *result = atomicCAS(address, 12, address); + } + + __global__ void atomicCAS_double_v4(Dummy* address, double* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_double_v5(char* address, double* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_double_v6(short* address, double* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_double_v7(long* address, double* result) { + *result = atomicCAS(address, 12, 13); + } + + __global__ void atomicCAS_double_v8(long long* address, double* result) { + *result = atomicCAS(address, 12, 13); + } + )"}; diff --git a/projects/hip-tests/catch/unit/atomics/atomicCAS_system.cc b/projects/hip-tests/catch/unit/atomics/atomicCAS_system.cc new file mode 100644 index 0000000000..8f2dd8306b --- /dev/null +++ b/projects/hip-tests/catch/unit/atomics/atomicCAS_system.cc @@ -0,0 +1,185 @@ +/* +Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "arithmetic_common.hh" + +#include + +/** + * @addtogroup atomicCAS_system atomicCAS_system + * @{ + * @ingroup AtomicsTest + */ + +#ifdef HT_NVIDIA +#define TYPES +#else +#define TYPES , float, double +#endif + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times concurrently on a two devices wherein all threads will perform + * an atomic addition, implemented using an atomic CAS operation, on a target memory location. Each + * thread will add the same value to the memory location, storing the return value into a separate + * output array slot corresponding to it. Once complete, the output array and target memory is + * validated to contain all the expected values. Several memory access patterns are tested: + * -# All threads exchange to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicCAS_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicCAS_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Peer_GPUs", "", int, unsigned int, + unsigned long long TYPES) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, 1, sizeof(TestType)); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, sizeof(TestType)); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, cache_line_size); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel on a single device wherein all threads will perform + * an atomic addition, implemented using an atomic CAS operation, on a target memory location. + * Each thread will add the same value to the memory location, storing the return value into a + * separate output array slot corresponding to it. While the kernel is running, the host + * performs atomic additions, in 4 threads, on the same memory location(s). Once complete, the + * output array and target memory is validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads exchange to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicCAS_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicCAS_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_GPU", "", int, unsigned int, + unsigned long long TYPES) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, 1, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, warp_size, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 1, 1, warp_size, cache_line_size, 4); + } + } +} + +/** + * Test Description + * ------------------------ + * - Executes a kernel two times on two devices wherein all threads will perform + * an atomic addition, implemented using an atomic CAS operation, on a target memory location. + * Each thread will add the same value to the memory location, storing the return value into a + * separate output array slot corresponding to it. While the kernel is running, the host + * performs atomic additions, in 4 threads, on the same memory location(s). Once complete, the + * output array and target memory is validated to contain all the expected values. Several + * memory access patterns are tested: + * -# All threads exchange to a single, compile time deducible, memory location + * -# Each thread targets an array containing warp_size elements, using tid % warp_size + * for indexing + * -# Same as the above, but the elements are spread out by L1 cache line size bytes. + * + * - The test is run for: + * - All overloads of atomicCAS_system + * - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory + * - Several grid and block dimension combinations. + * Test source + * ------------------------ + * - unit/atomics/atomicCAS_system.cc + * Test requirements + * ------------------------ + * - HIP_VERSION >= 5.2 + */ +TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int, + unsigned long long TYPES) { + int warp_size = 0; + HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0)); + const auto cache_line_size = 128u; + + for (auto current = 0; current < cmd_options.iterations; ++current) { + DYNAMIC_SECTION("Same address " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, 1, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Adjacent addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, sizeof(TestType), 4); + } + + DYNAMIC_SECTION("Scattered addresses " << current) { + MultipleDeviceMultipleKernelAndHostTest( + 2, 2, warp_size, cache_line_size, 4); + } + } +}