EXSWHTEC-275 - Implement tests for atomic CAS operations #408
Change-Id: I77c6995f1f85dedce3b3afb907abb03a4a1f2b83
[ROCm/hip-tests commit: 7659470dbc]
这个提交包含在:
@@ -46,6 +46,8 @@ set(TEST_SRC
|
||||
atomicSub_system.cc
|
||||
atomicInc.cc
|
||||
atomicDec.cc
|
||||
atomicCAS.cc
|
||||
atomicCAS_system.cc
|
||||
atomicExch.cc
|
||||
atomicExch_system.cc
|
||||
__hip_atomic_fetch_and.cc
|
||||
@@ -57,6 +59,7 @@ set(TEST_SRC
|
||||
if(HIP_PLATFORM MATCHES "nvidia")
|
||||
set_source_files_properties(atomicAdd_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80")
|
||||
set_source_files_properties(atomicSub_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80")
|
||||
set_source_files_properties(atomicCAS_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80")
|
||||
set_source_files_properties(atomicExch_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80")
|
||||
set_source_files_properties(atomicAnd_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80")
|
||||
set_source_files_properties(atomicOr_system.cc PROPERTIES COMPILE_FLAGS "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80")
|
||||
@@ -123,6 +126,11 @@ add_test(NAME Unit_atomicDec_Negative_Parameters
|
||||
${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
atomicDec_negative_kernels.cc 8)
|
||||
|
||||
add_test(NAME Unit_atomicCAS_Negative_Parameters
|
||||
COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
${CMAKE_CURRENT_SOURCE_DIR} ${HIP_PLATFORM} ${HIP_PATH}
|
||||
atomicCAS_negative_kernels.cc 48)
|
||||
|
||||
# SWDEV-435667: Below 2 tests failed in stress test on 01/12/23
|
||||
#add_test(NAME Unit_atomicExch_Negative_Parameters
|
||||
# COMMAND python3 ${CMAKE_CURRENT_SOURCE_DIR}/../compileAndCaptureOutput.py
|
||||
|
||||
@@ -38,7 +38,9 @@ enum class AtomicOperation {
|
||||
kInc,
|
||||
kDec,
|
||||
kUnsafeAdd,
|
||||
kSafeAdd
|
||||
kSafeAdd,
|
||||
kCASAdd,
|
||||
kCASAddSystem
|
||||
};
|
||||
|
||||
// Constants that are passed as operands to the atomic operations
|
||||
@@ -59,6 +61,31 @@ __host__ __device__ TestType GetTestValue() {
|
||||
return std::is_floating_point_v<TestType> ? kFloatingPointTestValue : kIntegerTestValue;
|
||||
}
|
||||
|
||||
// Implements an atomic addition via atomicCAS
|
||||
template <typename TestType> __device__ TestType CASAtomicAdd(TestType* address, TestType val) {
|
||||
TestType old = *address, assumed;
|
||||
|
||||
do {
|
||||
assumed = old;
|
||||
old = atomicCAS(address, assumed, val + assumed);
|
||||
} while (assumed != old);
|
||||
|
||||
return old;
|
||||
}
|
||||
|
||||
// Implements an atomic addition via atomicCAS_system
|
||||
template <typename TestType>
|
||||
__device__ TestType CASAtomicAddSystem(TestType* address, TestType val) {
|
||||
TestType old = *address, assumed;
|
||||
|
||||
do {
|
||||
assumed = old;
|
||||
old = atomicCAS_system(address, assumed, val + assumed);
|
||||
} while (assumed != old);
|
||||
|
||||
return old;
|
||||
}
|
||||
|
||||
// Performs an atomic operation on parameter `mem` based on the `operation` enumerator.
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
__device__ TestType PerformAtomicOperation(TestType* const mem) {
|
||||
@@ -80,6 +107,10 @@ __device__ TestType PerformAtomicOperation(TestType* const mem) {
|
||||
return unsafeAtomicAdd(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kSafeAdd) {
|
||||
return safeAtomicAdd(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kCASAdd) {
|
||||
return CASAtomicAdd(mem, val);
|
||||
} else if constexpr (operation == AtomicOperation::kCASAddSystem) {
|
||||
return CASAtomicAddSystem(mem, val);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -202,7 +233,8 @@ std::tuple<std::vector<TestType>, std::vector<TestType>> TestKernelHostRef(const
|
||||
|
||||
if constexpr (operation == AtomicOperation::kAdd || operation == AtomicOperation::kAddSystem ||
|
||||
operation == AtomicOperation::kUnsafeAdd ||
|
||||
operation == AtomicOperation::kSafeAdd) {
|
||||
operation == AtomicOperation::kSafeAdd || operation == AtomicOperation::kCASAdd ||
|
||||
operation == AtomicOperation::kCASAddSystem) {
|
||||
res = res + val;
|
||||
} else if constexpr (operation == AtomicOperation::kSub ||
|
||||
operation == AtomicOperation::kSubSystem) {
|
||||
@@ -270,7 +302,8 @@ void HostAtomicOperation(const unsigned int iterations, TestType* mem, TestType*
|
||||
const auto val = GetTestValue<TestType, operation>();
|
||||
|
||||
for (auto i = 0u; i < iterations; ++i) {
|
||||
if constexpr (operation == AtomicOperation::kAddSystem) {
|
||||
if constexpr (operation == AtomicOperation::kAddSystem ||
|
||||
operation == AtomicOperation::kCASAddSystem) {
|
||||
old_vals[i] = __atomic_fetch_add(PitchedOffset(mem, pitch, i % width), val, __ATOMIC_RELAXED);
|
||||
} else if constexpr (operation == AtomicOperation::kSubSystem) {
|
||||
old_vals[i] = __atomic_fetch_sub(PitchedOffset(mem, pitch, i % width), val, __ATOMIC_RELAXED);
|
||||
|
||||
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "arithmetic_common.hh"
|
||||
#include "atomicCAS_negative_kernels_rtc.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicCAS atomicCAS
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
#ifdef HT_NVIDIA
|
||||
#define TYPES
|
||||
#else
|
||||
#define TYPES , float, double
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a single kernel on a single device wherein all threads will perform an atomic
|
||||
* addition, implemented using an atomic CAS operation, on a target memory location. Each thread
|
||||
* will add the same value to the memory location, storing the return value into a separate output
|
||||
* array slot corresponding to it. Once complete, the output array and target memory is validated to
|
||||
* contain all the expected values. Several memory access patterns are tested:
|
||||
* -# All threads exchange to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicCAS
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Shared memory
|
||||
* - Several grid and block dimension combinations (only one block is used for shared memory).
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicCAS.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicCAS_Positive", "", int, unsigned int, unsigned long long TYPES) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Same address " << current) {
|
||||
SingleDeviceSingleKernelTest<TestType, AtomicOperation::kCASAdd>(1, sizeof(TestType));
|
||||
}
|
||||
|
||||
DYNAMIC_SECTION("Adjacent addresses " << current) {
|
||||
SingleDeviceSingleKernelTest<TestType, AtomicOperation::kCASAdd>(warp_size, sizeof(TestType));
|
||||
}
|
||||
|
||||
DYNAMIC_SECTION("Scattered addresses " << current) {
|
||||
SingleDeviceSingleKernelTest<TestType, AtomicOperation::kCASAdd>(warp_size, cache_line_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel two times concurrently on a single device wherein all threads will perform
|
||||
* an atomic addition, implemented using an atomic CAS operation, on a target memory location. Each
|
||||
* thread will add the same value to the memory location, storing the return value into a separate
|
||||
* output array slot corresponding to it. Once complete, the output array and target memory is
|
||||
* validated to contain all the expected values. Several memory access patterns are tested:
|
||||
* -# All threads exchange to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicCAS
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicCAS.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicCAS_Positive_Multi_Kernel", "", int, unsigned int,
|
||||
unsigned long long TYPES) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Same address " << current) {
|
||||
SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kCASAdd>(2, 1, sizeof(TestType));
|
||||
}
|
||||
|
||||
DYNAMIC_SECTION("Adjacent addresses " << current) {
|
||||
SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kCASAdd>(2, warp_size,
|
||||
sizeof(TestType));
|
||||
}
|
||||
|
||||
DYNAMIC_SECTION("Scattered addresses " << current) {
|
||||
SingleDeviceMultipleKernelTest<TestType, AtomicOperation::kCASAdd>(2, warp_size,
|
||||
cache_line_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - RTCs kernels that pass combinations of arguments of invalid types for all overloads of
|
||||
* atomicCAS.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicCAS.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Unit_atomicCAS_Negative_Parameters_RTC") {
|
||||
hiprtcProgram program{};
|
||||
|
||||
const auto program_source = GENERATE(kAtomicCAS_int, kAtomicCAS_uint, kAtomicCAS_ulong,
|
||||
kAtomicCAS_ulonglong, kAtomicCAS_float, kAtomicCAS_double);
|
||||
HIPRTC_CHECK(
|
||||
hiprtcCreateProgram(&program, program_source, "atomicCAS_negative.cc", 0, nullptr, nullptr));
|
||||
hiprtcResult result{hiprtcCompileProgram(program, 0, nullptr)};
|
||||
|
||||
// Get the compile log and count compiler error messages
|
||||
size_t log_size{};
|
||||
HIPRTC_CHECK(hiprtcGetProgramLogSize(program, &log_size));
|
||||
std::string log(log_size, ' ');
|
||||
HIPRTC_CHECK(hiprtcGetProgramLog(program, log.data()));
|
||||
int error_count{0};
|
||||
|
||||
int expected_error_count{8};
|
||||
std::string error_message{"error:"};
|
||||
|
||||
size_t n_pos = log.find(error_message, 0);
|
||||
while (n_pos != std::string::npos) {
|
||||
++error_count;
|
||||
n_pos = log.find(error_message, n_pos + 1);
|
||||
}
|
||||
|
||||
HIPRTC_CHECK(hiprtcDestroyProgram(&program));
|
||||
HIPRTC_CHECK_ERROR(result, HIPRTC_ERROR_COMPILATION);
|
||||
REQUIRE(error_count == expected_error_count);
|
||||
}
|
||||
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
#define ATOMIC_CAS_NEGATIVE_KERNEL(type_name) \
|
||||
__global__ void atomicCAS_v1(type_name* address, type_name* result) { \
|
||||
*result = atomicCAS(&address, 12, 13); \
|
||||
} \
|
||||
__global__ void atomicCAS_v2(type_name* address, type_name* result) { \
|
||||
*result = atomicCAS(address, address, 13); \
|
||||
} \
|
||||
__global__ void atomicCAS_v3(type_name* address, type_name* result) { \
|
||||
*result = atomicCAS(address, 12, address); \
|
||||
} \
|
||||
__global__ void atomicCAS_v4(Dummy* address, type_name* result) { \
|
||||
*result = atomicCAS(address, 12, 13); \
|
||||
} \
|
||||
__global__ void atomicCAS_v5(char* address, type_name* result) { \
|
||||
*result = atomicCAS(address, 12, 13); \
|
||||
} \
|
||||
__global__ void atomicCAS_v6(short* address, type_name* result) { \
|
||||
*result = atomicCAS(address, 12, 13); \
|
||||
} \
|
||||
__global__ void atomicCAS_v7(long* address, type_name* result) { \
|
||||
*result = atomicCAS(address, 12, 13); \
|
||||
} \
|
||||
__global__ void atomicCAS_v8(long long* address, type_name* result) { \
|
||||
*result = atomicCAS(address, 12, 13); \
|
||||
}
|
||||
|
||||
ATOMIC_CAS_NEGATIVE_KERNEL(int)
|
||||
ATOMIC_CAS_NEGATIVE_KERNEL(unsigned int)
|
||||
ATOMIC_CAS_NEGATIVE_KERNEL(unsigned long)
|
||||
ATOMIC_CAS_NEGATIVE_KERNEL(unsigned long long)
|
||||
ATOMIC_CAS_NEGATIVE_KERNEL(float)
|
||||
ATOMIC_CAS_NEGATIVE_KERNEL(double)
|
||||
@@ -0,0 +1,273 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
Negative kernels used for the atomics negative Test Cases that are using RTC.
|
||||
*/
|
||||
|
||||
static constexpr auto kAtomicCAS_int{
|
||||
R"(
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicCAS_int_v1(int* address, int* result) {
|
||||
*result = atomicCAS(&address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_int_v2(int* address, int* result) {
|
||||
*result = atomicCAS(address, address, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_int_v3(int* address, int* result) {
|
||||
*result = atomicCAS(address, 12, address);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_int_v4(Dummy* address, int* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_int_v5(char* address, int* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_int_v6(short* address, int* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_int_v7(long* address, int* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_int_v8(long long* address, int* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto kAtomicCAS_uint{
|
||||
R"(
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicCAS_uint_v1(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicCAS(&address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_uint_v2(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicCAS(address, address, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_uint_v3(unsigned int* address, unsigned int* result) {
|
||||
*result = atomicCAS(address, 12, address);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_uint_v4(Dummy* address, unsigned int* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_uint_v5(char* address, unsigned int* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_uint_v6(short* address, unsigned int* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_uint_v7(long* address, unsigned int* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_uint_v8(long long* address, unsigned int* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto kAtomicCAS_ulong{
|
||||
R"(
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicCAS_ulong_v1(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicCAS(&address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_ulong_v2(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicCAS(address, address, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_ulong_v3(unsigned long* address, unsigned long* result) {
|
||||
*result = atomicCAS(address, 12, address);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_ulong_v4(Dummy* address, unsigned long* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_ulong_v5(char* address, unsigned long* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_ulong_v6(short* address, unsigned long* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_ulong_v7(long* address, unsigned long* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_ulong_v8(long long* address, unsigned long* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto kAtomicCAS_ulonglong{
|
||||
R"(
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicCAS_ulonglong_v1(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicCAS(&address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_ulonglong_v2(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicCAS(address, address, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_ulonglong_v3(unsigned long long* address, unsigned long long* result) {
|
||||
*result = atomicCAS(address, 12, address);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_ulonglong_v4(Dummy* address, unsigned long long* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_ulonglong_v5(char* address, unsigned long long* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_ulonglong_v6(short* address, unsigned long long* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_ulonglong_v7(long* address, unsigned long long* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_ulonglong_v8(long long* address, unsigned long long* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto kAtomicCAS_float{
|
||||
R"(
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicCAS_float_v1(float* address, float* result) {
|
||||
*result = atomicCAS(&address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_float_v2(float* address, float* result) {
|
||||
*result = atomicCAS(address, address, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_float_v3(float* address, float* result) {
|
||||
*result = atomicCAS(address, 12, address);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_float_v4(Dummy* address, float* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_float_v5(char* address, float* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_float_v6(short* address, float* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_float_v7(long* address, float* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_float_v8(long long* address, float* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
)"};
|
||||
|
||||
static constexpr auto kAtomicCAS_double{
|
||||
R"(
|
||||
class Dummy {
|
||||
public:
|
||||
__device__ Dummy() {}
|
||||
__device__ ~Dummy() {}
|
||||
};
|
||||
|
||||
__global__ void atomicCAS_double_v1(double* address, double* result) {
|
||||
*result = atomicCAS(&address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_double_v2(double* address, double* result) {
|
||||
*result = atomicCAS(address, address, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_double_v3(double* address, double* result) {
|
||||
*result = atomicCAS(address, 12, address);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_double_v4(Dummy* address, double* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_double_v5(char* address, double* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_double_v6(short* address, double* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_double_v7(long* address, double* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
|
||||
__global__ void atomicCAS_double_v8(long long* address, double* result) {
|
||||
*result = atomicCAS(address, 12, 13);
|
||||
}
|
||||
)"};
|
||||
@@ -0,0 +1,185 @@
|
||||
/*
|
||||
Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "arithmetic_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup atomicCAS_system atomicCAS_system
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
#ifdef HT_NVIDIA
|
||||
#define TYPES
|
||||
#else
|
||||
#define TYPES , float, double
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel two times concurrently on a two devices wherein all threads will perform
|
||||
* an atomic addition, implemented using an atomic CAS operation, on a target memory location. Each
|
||||
* thread will add the same value to the memory location, storing the return value into a separate
|
||||
* output array slot corresponding to it. Once complete, the output array and target memory is
|
||||
* validated to contain all the expected values. Several memory access patterns are tested:
|
||||
* -# All threads exchange to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicCAS_system
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicCAS_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Peer_GPUs", "", int, unsigned int,
|
||||
unsigned long long TYPES) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Same address " << current) {
|
||||
MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
|
||||
2, 2, 1, sizeof(TestType));
|
||||
}
|
||||
|
||||
DYNAMIC_SECTION("Adjacent addresses " << current) {
|
||||
MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
|
||||
2, 2, warp_size, sizeof(TestType));
|
||||
}
|
||||
|
||||
DYNAMIC_SECTION("Scattered addresses " << current) {
|
||||
MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
|
||||
2, 2, warp_size, cache_line_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel on a single device wherein all threads will perform
|
||||
* an atomic addition, implemented using an atomic CAS operation, on a target memory location.
|
||||
* Each thread will add the same value to the memory location, storing the return value into a
|
||||
* separate output array slot corresponding to it. While the kernel is running, the host
|
||||
* performs atomic additions, in 4 threads, on the same memory location(s). Once complete, the
|
||||
* output array and target memory is validated to contain all the expected values. Several
|
||||
* memory access patterns are tested:
|
||||
* -# All threads exchange to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicCAS_system
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicCAS_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_GPU", "", int, unsigned int,
|
||||
unsigned long long TYPES) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Same address " << current) {
|
||||
MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
|
||||
1, 1, 1, sizeof(TestType), 4);
|
||||
}
|
||||
|
||||
DYNAMIC_SECTION("Adjacent addresses " << current) {
|
||||
MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
|
||||
1, 1, warp_size, sizeof(TestType), 4);
|
||||
}
|
||||
|
||||
DYNAMIC_SECTION("Scattered addresses " << current) {
|
||||
MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
|
||||
1, 1, warp_size, cache_line_size, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Executes a kernel two times on two devices wherein all threads will perform
|
||||
* an atomic addition, implemented using an atomic CAS operation, on a target memory location.
|
||||
* Each thread will add the same value to the memory location, storing the return value into a
|
||||
* separate output array slot corresponding to it. While the kernel is running, the host
|
||||
* performs atomic additions, in 4 threads, on the same memory location(s). Once complete, the
|
||||
* output array and target memory is validated to contain all the expected values. Several
|
||||
* memory access patterns are tested:
|
||||
* -# All threads exchange to a single, compile time deducible, memory location
|
||||
* -# Each thread targets an array containing warp_size elements, using tid % warp_size
|
||||
* for indexing
|
||||
* -# Same as the above, but the elements are spread out by L1 cache line size bytes.
|
||||
*
|
||||
* - The test is run for:
|
||||
* - All overloads of atomicCAS_system
|
||||
* - hipMalloc, hipMallocManaged, hipHostMalloc and hipHostRegister allocated memory
|
||||
* - Several grid and block dimension combinations.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/atomicCAS_system.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit_atomicCAS_system_Positive_Host_And_Peer_GPUs", "", int, unsigned int,
|
||||
unsigned long long TYPES) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Same address " << current) {
|
||||
MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
|
||||
2, 2, 1, sizeof(TestType), 4);
|
||||
}
|
||||
|
||||
DYNAMIC_SECTION("Adjacent addresses " << current) {
|
||||
MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
|
||||
2, 2, warp_size, sizeof(TestType), 4);
|
||||
}
|
||||
|
||||
DYNAMIC_SECTION("Scattered addresses " << current) {
|
||||
MultipleDeviceMultipleKernelAndHostTest<TestType, AtomicOperation::kCASAddSystem>(
|
||||
2, 2, warp_size, cache_line_size, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
在新工单中引用
屏蔽一个用户