EXSWHTEC-300 - Extend tests for atomic bitwise operations #288
Change-Id: I499a1c57400f13ebdf056d093228501f4eb2cb31
Šī revīzija ir iekļauta:
revīziju iesūtīja
Rakesh Roy
vecāks
f9cf87fe60
revīzija
fa5ba557a5
@@ -35,6 +35,9 @@ set(TEST_SRC
|
||||
unsafeAtomicMax.cc
|
||||
atomicExch.cc
|
||||
atomicExch_system.cc
|
||||
__hip_atomic_fetch_and.cc
|
||||
__hip_atomic_fetch_or.cc
|
||||
__hip_atomic_fetch_xor.cc
|
||||
)
|
||||
|
||||
if(HIP_PLATFORM MATCHES "nvidia")
|
||||
|
||||
@@ -0,0 +1,187 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "bitwise_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup __hip_atomic_fetch_and __hip_atomic_fetch_and
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic AND with memory scope WAVEFRONT from multiple threads on the same
|
||||
* address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_and.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Wavefront_SameAddress", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Same address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinAnd,
|
||||
__HIP_MEMORY_SCOPE_WAVEFRONT>(1, sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic AND with memory scope WAVEFRONT from multiple threads on adjacent
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_and.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Wavefront_Adjacent_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Adjacent address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinAnd,
|
||||
__HIP_MEMORY_SCOPE_WAVEFRONT>(warp_size,
|
||||
sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic AND with memory scope WAVEFRONT from multiple threads on scattered
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_and.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Wavefront_Scattered_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Scattered address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinAnd,
|
||||
__HIP_MEMORY_SCOPE_WAVEFRONT>(warp_size,
|
||||
cache_line_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic AND with memory scope WORKGROUP from multiple threads on the same
|
||||
* address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_and.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Workgroup_SameAddress", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Same address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinAnd,
|
||||
__HIP_MEMORY_SCOPE_WORKGROUP>(1, sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic AND with memory scope WORKGROUP from multiple threads on adjacent
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_and.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Workgroup_Adjacent_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Adjacent address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinAnd,
|
||||
__HIP_MEMORY_SCOPE_WORKGROUP>(warp_size,
|
||||
sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic AND with memory scope WORKGROUP from multiple threads on scattered
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_and.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_and_Positive_Workgroup_Scattered_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Scattered address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinAnd,
|
||||
__HIP_MEMORY_SCOPE_WORKGROUP>(warp_size,
|
||||
cache_line_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,187 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "bitwise_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup __hip_atomic_fetch_or __hip_atomic_fetch_or
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic OR with memory scope WAVEFRONT from multiple threads on the same
|
||||
* address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_or.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Wavefront_SameAddress", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Same address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinOr,
|
||||
__HIP_MEMORY_SCOPE_WAVEFRONT>(1, sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic OR with memory scope WAVEFRONT from multiple threads on adjacent
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_or.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Wavefront_Adjacent_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Adjacent address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinOr,
|
||||
__HIP_MEMORY_SCOPE_WAVEFRONT>(warp_size,
|
||||
sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic OR with memory scope WAVEFRONT from multiple threads on scattered
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_or.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Wavefront_Scattered_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Scattered address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinOr,
|
||||
__HIP_MEMORY_SCOPE_WAVEFRONT>(warp_size,
|
||||
cache_line_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic OR with memory scope WORKGROUP from multiple threads on the same
|
||||
* address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_or.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Workgroup_SameAddress", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Same address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinOr,
|
||||
__HIP_MEMORY_SCOPE_WORKGROUP>(1, sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic OR with memory scope WORKGROUP from multiple threads on adjacent
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_or.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Workgroup_Adjacent_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Adjacent address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinOr,
|
||||
__HIP_MEMORY_SCOPE_WORKGROUP>(warp_size,
|
||||
sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic OR with memory scope WORKGROUP from multiple threads on scattered
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_or.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_or_Positive_Workgroup_Scattered_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Scattered address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinOr,
|
||||
__HIP_MEMORY_SCOPE_WORKGROUP>(warp_size,
|
||||
cache_line_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,187 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "bitwise_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup __hip_atomic_fetch_xor __hip_atomic_fetch_xor
|
||||
* @{
|
||||
* @ingroup AtomicsTest
|
||||
*/
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic XOR with memory scope WAVEFRONT from multiple threads on the same
|
||||
* address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_xor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Wavefront_SameAddress", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Same address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinXor,
|
||||
__HIP_MEMORY_SCOPE_WAVEFRONT>(1, sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic XOR with memory scope WAVEFRONT from multiple threads on adjacent
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_xor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Wavefront_Adjacent_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Adjacent address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinXor,
|
||||
__HIP_MEMORY_SCOPE_WAVEFRONT>(warp_size,
|
||||
sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic XOR with memory scope WAVEFRONT from multiple threads on scattered
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_xor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Wavefront_Scattered_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Scattered address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinXor,
|
||||
__HIP_MEMORY_SCOPE_WAVEFRONT>(warp_size,
|
||||
cache_line_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic XOR with memory scope WORKGROUP from multiple threads on the same
|
||||
* address.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_xor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Workgroup_SameAddress", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Same address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinXor,
|
||||
__HIP_MEMORY_SCOPE_WORKGROUP>(1, sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic XOR with memory scope WORKGROUP from multiple threads on adjacent
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_xor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Workgroup_Adjacent_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Adjacent address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinXor,
|
||||
__HIP_MEMORY_SCOPE_WORKGROUP>(warp_size,
|
||||
sizeof(TestType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Performs a builtin atomic XOR with memory scope WORKGROUP from multiple threads on scattered
|
||||
* addresses.
|
||||
* - Uses only one device and launches one kernel.
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - unit/atomics/__hip_atomic_fetch_xor.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEMPLATE_TEST_CASE("Unit___hip_atomic_fetch_xor_Positive_Workgroup_Scattered_Addresses", "", int,
|
||||
unsigned int, unsigned long, unsigned long long) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
const auto cache_line_size = 128u;
|
||||
|
||||
for (auto current = 0; current < cmd_options.iterations; ++current) {
|
||||
DYNAMIC_SECTION("Scattered address " << current) {
|
||||
Bitwise::SingleDeviceSingleKernelTest<TestType, Bitwise::AtomicOperation::kBuiltinXor,
|
||||
__HIP_MEMORY_SCOPE_WORKGROUP>(warp_size,
|
||||
cache_line_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -22,10 +22,10 @@ THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cmd_options.hh>
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip/hip_cooperative_groups.h>
|
||||
#include <resource_guards.hh>
|
||||
#include <cmd_options.hh>
|
||||
|
||||
namespace cg = cooperative_groups;
|
||||
|
||||
@@ -37,6 +37,9 @@ enum class AtomicOperation {
|
||||
kOrSystem,
|
||||
kXor,
|
||||
kXorSystem,
|
||||
kBuiltinAnd,
|
||||
kBuiltinOr,
|
||||
kBuiltinXor
|
||||
};
|
||||
|
||||
constexpr auto kMask = 0xAAAA;
|
||||
@@ -52,7 +55,7 @@ __host__ __device__ TestType GetTestValue() {
|
||||
return kTestValue;
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
template <typename TestType, AtomicOperation operation, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
__device__ TestType PerformAtomicOperation(TestType* const mem) {
|
||||
const auto mask = kMask;
|
||||
|
||||
@@ -68,10 +71,17 @@ __device__ TestType PerformAtomicOperation(TestType* const mem) {
|
||||
return atomicXor(mem, mask);
|
||||
} else if constexpr (operation == AtomicOperation::kXorSystem) {
|
||||
return atomicXor_system(mem, mask);
|
||||
} else if constexpr (operation == AtomicOperation::kBuiltinAnd) {
|
||||
return __hip_atomic_fetch_and(mem, mask, __ATOMIC_RELAXED, memory_scope);
|
||||
} else if constexpr (operation == AtomicOperation::kBuiltinOr) {
|
||||
return __hip_atomic_fetch_or(mem, mask, __ATOMIC_RELAXED, memory_scope);
|
||||
} else if constexpr (operation == AtomicOperation::kBuiltinXor) {
|
||||
return __hip_atomic_fetch_xor(mem, mask, __ATOMIC_RELAXED, memory_scope);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem>
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals) {
|
||||
__shared__ TestType shared_mem;
|
||||
|
||||
@@ -84,7 +94,7 @@ __global__ void TestKernel(TestType* const global_mem, TestType* const old_vals)
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
old_vals[tid] = PerformAtomicOperation<TestType, operation>(mem);
|
||||
old_vals[tid] = PerformAtomicOperation<TestType, operation, memory_scope>(mem);
|
||||
|
||||
if constexpr (use_shared_mem) {
|
||||
__syncthreads();
|
||||
@@ -99,7 +109,16 @@ __host__ __device__ TestType* PitchedOffset(TestType* const ptr, const unsigned
|
||||
return reinterpret_cast<TestType*>(byte_ptr + idx * pitch);
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem>
|
||||
__device__ void GenerateMemoryTraffic(uint8_t* const begin_addr, uint8_t* const end_addr) {
|
||||
for (volatile uint8_t* addr = begin_addr; addr != end_addr; ++addr) {
|
||||
uint8_t val = *addr;
|
||||
val ^= 0xAB;
|
||||
*addr = val;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
__global__ void TestKernel(TestType* const global_mem, TestType* const old_vals,
|
||||
const unsigned int width, const unsigned pitch) {
|
||||
extern __shared__ uint8_t shared_mem[];
|
||||
@@ -116,8 +135,18 @@ __global__ void TestKernel(TestType* const global_mem, TestType* const old_vals,
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
old_vals[tid] =
|
||||
PerformAtomicOperation<TestType, operation>(PitchedOffset(mem, pitch, tid % width));
|
||||
const auto n = cooperative_groups::this_grid().size() - width;
|
||||
|
||||
TestType* atomic_addr = PitchedOffset(mem, pitch, tid % width);
|
||||
|
||||
if (tid < n) {
|
||||
old_vals[tid] = PerformAtomicOperation<TestType, operation, memory_scope>(
|
||||
PitchedOffset(mem, pitch, tid % width));
|
||||
} else {
|
||||
uint8_t* const begin_addr = reinterpret_cast<uint8_t*>(atomic_addr + 1);
|
||||
uint8_t* const end_addr = reinterpret_cast<uint8_t*>(atomic_addr) + pitch;
|
||||
GenerateMemoryTraffic(begin_addr, end_addr);
|
||||
}
|
||||
|
||||
if constexpr (use_shared_mem) {
|
||||
__syncthreads();
|
||||
@@ -157,13 +186,16 @@ std::tuple<std::vector<TestType>, std::vector<TestType>> TestKernelHostRef(const
|
||||
auto& res = res_vals[tid % p.width];
|
||||
old_vals.push_back(res);
|
||||
|
||||
if constexpr (operation == AtomicOperation::kAnd || operation == AtomicOperation::kAndSystem) {
|
||||
if constexpr (operation == AtomicOperation::kAnd || operation == AtomicOperation::kAndSystem ||
|
||||
operation == AtomicOperation::kBuiltinAnd) {
|
||||
res = res & mask;
|
||||
} else if constexpr (operation == AtomicOperation::kOr ||
|
||||
operation == AtomicOperation::kOrSystem) {
|
||||
operation == AtomicOperation::kOrSystem ||
|
||||
operation == AtomicOperation::kBuiltinOr) {
|
||||
res = res | mask;
|
||||
} else if constexpr (operation == AtomicOperation::kXor ||
|
||||
operation == AtomicOperation::kXorSystem) {
|
||||
operation == AtomicOperation::kXorSystem ||
|
||||
operation == AtomicOperation::kBuiltinXor) {
|
||||
res = res ^ mask;
|
||||
}
|
||||
}
|
||||
@@ -188,19 +220,21 @@ void Verify(const TestParams& p, std::vector<TestType>& res_vals, std::vector<Te
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem>
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
void LaunchKernel(const TestParams& p, hipStream_t stream, TestType* const mem_ptr,
|
||||
TestType* const old_vals) {
|
||||
const auto shared_mem_size = use_shared_mem ? p.width * p.pitch : 0u;
|
||||
if (p.width == 1 && p.pitch == sizeof(TestType))
|
||||
TestKernel<TestType, operation, use_shared_mem>
|
||||
TestKernel<TestType, operation, use_shared_mem, memory_scope>
|
||||
<<<p.blocks, p.threads, shared_mem_size, stream>>>(mem_ptr, old_vals);
|
||||
else
|
||||
TestKernel<TestType, operation, use_shared_mem>
|
||||
TestKernel<TestType, operation, use_shared_mem, memory_scope>
|
||||
<<<p.blocks, p.threads, shared_mem_size, stream>>>(mem_ptr, old_vals, p.width, p.pitch);
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem>
|
||||
template <typename TestType, AtomicOperation operation, bool use_shared_mem,
|
||||
int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
void TestCore(const TestParams& p) {
|
||||
const auto old_vals_alloc_size = p.kernel_count * p.ThreadCount() * sizeof(TestType);
|
||||
std::vector<LinearAllocGuard<TestType>> old_vals_devs;
|
||||
@@ -232,7 +266,8 @@ void TestCore(const TestParams& p) {
|
||||
for (auto j = 0u; j < p.kernel_count; ++j) {
|
||||
const auto& stream = streams[i * p.kernel_count + j].stream();
|
||||
const auto old_vals = old_vals_devs[i].ptr() + j * p.ThreadCount();
|
||||
LaunchKernel<TestType, operation, use_shared_mem>(p, stream, mem_dev.ptr(), old_vals);
|
||||
LaunchKernel<TestType, operation, use_shared_mem, memory_scope>(p, stream, mem_dev.ptr(),
|
||||
old_vals);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -247,17 +282,48 @@ void TestCore(const TestParams& p) {
|
||||
Verify<TestType, operation>(p, res_vals, old_vals);
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation>
|
||||
inline dim3 GenerateThreadDimensions() { return GENERATE(dim3(16), dim3(1024)); }
|
||||
|
||||
inline dim3 GenerateBlockDimensions() {
|
||||
int sm_count = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&sm_count, hipDeviceAttributeMultiprocessorCount, 0));
|
||||
return GENERATE_COPY(dim3(sm_count), dim3(sm_count + sm_count / 2));
|
||||
}
|
||||
|
||||
template <typename TestType, AtomicOperation operation, int memory_scope = __HIP_MEMORY_SCOPE_AGENT>
|
||||
void SingleDeviceSingleKernelTest(const unsigned int width, const unsigned int pitch) {
|
||||
TestParams params;
|
||||
params.num_devices = 1;
|
||||
params.kernel_count = 1;
|
||||
params.threads = GENERATE(dim3(1023));
|
||||
if constexpr ((operation == AtomicOperation::kBuiltinAnd ||
|
||||
operation == AtomicOperation::kBuiltinOr ||
|
||||
operation == AtomicOperation::kBuiltinXor) &&
|
||||
memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD) {
|
||||
params.threads = 1;
|
||||
} else if constexpr ((operation == AtomicOperation::kBuiltinAnd ||
|
||||
operation == AtomicOperation::kBuiltinOr ||
|
||||
operation == AtomicOperation::kBuiltinXor) &&
|
||||
memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT) {
|
||||
int warp_size = 0;
|
||||
HIP_CHECK(hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0));
|
||||
params.threads = dim3(warp_size);
|
||||
} else {
|
||||
params.threads = GenerateThreadDimensions();
|
||||
}
|
||||
params.width = width;
|
||||
params.pitch = pitch;
|
||||
|
||||
SECTION("Global memory") {
|
||||
params.blocks = GENERATE(dim3(3));
|
||||
if constexpr ((operation == AtomicOperation::kBuiltinAnd ||
|
||||
operation == AtomicOperation::kBuiltinOr ||
|
||||
operation == AtomicOperation::kBuiltinXor) &&
|
||||
(memory_scope == __HIP_MEMORY_SCOPE_SINGLETHREAD ||
|
||||
memory_scope == __HIP_MEMORY_SCOPE_WAVEFRONT ||
|
||||
memory_scope == __HIP_MEMORY_SCOPE_WORKGROUP)) {
|
||||
params.blocks = dim3(1);
|
||||
} else {
|
||||
params.blocks = GenerateBlockDimensions();
|
||||
}
|
||||
using LA = LinearAllocs;
|
||||
for (const auto alloc_type :
|
||||
{LA::hipMalloc, LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
|
||||
@@ -288,8 +354,8 @@ void SingleDeviceMultipleKernelTest(const unsigned int kernel_count, const unsig
|
||||
TestParams params;
|
||||
params.num_devices = 1;
|
||||
params.kernel_count = kernel_count;
|
||||
params.blocks = GENERATE(dim3(3));
|
||||
params.threads = GENERATE(dim3(1023));
|
||||
params.blocks = GenerateBlockDimensions();
|
||||
params.threads = GenerateThreadDimensions();
|
||||
params.width = width;
|
||||
params.pitch = pitch;
|
||||
|
||||
@@ -329,8 +395,8 @@ void MultipleDeviceMultipleKernelTest(const unsigned int num_devices,
|
||||
TestParams params;
|
||||
params.num_devices = num_devices;
|
||||
params.kernel_count = kernel_count;
|
||||
params.blocks = GENERATE(dim3(3));
|
||||
params.threads = GENERATE(dim3(1023));
|
||||
params.blocks = GenerateBlockDimensions();
|
||||
params.threads = GenerateThreadDimensions();
|
||||
params.width = width;
|
||||
params.pitch = pitch;
|
||||
|
||||
@@ -338,8 +404,9 @@ void MultipleDeviceMultipleKernelTest(const unsigned int num_devices,
|
||||
for (const auto alloc_type : {LA::hipHostMalloc, LA::hipMallocManaged, LA::mallocAndRegister}) {
|
||||
params.alloc_type = alloc_type;
|
||||
DYNAMIC_SECTION("Allocation type: " << to_string(alloc_type)) {
|
||||
TestCore<TestType, operation, false>(params);
|
||||
TestCore<TestType, operation, false, __HIP_MEMORY_SCOPE_SYSTEM>(params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Bitwise
|
||||
|
||||
Atsaukties uz šo jaunā problēmā
Block a user