EXSWHTEC-252 - Implement Kernel Launch Performance Tests #139
Change-Id: Ib27db722a31ac0cd7ad1942722c6eba62087defb
[ROCm/hip-tests commit: 2be6860916]
This commit is contained in:
committed by
Rakesh Roy
orang tua
18e922a48a
melakukan
a2e616488e
@@ -18,6 +18,9 @@
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
add_subdirectory(memset)
|
||||
add_subdirectory(memcpy)
|
||||
add_subdirectory(kernelLaunch)
|
||||
add_subdirectory(stream)
|
||||
add_subdirectory(event)
|
||||
add_subdirectory(example)
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
set(TEST_SRC
|
||||
kernel_launch_common.cc
|
||||
triple_chevron.cc
|
||||
hipLaunchKernel.cc
|
||||
hipLaunchCooperativeKernel.cc
|
||||
)
|
||||
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
set(TEST_SRC ${TEST_SRC}
|
||||
hipExtLaunchKernel.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
hip_add_exe_to_target(NAME KernelLaunchPerformance
|
||||
TEST_SRC ${TEST_SRC}
|
||||
TEST_TARGET_NAME build_tests
|
||||
COMPILE_OPTIONS -std=c++17)
|
||||
@@ -0,0 +1,120 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "kernel_launch_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup kernelLaunch kernel launch
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
* Contains performance tests for kernel launch overhead benchmarking.
|
||||
*/
|
||||
|
||||
template <KernelType kernel_type, bool timer_type>
|
||||
class ExtLaunchKernelBenchmark
|
||||
: public KernelLaunchBenchmark<ExtLaunchKernelBenchmark<kernel_type, timer_type>, timer_type> {
|
||||
public:
|
||||
constexpr void LaunchKernel() {
|
||||
if constexpr (kernel_type == KernelType::kNull) {
|
||||
error_ = hipExtLaunchKernel(reinterpret_cast<void*>(NullKernel), 1, 1, nullptr, 0, nullptr,
|
||||
events_[0], events_[1], 0u);
|
||||
} else if constexpr (kernel_type == KernelType::kSmall) {
|
||||
error_ = hipExtLaunchKernel(reinterpret_cast<void*>(KernelWithSmallArgs), 1, 1,
|
||||
small_kernel_args_, 0, nullptr, events_[0], events_[1], 0u);
|
||||
} else if constexpr (kernel_type == KernelType::kMedium) {
|
||||
error_ = hipExtLaunchKernel(reinterpret_cast<void*>(KernelWithMediumArgs), 1, 1,
|
||||
medium_kernel_args_, 0, nullptr, events_[0], events_[1], 0u);
|
||||
} else if constexpr (kernel_type == KernelType::kLarge) {
|
||||
error_ = hipExtLaunchKernel(reinterpret_cast<void*>(KernelWithLargeArgs), 1, 1,
|
||||
large_kernel_args_, 0, nullptr, events_[0], events_[1], 0u);
|
||||
} else
|
||||
;
|
||||
}
|
||||
|
||||
hipError_t GetError() { return error_; }
|
||||
|
||||
private:
|
||||
EventsGuard events_{2};
|
||||
hipError_t error_;
|
||||
|
||||
char* out_ = nullptr;
|
||||
void* small_kernel_args_[2] = {&small_kernel_args, &out_};
|
||||
void* medium_kernel_args_[2] = {&medium_kernel_args, &out_};
|
||||
void* large_kernel_args_[2] = {&large_kernel_args, &out_};
|
||||
};
|
||||
|
||||
template <KernelType kernel_type, bool timer_type> static void RunBenchmark(bool sync) {
|
||||
ExtLaunchKernelBenchmark<kernel_type, timer_type> benchmark;
|
||||
benchmark.AddSectionName(GetSynchronizationSectionName(sync));
|
||||
benchmark.AddSectionName(GetKernelTypeSectionName<kernel_type>());
|
||||
benchmark.AddSectionName(GetTimerTypeSectionName<timer_type>());
|
||||
benchmark.Run(sync);
|
||||
HIP_CHECK(benchmark.GetError());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Calls an empty kernel using hipExtLaunchKernel:
|
||||
* -# With different timing methods:
|
||||
* - CPU-based
|
||||
* - Event-based
|
||||
* -# With different synchronization behavior:
|
||||
* - Using a stream synchronization between each iteration
|
||||
* - Without any synchronization between iterations
|
||||
* -# With different kernel argument sizes
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/kernelLaunch/hipExtLaunchKernel.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipExtLaunchKernel") {
|
||||
bool sync = GENERATE(true, false);
|
||||
|
||||
SECTION("null kernel") {
|
||||
SECTION("cpu-based timing") { RunBenchmark<KernelType::kNull, kTimerTypeCpu>(sync); }
|
||||
|
||||
SECTION("event-based timing") { RunBenchmark<KernelType::kNull, kTimerTypeEvent>(sync); }
|
||||
}
|
||||
|
||||
SECTION("small kernel") {
|
||||
SECTION("cpu-based timing") { RunBenchmark<KernelType::kSmall, kTimerTypeCpu>(sync); }
|
||||
|
||||
SECTION("event-based timing") { RunBenchmark<KernelType::kSmall, kTimerTypeEvent>(sync); }
|
||||
}
|
||||
|
||||
SECTION("medium kernel") {
|
||||
SECTION("cpu-based timing") { RunBenchmark<KernelType::kMedium, kTimerTypeCpu>(sync); }
|
||||
|
||||
SECTION("event-based timing") { RunBenchmark<KernelType::kMedium, kTimerTypeEvent>(sync); }
|
||||
}
|
||||
|
||||
SECTION("large kernel") {
|
||||
SECTION("cpu-based timing") { RunBenchmark<KernelType::kLarge, kTimerTypeCpu>(sync); }
|
||||
|
||||
SECTION("event-based timing") { RunBenchmark<KernelType::kLarge, kTimerTypeEvent>(sync); }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,130 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "kernel_launch_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <utils.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup kernelLaunch kernel launch
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
* Contains performance tests for kernel launch overhead benchmarking.
|
||||
*/
|
||||
|
||||
template <KernelType kernel_type, bool timer_type>
|
||||
class LaunchCooperativeKernelBenchmark
|
||||
: public KernelLaunchBenchmark<LaunchCooperativeKernelBenchmark<kernel_type, timer_type>,
|
||||
timer_type> {
|
||||
public:
|
||||
constexpr void LaunchKernel() {
|
||||
if constexpr (kernel_type == KernelType::kNull) {
|
||||
error_ = hipLaunchCooperativeKernel(reinterpret_cast<void*>(NullKernel), dim3{1, 1, 1},
|
||||
dim3{1, 1, 1}, nullptr, 0, nullptr);
|
||||
} else if constexpr (kernel_type == KernelType::kSmall) {
|
||||
error_ =
|
||||
hipLaunchCooperativeKernel(reinterpret_cast<void*>(KernelWithSmallArgs), dim3{1, 1, 1},
|
||||
dim3{1, 1, 1}, small_kernel_args_, 0, nullptr);
|
||||
} else if constexpr (kernel_type == KernelType::kMedium) {
|
||||
error_ =
|
||||
hipLaunchCooperativeKernel(reinterpret_cast<void*>(KernelWithMediumArgs), dim3{1, 1, 1},
|
||||
dim3{1, 1, 1}, medium_kernel_args_, 0, nullptr);
|
||||
} else if constexpr (kernel_type == KernelType::kLarge) {
|
||||
error_ =
|
||||
hipLaunchCooperativeKernel(reinterpret_cast<void*>(KernelWithLargeArgs), dim3{1, 1, 1},
|
||||
dim3{1, 1, 1}, large_kernel_args_, 0, nullptr);
|
||||
} else
|
||||
;
|
||||
}
|
||||
|
||||
hipError_t GetError() { return error_; }
|
||||
|
||||
private:
|
||||
hipError_t error_;
|
||||
|
||||
char* out_ = nullptr;
|
||||
void* small_kernel_args_[2] = {&small_kernel_args, &out_};
|
||||
void* medium_kernel_args_[2] = {&medium_kernel_args, &out_};
|
||||
void* large_kernel_args_[2] = {&large_kernel_args, &out_};
|
||||
};
|
||||
|
||||
template <KernelType kernel_type, bool timer_type> static void RunBenchmark(bool sync) {
|
||||
LaunchCooperativeKernelBenchmark<kernel_type, timer_type> benchmark;
|
||||
benchmark.AddSectionName(GetSynchronizationSectionName(sync));
|
||||
benchmark.AddSectionName(GetKernelTypeSectionName<kernel_type>());
|
||||
benchmark.AddSectionName(GetTimerTypeSectionName<timer_type>());
|
||||
benchmark.Run(sync);
|
||||
HIP_CHECK(benchmark.GetError());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Calls an empty kernel using hipLaunchCooperativeKernel:
|
||||
* -# With different timing methods:
|
||||
* - CPU-based
|
||||
* - Event-based
|
||||
* -# With different synchronization behavior:
|
||||
* - Using a stream synchronization between each iteration
|
||||
* - Without any synchronization between iterations
|
||||
* -# With different kernel argument sizes
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/kernelLaunch/hipLaunchCooperativeKernel.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - Device supports CooperativeLaunch
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipLaunchCooperativeKernel") {
|
||||
if (!DeviceAttributesSupport(0, hipDeviceAttributeCooperativeLaunch)) {
|
||||
HipTest::HIP_SKIP_TEST("CooperativeLaunch not supported");
|
||||
return;
|
||||
}
|
||||
|
||||
bool sync = GENERATE(true, false);
|
||||
|
||||
SECTION("null kernel") {
|
||||
SECTION("cpu-based timing") { RunBenchmark<KernelType::kNull, kTimerTypeCpu>(sync); }
|
||||
|
||||
SECTION("event-based timing") { RunBenchmark<KernelType::kNull, kTimerTypeEvent>(sync); }
|
||||
}
|
||||
|
||||
SECTION("small kernel") {
|
||||
SECTION("cpu-based timing") { RunBenchmark<KernelType::kSmall, kTimerTypeCpu>(sync); }
|
||||
|
||||
SECTION("event-based timing") { RunBenchmark<KernelType::kSmall, kTimerTypeEvent>(sync); }
|
||||
}
|
||||
|
||||
SECTION("medium kernel") {
|
||||
SECTION("cpu-based timing") { RunBenchmark<KernelType::kMedium, kTimerTypeCpu>(sync); }
|
||||
|
||||
SECTION("event-based timing") { RunBenchmark<KernelType::kMedium, kTimerTypeEvent>(sync); }
|
||||
}
|
||||
|
||||
SECTION("large kernel") {
|
||||
SECTION("cpu-based timing") { RunBenchmark<KernelType::kLarge, kTimerTypeCpu>(sync); }
|
||||
|
||||
SECTION("event-based timing") { RunBenchmark<KernelType::kLarge, kTimerTypeEvent>(sync); }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,118 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "kernel_launch_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup kernelLaunch kernel launch
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
* Contains performance tests for kernel launch overhead benchmarking.
|
||||
*/
|
||||
|
||||
template <KernelType kernel_type, bool timer_type>
|
||||
class LaunchKernelBenchmark
|
||||
: public KernelLaunchBenchmark<LaunchKernelBenchmark<kernel_type, timer_type>, timer_type> {
|
||||
public:
|
||||
constexpr void LaunchKernel() {
|
||||
if constexpr (kernel_type == KernelType::kNull) {
|
||||
error_ = hipLaunchKernel(reinterpret_cast<void*>(NullKernel), 1, 1, nullptr, 0, nullptr);
|
||||
} else if constexpr (kernel_type == KernelType::kSmall) {
|
||||
error_ = hipLaunchKernel(reinterpret_cast<void*>(KernelWithSmallArgs), 1, 1,
|
||||
small_kernel_args_, 0, nullptr);
|
||||
} else if constexpr (kernel_type == KernelType::kMedium) {
|
||||
error_ = hipLaunchKernel(reinterpret_cast<void*>(KernelWithMediumArgs), 1, 1,
|
||||
medium_kernel_args_, 0, nullptr);
|
||||
} else if constexpr (kernel_type == KernelType::kLarge) {
|
||||
error_ = hipLaunchKernel(reinterpret_cast<void*>(KernelWithLargeArgs), 1, 1,
|
||||
large_kernel_args_, 0, nullptr);
|
||||
} else
|
||||
;
|
||||
}
|
||||
|
||||
hipError_t GetError() { return error_; }
|
||||
|
||||
private:
|
||||
hipError_t error_;
|
||||
|
||||
char* out_ = nullptr;
|
||||
void* small_kernel_args_[2] = {&small_kernel_args, &out_};
|
||||
void* medium_kernel_args_[2] = {&medium_kernel_args, &out_};
|
||||
void* large_kernel_args_[2] = {&large_kernel_args, &out_};
|
||||
};
|
||||
|
||||
template <KernelType kernel_type, bool timer_type> static void RunBenchmark(bool sync) {
|
||||
LaunchKernelBenchmark<kernel_type, timer_type> benchmark;
|
||||
benchmark.AddSectionName(GetSynchronizationSectionName(sync));
|
||||
benchmark.AddSectionName(GetKernelTypeSectionName<kernel_type>());
|
||||
benchmark.AddSectionName(GetTimerTypeSectionName<timer_type>());
|
||||
benchmark.Run(sync);
|
||||
HIP_CHECK(benchmark.GetError());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Calls an empty kernel using hipLaunchKernel:
|
||||
* -# With different timing methods:
|
||||
* - CPU-based
|
||||
* - Event-based
|
||||
* -# With different synchronization behavior:
|
||||
* - Using a stream synchronization between each iteration
|
||||
* - Without any synchronization between iterations
|
||||
* -# With different kernel argument sizes
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/kernelLaunch/hipLaunchKernel.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_hipLaunchKernel") {
|
||||
bool sync = GENERATE(true, false);
|
||||
|
||||
SECTION("null kernel") {
|
||||
SECTION("cpu-based timing") { RunBenchmark<KernelType::kNull, kTimerTypeCpu>(sync); }
|
||||
|
||||
SECTION("event-based timing") { RunBenchmark<KernelType::kNull, kTimerTypeEvent>(sync); }
|
||||
}
|
||||
|
||||
SECTION("small kernel") {
|
||||
SECTION("cpu-based timing") { RunBenchmark<KernelType::kSmall, kTimerTypeCpu>(sync); }
|
||||
|
||||
SECTION("event-based timing") { RunBenchmark<KernelType::kSmall, kTimerTypeEvent>(sync); }
|
||||
}
|
||||
|
||||
SECTION("medium kernel") {
|
||||
SECTION("cpu-based timing") { RunBenchmark<KernelType::kMedium, kTimerTypeCpu>(sync); }
|
||||
|
||||
SECTION("event-based timing") { RunBenchmark<KernelType::kMedium, kTimerTypeEvent>(sync); }
|
||||
}
|
||||
|
||||
SECTION("large kernel") {
|
||||
SECTION("cpu-based timing") { RunBenchmark<KernelType::kLarge, kTimerTypeCpu>(sync); }
|
||||
|
||||
SECTION("event-based timing") { RunBenchmark<KernelType::kLarge, kTimerTypeEvent>(sync); }
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "kernel_launch_common.hh"
|
||||
|
||||
#define DO_NOT_OPTIMIZE_AWAY \
|
||||
unsigned i = blockIdx.x * blockDim.x + threadIdx.x; \
|
||||
if (out) *out = args.args[i];
|
||||
|
||||
__global__ void NullKernel() {}
|
||||
|
||||
__global__ void KernelWithSmallArgs(SmallKernelArgs args, char* out) { DO_NOT_OPTIMIZE_AWAY; }
|
||||
|
||||
__global__ void KernelWithMediumArgs(MediumKernelArgs args, char* out) { DO_NOT_OPTIMIZE_AWAY; }
|
||||
|
||||
__global__ void KernelWithLargeArgs(LargeKernelArgs args, char* out) { DO_NOT_OPTIMIZE_AWAY; }
|
||||
|
||||
SmallKernelArgs small_kernel_args;
|
||||
MediumKernelArgs medium_kernel_args;
|
||||
LargeKernelArgs large_kernel_args;
|
||||
@@ -0,0 +1,116 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
#include <performance_common.hh>
|
||||
|
||||
struct SmallKernelArgs {
|
||||
char args[16];
|
||||
};
|
||||
|
||||
struct MediumKernelArgs {
|
||||
char args[256];
|
||||
};
|
||||
|
||||
struct LargeKernelArgs {
|
||||
char args[4096];
|
||||
};
|
||||
|
||||
extern SmallKernelArgs small_kernel_args;
|
||||
extern MediumKernelArgs medium_kernel_args;
|
||||
extern LargeKernelArgs large_kernel_args;
|
||||
|
||||
__global__ void NullKernel();
|
||||
|
||||
__global__ void KernelWithSmallArgs(SmallKernelArgs, char*);
|
||||
|
||||
__global__ void KernelWithMediumArgs(MediumKernelArgs, char*);
|
||||
|
||||
__global__ void KernelWithLargeArgs(LargeKernelArgs, char*);
|
||||
|
||||
enum class KernelType { kNull = 0, kSmall, kMedium, kLarge };
|
||||
|
||||
template <typename Derived, bool timer_type>
|
||||
class KernelLaunchBenchmark : public Benchmark<KernelLaunchBenchmark<Derived, timer_type>> {
|
||||
public:
|
||||
void operator()(bool sync = true) {
|
||||
auto& derived = static_cast<Derived&>(*this);
|
||||
|
||||
if (sync) {
|
||||
TIMED_SECTION(timer_type) { derived.LaunchKernel(); }
|
||||
} else {
|
||||
if (this->current() != this->kWarmup) // if not warmup
|
||||
RunWithoutSynchronization();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void RunWithoutSynchronization() {
|
||||
auto iterations = this->iterations();
|
||||
auto warmups = this->warmups();
|
||||
|
||||
// manually handle iterations here to avoid synchronization after each iteration
|
||||
this->Configure(1, 0);
|
||||
|
||||
this->RegisterModifier([iterations](float time) { return time / iterations; });
|
||||
|
||||
auto& derived = static_cast<Derived&>(*this);
|
||||
|
||||
for (size_t i = 0u; i < warmups; ++i) {
|
||||
derived.LaunchKernel();
|
||||
}
|
||||
|
||||
TIMED_SECTION(timer_type) {
|
||||
for (size_t i = 0u; i < iterations; ++i) {
|
||||
derived.LaunchKernel();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static std::string GetSynchronizationSectionName(bool sync) {
|
||||
return sync ? "with synchronization" : "without synchronization";
|
||||
}
|
||||
|
||||
template <KernelType kernel_type> std::string GetKernelTypeSectionName() {
|
||||
if constexpr (kernel_type == KernelType::kNull) {
|
||||
return "null kernel";
|
||||
} else if constexpr (kernel_type == KernelType::kSmall) {
|
||||
return "small kernel";
|
||||
} else if constexpr (kernel_type == KernelType::kMedium) {
|
||||
return "medium kernel";
|
||||
} else if constexpr (kernel_type == KernelType::kLarge) {
|
||||
return "large kernel";
|
||||
} else {
|
||||
return "unknown kernel type";
|
||||
}
|
||||
}
|
||||
|
||||
template <bool timer_type> std::string GetTimerTypeSectionName() {
|
||||
if constexpr (timer_type == kTimerTypeEvent) {
|
||||
return "event based";
|
||||
} else {
|
||||
return "cpu based";
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "kernel_launch_common.hh"
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
/**
|
||||
* @addtogroup kernelLaunch kernel launch
|
||||
* @{
|
||||
* @ingroup PerformanceTest
|
||||
* Contains performance tests for kernel launch overhead benchmarking.
|
||||
*/
|
||||
|
||||
template <KernelType kernel_type, bool timer_type>
|
||||
class TripleChevronBenchmark
|
||||
: public KernelLaunchBenchmark<TripleChevronBenchmark<kernel_type, timer_type>, timer_type> {
|
||||
public:
|
||||
constexpr void LaunchKernel() {
|
||||
if constexpr (kernel_type == KernelType::kNull) {
|
||||
NullKernel<<<1, 1>>>();
|
||||
} else if constexpr (kernel_type == KernelType::kSmall) {
|
||||
KernelWithSmallArgs<<<1, 1>>>(small_kernel_args, nullptr);
|
||||
} else if constexpr (kernel_type == KernelType::kMedium) {
|
||||
KernelWithMediumArgs<<<1, 1>>>(medium_kernel_args, nullptr);
|
||||
} else if constexpr (kernel_type == KernelType::kLarge) {
|
||||
KernelWithLargeArgs<<<1, 1>>>(large_kernel_args, nullptr);
|
||||
} else
|
||||
;
|
||||
}
|
||||
};
|
||||
|
||||
template <KernelType kernel_type, bool timer_type> static void RunBenchmark(bool sync) {
|
||||
TripleChevronBenchmark<kernel_type, timer_type> benchmark;
|
||||
benchmark.AddSectionName(GetSynchronizationSectionName(sync));
|
||||
benchmark.AddSectionName(GetKernelTypeSectionName<kernel_type>());
|
||||
benchmark.AddSectionName(GetTimerTypeSectionName<timer_type>());
|
||||
benchmark.Run(sync);
|
||||
HIP_CHECK(hipGetLastError());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Calls an empty kernel using triple chevron annotation:
|
||||
* -# With different timing methods:
|
||||
* - CPU-based
|
||||
* - Event-based
|
||||
* -# With different synchronization behavior:
|
||||
* - Using a stream synchronization between each iteration
|
||||
* - Without any synchronization between iterations
|
||||
* -# With different kernel argument sizes
|
||||
* Test source
|
||||
* ------------------------
|
||||
* - performance/kernelLaunch/triple_chevron.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 5.2
|
||||
*/
|
||||
TEST_CASE("Performance_Triple_Chevron") {
|
||||
bool sync = GENERATE(true, false);
|
||||
|
||||
SECTION("null kernel") {
|
||||
SECTION("cpu-based timing") { RunBenchmark<KernelType::kNull, kTimerTypeCpu>(sync); }
|
||||
|
||||
SECTION("event-based timing") { RunBenchmark<KernelType::kNull, kTimerTypeEvent>(sync); }
|
||||
}
|
||||
|
||||
SECTION("small kernel") {
|
||||
SECTION("cpu-based timing") { RunBenchmark<KernelType::kSmall, kTimerTypeCpu>(sync); }
|
||||
|
||||
SECTION("event-based timing") { RunBenchmark<KernelType::kSmall, kTimerTypeEvent>(sync); }
|
||||
}
|
||||
|
||||
SECTION("medium kernel") {
|
||||
SECTION("cpu-based timing") { RunBenchmark<KernelType::kMedium, kTimerTypeCpu>(sync); }
|
||||
|
||||
SECTION("event-based timing") { RunBenchmark<KernelType::kMedium, kTimerTypeEvent>(sync); }
|
||||
}
|
||||
|
||||
SECTION("large kernel") {
|
||||
SECTION("cpu-based timing") { RunBenchmark<KernelType::kLarge, kTimerTypeCpu>(sync); }
|
||||
|
||||
SECTION("event-based timing") { RunBenchmark<KernelType::kLarge, kTimerTypeEvent>(sync); }
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user