From 5fbbdcae68c55b8f308c5ce8d7f9ba4dd0cf3b9c Mon Sep 17 00:00:00 2001 From: milos-mozetic <118800401+milos-mozetic@users.noreply.github.com> Date: Tue, 18 Jul 2023 09:23:27 +0200 Subject: [PATCH] EXSWHTEC-231 - Implement custom Benchmarking tool for Performance tests (#117) - Introduce performance tests to project. - Improve benchmarking utilities. - Delete copy constructors from Timer and Benchmark classes. - Disable Catch2's benchmarking functionalities. - Address review comments and add progress bar/display output to the Benchmarking tool - Add flushing of the buffer on the benchmark name display - Introduce command line options. - Add allocation type to string helper function. - Add output modifier to Benchmark class. - Fix invalid calculation of deviation - Update performance_common.hh - Resolve build error on Windows by adding include for reduce and accumulate --- catch/CMakeLists.txt | 1 + catch/hipTestMain/main.cc | 32 ++- catch/include/cmd_options.hh | 33 +++ catch/include/performance_common.hh | 247 +++++++++++++++++++++++ catch/performance/CMakeLists.txt | 21 ++ catch/performance/example/CMakeLists.txt | 28 +++ catch/performance/example/example.cc | 59 ++++++ 7 files changed, 419 insertions(+), 2 deletions(-) create mode 100644 catch/include/cmd_options.hh create mode 100644 catch/include/performance_common.hh create mode 100644 catch/performance/CMakeLists.txt create mode 100644 catch/performance/example/CMakeLists.txt create mode 100644 catch/performance/example/example.cc diff --git a/catch/CMakeLists.txt b/catch/CMakeLists.txt index 1b53c746d3..f9078da432 100644 --- a/catch/CMakeLists.txt +++ b/catch/CMakeLists.txt @@ -260,6 +260,7 @@ add_subdirectory(TypeQualifiers ${CATCH_BUILD_DIR}/TypeQualifiers) if(UNIX) add_subdirectory(multiproc ${CATCH_BUILD_DIR}/multiproc) endif() +add_subdirectory(performance ${CATCH_BUILD_DIR}/performance) cmake_policy(POP) diff --git a/catch/hipTestMain/main.cc b/catch/hipTestMain/main.cc index ea3bb9bd23..030267f04c 
100644 --- a/catch/hipTestMain/main.cc +++ b/catch/hipTestMain/main.cc @@ -1,7 +1,10 @@ #define CATCH_CONFIG_RUNNER +#include #include #include +CmdOptions cmd_options; + int main(int argc, char** argv) { auto& context = TestContext::get(argc, argv); if (context.skipTest()) { @@ -9,8 +12,33 @@ int main(int argc, char** argv) { std::cout << "HIP_SKIP_THIS_TEST" << std::endl; return 0; } - int out = Catch::Session().run(argc, argv); + + Catch::Session session; + + using namespace Catch::clara; + // clang-format off + auto cli = session.cli() + | Opt(cmd_options.iterations, "iterations") + ["-I"]["--iterations"] + ("Number of iterations used for performance tests (default: 1000)") + | Opt(cmd_options.warmups, "warmups") + ["-W"]["--warmups"] + ("Number of warmup iterations used for performance tests (default: 100)") + | Opt(cmd_options.no_display) + ["-S"]["--no-display"] + ("Do not display the output of performance tests") + | Opt(cmd_options.progress) + ["-P"]["--progress"] + ("Show progress bar when running performance tests") + | Opt(cmd_options.extended_run) + ["-E"]["--extended-run"] + ("TODO: Description goes here") + ; + // clang-format on + + session.cli(cli); + + int out = session.run(argc, argv); TestContext::get().cleanContext(); return out; - } diff --git a/catch/include/cmd_options.hh b/catch/include/cmd_options.hh new file mode 100644 index 0000000000..5dbd2f300c --- /dev/null +++ b/catch/include/cmd_options.hh @@ -0,0 +1,33 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#pragma once
+
+/**
+ * Values of the custom command line options that the test runner's main() binds to the Catch2
+ * command line parser. The defaults below apply when an option is not passed.
+ */
+struct CmdOptions {
+  // -I / --iterations: number of iterations used for performance tests.
+  int iterations = 1000;
+  // -W / --warmups: number of warmup iterations used for performance tests.
+  int warmups = 100;
+  // -S / --no-display: do not display the output of performance tests.
+  bool no_display = false;
+  // -P / --progress: show a progress bar when running performance tests.
+  bool progress = false;
+  // -E / --extended-run: the CLI help text for this flag is still a TODO placeholder.
+  // NOTE(review): intended meaning is not visible in this patch - confirm and document.
+  bool extended_run = false;
+};
+
+// Single shared instance; the definition lives in catch/hipTestMain/main.cc.
+extern CmdOptions cmd_options;
\ No newline at end of file
diff --git a/catch/include/performance_common.hh b/catch/include/performance_common.hh
new file mode 100644
index 0000000000..a40d6b3f5c
--- /dev/null
+++ b/catch/include/performance_common.hh
@@ -0,0 +1,247 @@
+/*
+Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#pragma once
+
+#include <algorithm>
+#include <chrono>
+#include <cmath>
+#include <cstddef>
+#include <functional>
+#include <iomanip>
+#include <iostream>
+#include <memory>
+#include <numeric>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <vector>
+
+#include <hip_test_common.hh>
+#include <cmd_options.hh>
+#include <resource_guards.hh>
+
+#if defined(_WIN32)
+#if defined(_WIN64)
+typedef __int64 ssize_t;
+#else  // !_WIN64
+typedef __int32 ssize_t;
+#endif  // !_WIN64
+#endif /*_WIN32*/
+
+/**
+ * Common base of the scoped timers. A timer is bound to a caller-owned accumulator and adds the
+ * duration it measured (in milliseconds) to that accumulator through Record().
+ * Only derived timers can be constructed; timers are not copyable.
+ */
+class Timer {
+ public:
+  Timer(const Timer&) = delete;
+  Timer& operator=(const Timer&) = delete;
+
+ protected:
+  /**
+   * @param time   accumulator the measured time is added to; must outlive the timer
+   * @param stream stream the derived timer records on / synchronizes with
+   */
+  Timer(float& time, hipStream_t stream) : time_(time), stream_(stream) {}
+
+  // Adds `time` to the bound accumulator (it does not overwrite it).
+  void Record(float time) { time_ += time; }
+
+  hipStream_t GetStream() const { return stream_; }
+
+ private:
+  float& time_;
+  hipStream_t stream_;
+};
+
+/**
+ * Scoped timer based on HIP events: a start event is recorded on the stream at construction and
+ * a stop event at destruction; the elapsed time between the two is added to the accumulator.
+ */
+class EventTimer : public Timer {
+ public:
+  EventTimer(float& time, hipStream_t stream = nullptr) : Timer(time, stream) {
+    HIP_CHECK(hipEventCreate(&start_));
+    HIP_CHECK(hipEventCreate(&stop_));
+    HIP_CHECK(hipEventRecord(start_, GetStream()));
+  }
+
+  ~EventTimer() {
+    // A destructor must not fail, so the HIP status codes are deliberately discarded here.
+    static_cast<void>(hipEventRecord(stop_, GetStream()));
+    static_cast<void>(hipEventSynchronize(stop_));
+
+    // Initialized so that a failed query records 0 ms instead of an indeterminate value.
+    float ms = 0.f;
+    static_cast<void>(hipEventElapsedTime(&ms, start_, stop_));
+    Record(ms);
+
+    static_cast<void>(hipEventDestroy(start_));
+    static_cast<void>(hipEventDestroy(stop_));
+  }
+
+ private:
+  hipEvent_t start_{};
+  hipEvent_t stop_{};
+};
+
+/**
+ * Scoped timer based on the host steady clock: the clock is sampled at construction and again at
+ * destruction, after the stream has been synchronized; the difference is added to the
+ * accumulator.
+ */
+class CpuTimer : public Timer {
+ public:
+  CpuTimer(float& time, hipStream_t stream = nullptr) : Timer(time, stream) {
+    start_ = std::chrono::steady_clock::now();
+  }
+
+  ~CpuTimer() {
+    // Wait for the work queued on the stream; the status is discarded (destructor).
+    static_cast<void>(hipStreamSynchronize(GetStream()));
+
+    stop_ = std::chrono::steady_clock::now();
+
+    const std::chrono::duration<float, std::milli> ms = stop_ - start_;
+    Record(ms.count());
+  }
+
+ private:
+  std::chrono::time_point<std::chrono::steady_clock> start_;
+  std::chrono::time_point<std::chrono::steady_clock> stop_;
+};
+
+/**
+ * Base class for performance tests.
+ *
+ * Derived must inherit from Benchmark<Derived> and provide an operator() accepting the arguments
+ * given to Run(). Inside operator() the code to be measured is wrapped in TIMED_SECTION /
+ * TIMED_SECTION_STREAM; code outside of those sections is not timed.
+ *
+ * Iteration and warmup counts default to the values in the global cmd_options and can be
+ * overridden with Configure(). The output switches are taken from cmd_options.
+ */
+template <typename Derived> class Benchmark {
+ public:
+  Benchmark()
+      : iterations_(ToCount(cmd_options.iterations)),
+        warmups_(ToCount(cmd_options.warmups)),
+        display_output_(!cmd_options.no_display),
+        progress_bar_(cmd_options.progress) {
+    benchmark_name_ = Catch::getResultCapture().getCurrentTestName();
+  }
+
+  Benchmark(const Benchmark&) = delete;
+  Benchmark& operator=(const Benchmark&) = delete;
+
+  // Value reported by current() while the warmup calls are running.
+  static constexpr ssize_t kWarmup = -1;
+
+  // Overrides the iteration and warmup counts taken from the command line.
+  void Configure(size_t iterations, size_t warmups) {
+    iterations_ = iterations;
+    warmups_ = warmups;
+  }
+
+  // Appends "/<section_name>" to the name printed in front of the results.
+  void AddSectionName(const std::string& section_name) { benchmark_name_ += "/" + section_name; }
+
+  // A modifier maps the time measured in one iteration to the value stored as the sample.
+  using ModifierSignature = std::function<float(float)>;
+  void RegisterModifier(const ModifierSignature& modifier) { modifier_ = modifier; }
+
+  /**
+   * Runs the benchmark: warmups() calls of Derived::operator() whose measured time is discarded,
+   * followed by iterations() calls that each produce one sample (the time accumulated by all
+   * timed sections of that call, passed through the modifier if one is registered).
+   *
+   * The arguments are passed as lvalues to every call, they are never moved from.
+   * The iteration and warmup counts are appended to the benchmark name on every call.
+   *
+   * @return {mean, standard deviation, fastest, slowest} of the samples in milliseconds;
+   *         all zeros when the iteration count is 0.
+   */
+  template <typename... Args> std::tuple<float, float, float, float> Run(Args&&... args) {
+    AddSectionName(std::to_string(iterations_));
+    AddSectionName(std::to_string(warmups_));
+
+    auto& derived = static_cast<Derived&>(*this);
+
+    current_ = kWarmup;
+    for (size_t i = 0u; i < warmups_; ++i) {
+      PrintProgress("warmup", static_cast<int>(100.f * (i + 1) / warmups_));
+      derived(args...);
+    }
+    time_ = 0.f;  // drop whatever the warmup calls accumulated
+
+    std::vector<float> samples;
+    samples.reserve(iterations_);
+
+    // Unsigned index for the bound check; current_ (signed, because of kWarmup) mirrors it.
+    for (size_t i = 0u; i < iterations_; ++i) {
+      current_ = static_cast<ssize_t>(i);
+      PrintProgress("measurement", static_cast<int>(100.f * (i + 1) / iterations_));
+      derived(args...);
+      if (modifier_) time_ = modifier_(time_);
+      samples.push_back(time_);
+      time_ = 0.f;
+    }
+    current_ = static_cast<ssize_t>(iterations_);
+
+    // Without samples there is nothing to average and min/max would dereference end().
+    if (samples.empty()) {
+      PrintStats(0.f, 0.f, 0.f, 0.f);
+      return {0.f, 0.f, 0.f, 0.f};
+    }
+
+    const float sum = std::reduce(samples.cbegin(), samples.cend());
+    const float mean = sum / samples.size();
+
+    // Sum of squared differences from the mean, accumulated in double precision.
+    const double squared_error =
+        std::accumulate(samples.cbegin(), samples.cend(), 0.0, [mean](double acc, float sample) {
+          const double delta = sample - mean;
+          return acc + delta * delta;
+        });
+    const float deviation = static_cast<float>(std::sqrt(squared_error / samples.size()));
+
+    const auto [fastest, slowest] = std::minmax_element(samples.cbegin(), samples.cend());
+    const float best = *fastest;
+    const float worst = *slowest;
+
+    PrintStats(mean, deviation, best, worst);
+
+    return {mean, deviation, best, worst};
+  }
+
+ protected:
+  // kTimerTypeEvent (true) selects EventTimer, kTimerTypeCpu (false) selects CpuTimer.
+  template <bool kEventTimer>
+  using TimerType = std::conditional_t<kEventTimer, EventTimer, CpuTimer>;
+
+  // Creates a timer that adds its measurement to the time of the current iteration.
+  template <bool kEventTimer>
+  std::unique_ptr<TimerType<kEventTimer>> GetTimer(hipStream_t stream = nullptr) {
+    return std::make_unique<TimerType<kEventTimer>>(time_, stream);
+  }
+
+  // Time accumulated so far in the current call of operator().
+  float time() const { return time_; }
+
+  size_t iterations() const { return iterations_; }
+
+  size_t warmups() const { return warmups_; }
+
+  // Index of the running iteration, or kWarmup during the warmup calls.
+  ssize_t current() const { return current_; }
+
+ private:
+  std::string benchmark_name_;
+  float time_ = 0.f;  // timers add to this, so it must start from a defined value
+  size_t iterations_;
+  size_t warmups_;
+  ssize_t current_ = kWarmup;
+  bool display_output_;
+  bool progress_bar_;
+
+  ModifierSignature modifier_;
+
+  // Converts a command line count to size_t; negative values are treated as 0 instead of
+  // wrapping around to a huge unsigned number.
+  static size_t ToCount(int value) { return value > 0 ? static_cast<size_t>(value) : 0u; }
+
+  // Rewrites the current console line with the benchmark name followed by `out`.
+  void Print(const std::string& out = "") {
+    if (!display_output_) return;
+    std::cout << "\r" << std::setw(110) << std::left << benchmark_name_ << "\t|\t" << out
+              << std::flush;
+  }
+
+  void PrintProgress(const std::string& name, int progress) {
+    if (!(display_output_ && progress_bar_)) return;
+    Print(name + ": [" + std::to_string(progress) + "%]");
+  }
+
+  void PrintStats(float mean, float deviation, float best, float worst) {
+    if (!display_output_) return;
+    Print("Average time: " + std::to_string(mean) + " ms, Standard deviation: " +
+          std::to_string(deviation) + " ms, Fastest: " + std::to_string(best) +
+          " ms, Slowest: " + std::to_string(worst) + " ms\n");
+  }
+};
+
+constexpr bool kTimerTypeCpu = false;
+constexpr bool kTimerTypeEvent = true;
+
+// Opens a timed block inside Derived::operator(). The timer is the init-variable of the `if`,
+// so it is destroyed - and its measurement recorded - when the block that follows ends.
+#define TIMED_SECTION_STREAM(TIMER_TYPE, STREAM) \
+  if (auto _ = this->template GetTimer<TIMER_TYPE>(STREAM); true)
+#define TIMED_SECTION(TIMER_TYPE) TIMED_SECTION_STREAM(TIMER_TYPE, nullptr)
+
+// Size literals: 4_KB == 4096, 4_MB == 4 * 1024 * 1024, ...
+constexpr size_t operator""_KB(unsigned long long int kb) { return kb << 10; }
+
+constexpr size_t operator""_MB(unsigned long long int mb) { return mb << 20; }
+
+constexpr size_t operator""_GB(unsigned long long int gb) { return gb << 30; }
+
+// Returns the section name used in benchmark names for the given allocation type.
+// `inline` rather than `static`: one shared definition for all translation units that include
+// this header, and no unused-function warning in those that do not call it.
+inline std::string GetAllocationSectionName(LinearAllocs allocation_type) {
+  switch (allocation_type) {
+    case LinearAllocs::malloc:
+      return "host pageable";
+    case LinearAllocs::hipHostMalloc:
+      return "host pinned";
+    case LinearAllocs::hipMalloc:
+      return "device malloc";
+    case LinearAllocs::hipMallocManaged:
+      return "managed";
+    default:
+      return "unknown alloc type";
+  }
+}
diff --git a/catch/performance/CMakeLists.txt b/catch/performance/CMakeLists.txt
new file mode 100644
index
0000000000..5412636ebc --- /dev/null +++ b/catch/performance/CMakeLists.txt @@ -0,0 +1,21 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +add_subdirectory(example) diff --git a/catch/performance/example/CMakeLists.txt b/catch/performance/example/CMakeLists.txt new file mode 100644 index 0000000000..001a9e7b1d --- /dev/null +++ b/catch/performance/example/CMakeLists.txt @@ -0,0 +1,28 @@ +# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +set(TEST_SRC + example.cc +) + +hip_add_exe_to_target(NAME ExamplePerformance + TEST_SRC ${TEST_SRC} + TEST_TARGET_NAME build_tests + COMPILE_OPTIONS -std=c++17) diff --git a/catch/performance/example/example.cc b/catch/performance/example/example.cc new file mode 100644 index 0000000000..2ced78b023 --- /dev/null +++ b/catch/performance/example/example.cc @@ -0,0 +1,59 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/
+
+#include <hip_test_common.hh>
+#include <performance_common.hh>
+#include <resource_guards.hh>
+
+// Size of the device buffer. Shared by the allocation in the test case and by the memsets in
+// the benchmark so that the two cannot get out of sync.
+constexpr size_t kBufferSize = 4_MB;
+
+/**
+ * Minimal usage example of the Benchmark base class: one call of operator() is one iteration.
+ * Only the code inside the TIMED_SECTION blocks contributes to the measured time.
+ */
+class ExampleBenchmark : public Benchmark<ExampleBenchmark> {
+ public:
+  // dst must point to at least kBufferSize bytes accepted by hipMemset.
+  void operator()(void* dst) {
+    const int value = 42;
+
+    TIMED_SECTION(kTimerTypeEvent) {  // event based timing
+      HIP_CHECK(hipMemset(dst, value, kBufferSize));
+    }
+
+    HIP_CHECK(hipMemset(dst, 0, kBufferSize));  // not timed
+
+    TIMED_SECTION(kTimerTypeCpu) {  // cpu based timing
+      HIP_CHECK(hipMemset(dst, value, kBufferSize));
+    }
+
+    // accessing properties
+    // std::cout << "Time recorded up until now: " << time() << std::endl;
+    // std::cout << "Number of iterations: " << iterations() << std::endl;
+    // std::cout << "Number of warmup iterations: " << warmups() << std::endl;
+    // std::cout << "Current iteration: " << current() << std::endl;
+  }
+};
+
+TEST_CASE("Performance_Example") {
+  ExampleBenchmark benchmark;
+
+  // to override cmd options
+  // benchmark.Configure(10000 /* iterations */, 1000 /* warmups */);
+
+  LinearAllocGuard<void> dst(LinearAllocs::hipMalloc, kBufferSize);
+  benchmark.Run(dst.ptr());
+}