EXSWHTEC-231 - Implement custom Benchmarking tool for Performance tests (#117)

- Introduce performance tests to project.
- Improve benchmarking utilities.
- Delete copy constructors from Timer and Benchmark classes.
- Disable Catch2's benchmarking functionalities.
- Address review comments and add progress bar/display output to the Benchmarking tool.
- Add flushing of the buffer on the benchmark name display.
- Introduce command line options.
- Add allocation type to string helper function.
- Add output modifier to Benchmark class.
- Fix invalid calculation of deviation.
- Update performance_common.hh.
- Resolve build error on Windows by adding include for reduce and accumulate.
Esse commit está contido em:
milos-mozetic
2023-07-18 09:23:27 +02:00
commit de GitHub
commit 5fbbdcae68
7 arquivos alterados com 419 adições e 2 exclusões
+1
Ver Arquivo
@@ -260,6 +260,7 @@ add_subdirectory(TypeQualifiers ${CATCH_BUILD_DIR}/TypeQualifiers)
# The multiproc tests are only added when configuring for a UNIX platform.
if(UNIX)
add_subdirectory(multiproc ${CATCH_BUILD_DIR}/multiproc)
endif()
# The performance tests are added outside of the UNIX check above.
add_subdirectory(performance ${CATCH_BUILD_DIR}/performance)
cmake_policy(POP)
+30 -2
Ver Arquivo
@@ -1,7 +1,10 @@
#define CATCH_CONFIG_RUNNER
#include <cmd_options.hh>
#include <hip_test_common.hh>
#include <iostream>
// Definition of the global performance-test options object; main() binds its
// fields to the extra command line options it adds to the Catch2 parser.
CmdOptions cmd_options;
int main(int argc, char** argv) {
auto& context = TestContext::get(argc, argv);
if (context.skipTest()) {
@@ -9,8 +12,33 @@ int main(int argc, char** argv) {
std::cout << "HIP_SKIP_THIS_TEST" << std::endl;
return 0;
}
int out = Catch::Session().run(argc, argv);
// A named session is needed so that its command line parser can be extended
// before the tests are run.
Catch::Session session;
using namespace Catch::clara;
// clang-format off
// Compose the session's existing parser with the performance test options.
// Each Opt is bound to a field of the global cmd_options object.
// NOTE(review): the help text of --extended-run is still a placeholder and
// should be replaced once the semantics of that option are settled.
auto cli = session.cli()
| Opt(cmd_options.iterations, "iterations")
["-I"]["--iterations"]
("Number of iterations used for performance tests (default: 1000)")
| Opt(cmd_options.warmups, "warmups")
["-W"]["--warmups"]
("Number of warmup iterations used for performance tests (default: 100)")
| Opt(cmd_options.no_display)
["-S"]["--no-display"]
("Do not display the output of performance tests")
| Opt(cmd_options.progress)
["-P"]["--progress"]
("Show progress bar when running performance tests")
| Opt(cmd_options.extended_run)
["-E"]["--extended-run"]
("TODO: Description goes here")
;
// clang-format on
// Install the extended parser, then parse the arguments and run the tests.
session.cli(cli);
int out = session.run(argc, argv);
TestContext::get().cleanContext();
return out;
}
+33
Ver Arquivo
@@ -0,0 +1,33 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
// Options controlling the performance tests. The member initializers below are
// the defaults that apply when an option is not overridden.
struct CmdOptions {
  // Number of measured iterations.
  int iterations{1000};
  // Number of warmup iterations.
  int warmups{100};
  // When true, the output of the performance tests is not displayed.
  bool no_display{false};
  // When true, a progress bar is shown while the performance tests run.
  bool progress{false};
  // NOTE(review): the meaning of this flag is not documented yet - confirm.
  bool extended_run{false};
};

// Single shared instance; the definition lives in another translation unit.
extern CmdOptions cmd_options;
+247
Ver Arquivo
@@ -0,0 +1,247 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <functional>
#include <iomanip>
#include <iostream>
#include <memory>
#include <numeric>
#include <string>
#include <tuple>
#include <type_traits>
#include <vector>
#include <cmd_options.hh>
#include <hip_test_common.hh>
#include <resource_guards.hh>
#if defined(_WIN32)
#if defined(_WIN64)
typedef __int64 ssize_t;
#else // !_WIN64
typedef __int32 ssize_t;
#endif // !_WIN64
#endif /*_WIN32*/
// Common base of the scoped timers. It holds a reference to the caller-owned
// accumulator that elapsed times are added to, and the stream the timer is
// associated with. Only derived classes can construct and use it.
class Timer {
  // Caller-owned accumulator; it must outlive the timer.
  float& time_;
  hipStream_t stream_;

 protected:
  Timer(float& time, hipStream_t stream) : time_(time), stream_(stream) {}

  // Adds the given amount to the accumulator (it does not overwrite it).
  void Record(float elapsed) { time_ += elapsed; }

  hipStream_t GetStream() const { return stream_; }

 public:
  // Timers are not copyable.
  Timer(const Timer&) = delete;
  Timer& operator=(const Timer&) = delete;
};
class EventTimer : public Timer {
public:
EventTimer(float& time, hipStream_t stream = nullptr) : Timer(time, stream) {
HIP_CHECK(hipEventCreate(&start_));
HIP_CHECK(hipEventCreate(&stop_));
HIP_CHECK(hipEventRecord(start_, GetStream()));
}
~EventTimer() {
hipError_t error; // to avoid compiler warnings
error = hipEventRecord(stop_, GetStream());
error = hipEventSynchronize(stop_);
float ms;
error = hipEventElapsedTime(&ms, start_, stop_);
Record(ms);
error = hipEventDestroy(start_);
error = hipEventDestroy(stop_);
}
private:
hipEvent_t start_;
hipEvent_t stop_;
};
class CpuTimer : public Timer {
public:
CpuTimer(float& time, hipStream_t stream = nullptr) : Timer(time, stream) {
start_ = std::chrono::steady_clock::now();
}
~CpuTimer() {
hipError_t error; // to avoid compiler warnings
error = hipStreamSynchronize(GetStream());
stop_ = std::chrono::steady_clock::now();
std::chrono::duration<float, std::milli> ms = stop_ - start_;
Record(ms.count());
}
private:
std::chrono::time_point<std::chrono::steady_clock> start_;
std::chrono::time_point<std::chrono::steady_clock> stop_;
};
// CRTP base class of the performance tests.
//
// Derived must be callable with the arguments passed to Run(). Inside its call
// operator it opens timed regions through GetTimer() (or the TIMED_SECTION
// macros); every timer adds its elapsed time in milliseconds to the sample of
// the current iteration.
//
// The iteration and warmup counts default to the values in cmd_options and can
// be overridden with Configure().
template <typename Derived> class Benchmark {
 public:
  Benchmark()
      : iterations_(ClampCount(cmd_options.iterations)),
        warmups_(ClampCount(cmd_options.warmups)),
        display_output_(!cmd_options.no_display),
        progress_bar_(cmd_options.progress) {
    benchmark_name_ = Catch::getResultCapture().getCurrentTestName();
  }

  Benchmark(const Benchmark&) = delete;
  Benchmark& operator=(const Benchmark&) = delete;

  // Value reported by current() while the warmup iterations are running.
  static constexpr ssize_t kWarmup = -1;

  // Overrides the iteration and warmup counts taken from the command line.
  void Configure(size_t iterations, size_t warmups) {
    iterations_ = iterations;
    warmups_ = warmups;
  }

  // Appends "/<section_name>" to the name displayed for this benchmark.
  void AddSectionName(const std::string& section_name) { benchmark_name_ += "/" + section_name; }

  using ModifierSignature = std::function<float(float)>;

  // Registers a function applied to every measured sample before it is stored,
  // e.g. to convert the measured time into another unit.
  void RegisterModifier(const ModifierSignature& modifier) { modifier_ = modifier; }

  // Runs the warmup iterations followed by the measured iterations, calling the
  // derived object with `args` each time, then prints and returns the
  // statistics as {mean, standard deviation, best, worst}.
  //
  // The arguments are deliberately passed on as lvalues: they are used for
  // every iteration, so they must not be moved from.
  //
  // When no measured iteration is run, all four values are zero.
  //
  // NOTE: every call appends the iteration and warmup counts to the displayed
  // benchmark name.
  template <typename... Args> std::tuple<float, float, float, float> Run(Args&&... args) {
    AddSectionName(std::to_string(iterations_));
    AddSectionName(std::to_string(warmups_));

    auto& derived = static_cast<Derived&>(*this);

    // The timers add to time_, so it has to hold a defined value before the
    // first warmup iteration already.
    time_ = 0.f;
    current_ = kWarmup;
    for (size_t i = 0u; i < warmups_; ++i) {
      PrintProgress("warmup", static_cast<int>(100.f * (i + 1) / warmups_));
      derived(args...);
    }

    time_ = 0.f;  // discard whatever the warmup iterations accumulated
    std::vector<float> samples;
    samples.reserve(iterations_);
    for (size_t i = 0u; i < iterations_; ++i) {
      current_ = static_cast<ssize_t>(i);
      PrintProgress("measurement", static_cast<int>(100.f * (i + 1) / iterations_));
      derived(args...);
      if (modifier_) time_ = modifier_(time_);
      samples.push_back(time_);
      time_ = 0.f;
    }
    current_ = static_cast<ssize_t>(iterations_);

    // Without samples there is nothing to average and no minimum or maximum.
    if (samples.empty()) {
      Print("No measurements taken (0 iterations)\n");
      return {0.f, 0.f, 0.f, 0.f};
    }

    const float mean = std::reduce(samples.cbegin(), samples.cend()) / samples.size();

    // Population standard deviation; the squared differences are summed in
    // double precision.
    const double squared_diff_sum =
        std::accumulate(samples.cbegin(), samples.cend(), 0.0, [mean](double acc, float sample) {
          const double diff = static_cast<double>(sample) - mean;
          return acc + diff * diff;
        });
    const float deviation = static_cast<float>(std::sqrt(squared_diff_sum / samples.size()));

    const auto [best_it, worst_it] = std::minmax_element(samples.cbegin(), samples.cend());
    const float best = *best_it;
    const float worst = *worst_it;

    PrintStats(mean, deviation, best, worst);

    return {mean, deviation, best, worst};
  }

 protected:
  template <bool event_based>
  using TimerType = std::conditional_t<event_based, EventTimer, CpuTimer>;

  // Creates a timer bound to the sample of the current iteration. The timer
  // adds its elapsed time to that sample when it is destroyed.
  template <bool event_based = false>
  std::unique_ptr<TimerType<event_based>> GetTimer(hipStream_t stream = nullptr) {
    return std::make_unique<TimerType<event_based>>(time_, stream);
  }

  // Time accumulated so far in the current iteration.
  float time() const { return time_; }
  size_t iterations() const { return iterations_; }
  size_t warmups() const { return warmups_; }
  // Index of the measured iteration, or kWarmup outside of the measurement.
  ssize_t current() const { return current_; }

 private:
  std::string benchmark_name_;
  float time_ = 0.f;
  size_t iterations_;
  size_t warmups_;
  ssize_t current_ = kWarmup;
  bool display_output_;
  bool progress_bar_;
  ModifierSignature modifier_;

  // Converts a count given on the command line to size_t. Negative values are
  // treated as zero instead of wrapping around to a huge unsigned count.
  static size_t ClampCount(int count) { return count > 0 ? static_cast<size_t>(count) : 0u; }

  // Rewrites the current console line with the benchmark name followed by out.
  void Print(const std::string& out = "") {
    if (!display_output_) return;

    std::cout << "\r" << std::setw(110) << std::left << benchmark_name_ << "\t|\t" << out
              << std::flush;
  }

  // Shows the progress in percent; only active when the progress bar is enabled.
  void PrintProgress(const std::string& name, int progress) {
    if (!(display_output_ && progress_bar_)) return;
    Print(name + ": [" + std::to_string(progress) + "%]");
  }

  // Prints the final statistics and terminates the console line.
  void PrintStats(float mean, float deviation, float best, float worst) {
    if (!display_output_) return;

    Print("Average time: " + std::to_string(mean) + " ms, Standard deviation: " +
          std::to_string(deviation) + " ms, Fastest: " + std::to_string(best) +
          " ms, Slowest: " + std::to_string(worst) + " ms\n");
  }
};
// Values for the TIMER_TYPE argument of the macros below. They are passed on as
// the event_based template argument of Benchmark::GetTimer: false selects
// CpuTimer, true selects EventTimer.
constexpr bool kTimerTypeCpu = false;
constexpr bool kTimerTypeEvent = true;
// Opens a timed region: the statement or block following the macro runs while
// a timer obtained from this->GetTimer<TIMER_TYPE>(STREAM) is alive. The timer
// is scoped to the generated if statement, so the region ends with it.
// The macros use `this`, so they can only be used inside member functions of a
// class that provides GetTimer (i.e. a class derived from Benchmark).
#define TIMED_SECTION_STREAM(TIMER_TYPE, STREAM) \
if (auto _ = this->template GetTimer<TIMER_TYPE>(STREAM); true)
// Same as TIMED_SECTION_STREAM, with nullptr passed as the stream.
#define TIMED_SECTION(TIMER_TYPE) TIMED_SECTION_STREAM(TIMER_TYPE, nullptr)
// Binary size literals: 1_KB == 1024, 1_MB == 1024 * 1024 and
// 1_GB == 1024 * 1024 * 1024 bytes.
//
// The operators are declared without whitespace between "" and the suffix.
// With whitespace the suffix is an ordinary identifier, and identifiers that
// start with an underscore followed by an uppercase letter are reserved; that
// declaration form is deprecated as well.
constexpr size_t operator""_KB(unsigned long long int kb) { return static_cast<size_t>(kb << 10); }
constexpr size_t operator""_MB(unsigned long long int mb) { return static_cast<size_t>(mb << 20); }
constexpr size_t operator""_GB(unsigned long long int gb) { return static_cast<size_t>(gb << 30); }
// Maps an allocation type to the label used for it in benchmark section names.
// Values without a dedicated label map to "unknown alloc type".
static std::string GetAllocationSectionName(LinearAllocs allocation_type) {
  const char* label = "unknown alloc type";
  switch (allocation_type) {
    case LinearAllocs::malloc:
      label = "host pageable";
      break;
    case LinearAllocs::hipHostMalloc:
      label = "host pinned";
      break;
    case LinearAllocs::hipMalloc:
      label = "device malloc";
      break;
    case LinearAllocs::hipMallocManaged:
      label = "managed";
      break;
    default:
      break;
  }
  return label;
}
+21
Ver Arquivo
@@ -0,0 +1,21 @@
# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# Adds the example subdirectory of the performance tests to the build.
add_subdirectory(example)
+28
Ver Arquivo
@@ -0,0 +1,28 @@
# Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# Source files of the example performance test.
set(TEST_SRC
example.cc
)
# Hands the sources to the project helper hip_add_exe_to_target under the name
# ExamplePerformance, with build_tests as test target name and C++17 requested
# through the compile options.
# NOTE(review): presumably this creates the executable and attaches it to the
# build_tests target - confirm against the helper's definition.
hip_add_exe_to_target(NAME ExamplePerformance
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests
COMPILE_OPTIONS -std=c++17)
+59
Ver Arquivo
@@ -0,0 +1,59 @@
/*
Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <performance_common.hh>
#include <resource_guards.hh>
// Example showing how to write a benchmark: derive from Benchmark, passing the
// derived class itself as template argument, and put the code to be measured
// into timed sections inside the call operator.
class ExampleBenchmark : public Benchmark<ExampleBenchmark> {
 public:
  // dst: buffer that is memset; the code below writes 4 MB to it.
  void operator()(void* dst) {
    constexpr int kFillValue = 42;
    constexpr size_t kBytes = 4_MB;

    // Timed with the event based timer.
    TIMED_SECTION(kTimerTypeEvent) { HIP_CHECK(hipMemset(dst, kFillValue, kBytes)); }

    // Outside of any timed section, so this call is not measured.
    HIP_CHECK(hipMemset(dst, 0, kBytes));

    // Timed with the cpu based timer.
    TIMED_SECTION(kTimerTypeCpu) { HIP_CHECK(hipMemset(dst, kFillValue, kBytes)); }

    // The accessors of the base class are available here, e.g.:
    // std::cout << "Time recorded up until now: " << time() << std::endl;
    // std::cout << "Number of iterations: " << iterations() << std::endl;
    // std::cout << "Number of warmup iterations: " << warmups() << std::endl;
    // std::cout << "Current iteration: " << current() << std::endl;
  }
};
// Runs the example benchmark on a 4 MB buffer allocated with hipMalloc.
TEST_CASE("Performance_Example") {
  ExampleBenchmark example;

  // The counts given on the command line can be overridden like this:
  // example.Configure(10000 /* iterations */, 1000 /* warmups */);

  LinearAllocGuard<void> device_buffer(LinearAllocs::hipMalloc, 4_MB);
  example.Run(device_buffer.ptr());
}