Removed old perftest directory (#840)
This commit is contained in:
@@ -1,5 +0,0 @@
|
||||
# Performance tests
|
||||
|
||||
These tests are in process of being ported to Catch framework [here](../catch/perftests/).
|
||||
|
||||
Do not try to compile these as these might fail due to absence of some critical components. If you are interested in performance tests you can try to compile the one's in catch/perftests.
|
||||
@@ -1,240 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD_CMD: hipPerfModuleLoad %hc -I%S/../../src %S/%s %S/../../src/test_common.cpp -o %T/%t
|
||||
* EXCLUDE_HIP_PLATFORM nvidia TEST: %t HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <fstream>
|
||||
#include <unistd.h>
|
||||
#include <chrono>
|
||||
|
||||
#ifdef __unix__
|
||||
|
||||
#include <dirent.h>
|
||||
|
||||
// List of Download files
|
||||
std::unordered_map<std::string, bool> TL_contents{{"Kernels.so", true},
|
||||
{"TensileLibrary.yaml", true},
|
||||
{"TensileLibrary_gfx803.co", true},
|
||||
{"TensileLibrary_gfx900.co", true},
|
||||
{"TensileLibrary_gfx906.co", true},
|
||||
{"TensileLibrary_gfx908.co", true},
|
||||
{"kernel_names.txt", true}};
|
||||
|
||||
bool GetDirectoryContents(std::unordered_map<std::string, bool>& dir_contents) {
|
||||
DIR* dir = nullptr;
|
||||
struct dirent* ent = nullptr;
|
||||
|
||||
// Open Current Directory
|
||||
if ((dir = opendir(".")) == 0) {
|
||||
std::cout << "Failed to open current working directory, check permissions" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Read the contents of the directory
|
||||
while ((ent = readdir(dir)) != nullptr) {
|
||||
dir_contents.insert({ent->d_name, true});
|
||||
}
|
||||
|
||||
closedir(dir);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ContentsAvailable() {
|
||||
std::unordered_map<std::string, bool> dir_contents;
|
||||
|
||||
// Get recent directory contents
|
||||
if (!GetDirectoryContents(dir_contents)) {
|
||||
std::cout << "Failed to get directory Contents" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// If the Tensile Library content is not present, then fail
|
||||
for (auto& TL_elem : TL_contents) {
|
||||
if (dir_contents.end() == dir_contents.find(TL_elem.first)) {
|
||||
std::cout << "Failed to find the Tensile Library file: " << TL_elem.first << std::endl;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DownloadContents() {
|
||||
// Download the contents from TC repo
|
||||
std::cout << "Downloading conents .... " << std::endl;
|
||||
std::string wget_str = "wget -nH -q -N -r -np -R \" index.html* \" --cut-dirs=3 ";
|
||||
wget_str += "http://ocltc-backup.amd.com/hiptest/TensileLibrary/";
|
||||
system(wget_str.c_str());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PreProcessContents() {
|
||||
// If Contents already available no other action needed
|
||||
if (ContentsAvailable()) {
|
||||
std::cout << "Contents already available" << std::endl;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Download the TL(Tensile Library) contents from TC
|
||||
if (!DownloadContents()) {
|
||||
std::cout << "Failed to download contents" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if downloaded contents are available
|
||||
if (!ContentsAvailable()) {
|
||||
std::cout << "Failed to find TL contents even after download in CWD" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Get Tensile Library File name, changes wrt target
|
||||
bool getTLFileName(int device_id, std::string& tlf_name) {
|
||||
hipDeviceProp_t props;
|
||||
HIPCHECK(hipGetDeviceProperties(&props, device_id));
|
||||
std::string archName = props.gcnArchName;
|
||||
if (archName.size() <= 3) {
|
||||
std::cout << "ArchName too small, Exiting" << std::endl;
|
||||
HIPASSERT(false);
|
||||
}
|
||||
archName = archName.substr(3, (archName.size() - 1));
|
||||
|
||||
tlf_name = "TensileLibrary_gfx" + archName;
|
||||
tlf_name += ".co";
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RunTest(int device_id) {
|
||||
std::cout << "For Device: " << device_id << std::endl;
|
||||
|
||||
// Get Tensile Library File name, changes wrt target
|
||||
std::string tlf_name;
|
||||
if (!getTLFileName(device_id, tlf_name)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Measure Time taken for hipModuleLoad
|
||||
hipModule_t Module;
|
||||
auto mload_clock_start = std::chrono::steady_clock::now();
|
||||
HIPCHECK(hipModuleLoad(&Module, tlf_name.c_str()));
|
||||
auto mload_clock_stop = std::chrono::steady_clock::now();
|
||||
std::chrono::duration<double, std::nano> mload_duration = (mload_clock_stop - mload_clock_start);
|
||||
std::cout << "Time taken for hipModuleLoad : "
|
||||
<< std::chrono::duration_cast<std::chrono::nanoseconds>(mload_duration).count()
|
||||
<< " nanoseconds " << std::endl;
|
||||
|
||||
// Read kernels from a pre-populated text file
|
||||
std::string kernel_file_name = "kernel_names.txt";
|
||||
std::ifstream kernel_file(kernel_file_name);
|
||||
if (!kernel_file.is_open()) {
|
||||
std::cout << "Failed to open Kernel File: " << kernel_file_name << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string kernel_line;
|
||||
std::vector<std::string> kernel_vec;
|
||||
while (std::getline(kernel_file, kernel_line)) {
|
||||
kernel_line.erase(std::remove(kernel_line.begin(), kernel_line.end(), '\r'), kernel_line.end());
|
||||
kernel_vec.push_back(kernel_line);
|
||||
}
|
||||
|
||||
// Measure the first hipModuleGetFunction
|
||||
hipFunction_t hfunc = nullptr;
|
||||
auto mgetf_clock_start = std::chrono::steady_clock::now();
|
||||
HIPCHECK(hipModuleGetFunction(&hfunc, Module, kernel_vec[0].c_str()));
|
||||
auto mgetf_clock_stop = std::chrono::steady_clock::now();
|
||||
std::chrono::duration<double, std::nano> mgetf_duration = (mgetf_clock_stop - mgetf_clock_start);
|
||||
std::cout << "Time taken to fetch a function via hipModuleGetFunction : "
|
||||
<< std::chrono::duration_cast<std::chrono::nanoseconds>(mgetf_duration).count()
|
||||
<< " nanoseconds " << std::endl;
|
||||
|
||||
// Measure the second hipModuleGetFunction
|
||||
hfunc = nullptr;
|
||||
mgetf_clock_start = std::chrono::steady_clock::now();
|
||||
HIPCHECK(hipModuleGetFunction(&hfunc, Module, kernel_vec[0].c_str()));
|
||||
mgetf_clock_stop = std::chrono::steady_clock::now();
|
||||
mgetf_duration = (mgetf_clock_stop - mgetf_clock_start);
|
||||
std::cout << "Time taken fetch the same function via hipModuleGetFunction : "
|
||||
<< std::chrono::duration_cast<std::chrono::nanoseconds>(mgetf_duration).count()
|
||||
<< " nanoseconds " << std::endl;
|
||||
|
||||
double all_duration = 0;
|
||||
for (auto& kernel : kernel_vec) {
|
||||
hfunc = nullptr;
|
||||
mgetf_clock_start = std::chrono::steady_clock::now();
|
||||
HIPCHECK(hipModuleGetFunction(&hfunc, Module, kernel.c_str()));
|
||||
mgetf_clock_stop = std::chrono::steady_clock::now();
|
||||
mgetf_duration = (mgetf_clock_stop - mgetf_clock_start);
|
||||
all_duration += mgetf_duration.count();
|
||||
}
|
||||
|
||||
if (kernel_vec.size() > 0) {
|
||||
std::cout << "Time taken for Average hipModuleGetFunction : "
|
||||
<< (static_cast<double>(all_duration) / static_cast<double>(kernel_vec.size()))
|
||||
<< " nanoseconds " << std::endl;
|
||||
}
|
||||
|
||||
std::cout << std::endl << std::endl;
|
||||
|
||||
HIPCHECK(hipModuleUnload(Module));
|
||||
return true;
|
||||
}
|
||||
#endif //__unix__
|
||||
|
||||
int main() {
|
||||
bool test_passed = true;
|
||||
|
||||
do {
|
||||
#ifdef __unix__
|
||||
// Preprocess contents for the test
|
||||
if (!PreProcessContents()) {
|
||||
std::cout << "Failed in PreProcessContents step" << std::endl;
|
||||
test_passed = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// Run the test for all devices
|
||||
int num_devices = 0;
|
||||
HIPCHECK(hipGetDeviceCount(&num_devices));
|
||||
for (int dev_idx = 0; dev_idx < num_devices; ++dev_idx) {
|
||||
if (!RunTest(dev_idx)) {
|
||||
test_passed = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
#else
|
||||
std::cout << "Detected non-linux Os. Skipping the test" << std::endl;
|
||||
#endif // __unix__
|
||||
} while (0);
|
||||
|
||||
if (test_passed) {
|
||||
passed();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,259 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "test_common.h"
|
||||
|
||||
#include <thread>
|
||||
#ifdef __linux__
|
||||
#include <sys/sysinfo.h>
|
||||
#elif defined(_WIN32)
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
// standard global variables that can be set on command line
|
||||
size_t N = 4 * 1024 * 1024;
|
||||
char memsetval = 0x42;
|
||||
int memsetD32val = 0xDEADBEEF;
|
||||
short memsetD16val = 0xDEAD;
|
||||
char memsetD8val = 0xDE;
|
||||
int iterations = 1;
|
||||
unsigned blocksPerCU = 6; // to hide latency
|
||||
unsigned threadsPerBlock = 256;
|
||||
int textureFilterMode = 0; // 0: hipFilterModePoint; 1: hipFilterModeLinear
|
||||
int p_gpuDevice = 0;
|
||||
unsigned p_verbose = 0;
|
||||
int p_tests = -1; /*which tests to run. Interpretation is left to each test. default:all*/
|
||||
int debug_test = 0;
|
||||
#ifdef _WIN64
|
||||
const char* HIP_VISIBLE_DEVICES_STR = "HIP_VISIBLE_DEVICES=";
|
||||
const char* CUDA_VISIBLE_DEVICES_STR = "CUDA_VISIBLE_DEVICES=";
|
||||
const char* PATH_SEPERATOR_STR = "\\";
|
||||
const char* NULL_DEVICE = "NUL:";
|
||||
#else
|
||||
const char* HIP_VISIBLE_DEVICES_STR = "HIP_VISIBLE_DEVICES";
|
||||
const char* CUDA_VISIBLE_DEVICES_STR = "CUDA_VISIBLE_DEVICES";
|
||||
const char* PATH_SEPERATOR_STR = "/";
|
||||
const char* NULL_DEVICE = "/dev/null";
|
||||
#endif
|
||||
|
||||
#ifdef _WIN64
|
||||
// Windows does not have rand_r, use srand and rand instead.
|
||||
int rand_r(unsigned int* s) {
|
||||
srand(*s);
|
||||
return rand();
|
||||
}
|
||||
#endif
|
||||
|
||||
// Get Free Memory from the system
|
||||
static size_t getMemoryAmount() {
|
||||
#if __linux__
|
||||
struct sysinfo info;
|
||||
int _ = sysinfo(&info);
|
||||
return info.freeram / (1024 * 1024); // MB
|
||||
#elif defined(_WIN32)
|
||||
MEMORYSTATUSEX statex;
|
||||
statex.dwLength = sizeof(statex);
|
||||
GlobalMemoryStatusEx(&statex);
|
||||
return (statex.ullAvailPhys / (1024 * 1024)); // MB
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t getHostThreadCount(const size_t memPerThread, const size_t maxThreads) {
|
||||
if (memPerThread == 0) return 0;
|
||||
auto memAmount = getMemoryAmount();
|
||||
const auto processor_count = std::thread::hardware_concurrency();
|
||||
if (processor_count == 0 || memAmount == 0) return 0;
|
||||
size_t thread_count = 0;
|
||||
if ((processor_count * memPerThread) < memAmount)
|
||||
thread_count = processor_count;
|
||||
else
|
||||
thread_count = reinterpret_cast<size_t>(memAmount / memPerThread);
|
||||
if (maxThreads > 0) {
|
||||
return (thread_count > maxThreads) ? maxThreads : thread_count;
|
||||
}
|
||||
return thread_count;
|
||||
}
|
||||
|
||||
// Function to determine if the device is of gfx11 architecture
|
||||
bool IsGfx11() {
|
||||
#if defined(__HIP_PLATFORM_NVIDIA__)
|
||||
return false;
|
||||
#elif defined(__HIP_PLATFORM_AMD__)
|
||||
int device = -1;
|
||||
hipDeviceProp_t props{};
|
||||
HIPCHECK(hipGetDevice(&device));
|
||||
HIPCHECK(hipGetDeviceProperties(&props, device));
|
||||
|
||||
// Get GCN Arch Name and compare to check if it is gfx11
|
||||
std::string arch = std::string(props.gcnArchName);
|
||||
auto pos = arch.find(":");
|
||||
if (pos != std::string::npos) arch = arch.substr(0, pos);
|
||||
|
||||
if (arch.size() >= 5) arch = arch.substr(0, 5);
|
||||
|
||||
return (arch == std::string("gfx11")) ? true : false;
|
||||
#else
|
||||
std::cout << "Have to be either Nvidia or AMD platform, asserting" << std::endl;
|
||||
assert(false);
|
||||
#endif
|
||||
}
|
||||
|
||||
namespace HipTest {
|
||||
|
||||
|
||||
double elapsed_time(long long startTimeUs, long long stopTimeUs) {
|
||||
return ((double)(stopTimeUs - startTimeUs)) / ((double)(1000));
|
||||
}
|
||||
|
||||
|
||||
int parseSize(const char* str, size_t* output) {
|
||||
char* next;
|
||||
*output = strtoull(str, &next, 0);
|
||||
int l = strlen(str);
|
||||
if (l) {
|
||||
char c = str[l - 1]; // last char.
|
||||
if ((c == 'k') || (c == 'K')) {
|
||||
*output *= 1024;
|
||||
}
|
||||
if ((c == 'm') || (c == 'M')) {
|
||||
*output *= (1024 * 1024);
|
||||
}
|
||||
if ((c == 'g') || (c == 'G')) {
|
||||
*output *= (1024 * 1024 * 1024);
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
int parseUInt(const char* str, unsigned int* output) {
|
||||
char* next;
|
||||
*output = strtoul(str, &next, 0);
|
||||
return !strlen(next);
|
||||
}
|
||||
|
||||
|
||||
int parseInt(const char* str, int* output) {
|
||||
char* next;
|
||||
*output = strtol(str, &next, 0);
|
||||
return !strlen(next);
|
||||
}
|
||||
|
||||
|
||||
int parseStandardArguments(int argc, char* argv[], bool failOnUndefinedArg) {
|
||||
int extraArgs = 1;
|
||||
for (int i = 1; i < argc; i++) {
|
||||
const char* arg = argv[i];
|
||||
|
||||
if (!strcmp(arg, " ")) {
|
||||
// skip NULL args.
|
||||
} else if (!strcmp(arg, "--N") || (!strcmp(arg, "-N"))) {
|
||||
if (++i >= argc || !HipTest::parseSize(argv[i], &N)) {
|
||||
failed("Bad N size argument");
|
||||
}
|
||||
} else if (!strcmp(arg, "--threadsPerBlock")) {
|
||||
if (++i >= argc || !HipTest::parseUInt(argv[i], &threadsPerBlock)) {
|
||||
failed("Bad threadsPerBlock argument");
|
||||
}
|
||||
} else if (!strcmp(arg, "--blocksPerCU")) {
|
||||
if (++i >= argc || !HipTest::parseUInt(argv[i], &blocksPerCU)) {
|
||||
failed("Bad blocksPerCU argument");
|
||||
}
|
||||
} else if (!strcmp(arg, "--memsetval")) {
|
||||
int ex;
|
||||
if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) {
|
||||
failed("Bad memsetval argument");
|
||||
}
|
||||
memsetval = ex;
|
||||
} else if (!strcmp(arg, "--memsetD32val")) {
|
||||
int ex;
|
||||
if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) {
|
||||
failed("Bad memsetD32val argument");
|
||||
}
|
||||
memsetD32val = ex;
|
||||
} else if (!strcmp(arg, "--memsetD16val")) {
|
||||
int ex;
|
||||
if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) {
|
||||
failed("Bad memsetD16val argument");
|
||||
}
|
||||
memsetD16val = ex;
|
||||
} else if (!strcmp(arg, "--memsetD8val")) {
|
||||
int ex;
|
||||
if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) {
|
||||
failed("Bad memsetD8val argument");
|
||||
}
|
||||
memsetD8val = ex;
|
||||
} else if (!strcmp(arg, "--textureFilterMode")) {
|
||||
int mode;
|
||||
if (++i >= argc || !HipTest::parseInt(argv[i], &mode)) {
|
||||
failed("Bad textureFilterMode argument");
|
||||
}
|
||||
textureFilterMode = mode;
|
||||
} else if (!strcmp(arg, "--iterations") || (!strcmp(arg, "-i"))) {
|
||||
if (++i >= argc || !HipTest::parseInt(argv[i], &iterations)) {
|
||||
failed("Bad iterations argument");
|
||||
}
|
||||
} else if (!strcmp(arg, "--gpu") || (!strcmp(arg, "-gpuDevice")) || (!strcmp(arg, "-g"))) {
|
||||
if (++i >= argc || !HipTest::parseInt(argv[i], &p_gpuDevice)) {
|
||||
failed("Bad gpuDevice argument");
|
||||
}
|
||||
|
||||
} else if (!strcmp(arg, "--verbose") || (!strcmp(arg, "-v"))) {
|
||||
if (++i >= argc || !HipTest::parseUInt(argv[i], &p_verbose)) {
|
||||
failed("Bad verbose argument");
|
||||
}
|
||||
} else if (!strcmp(arg, "--tests") || (!strcmp(arg, "-t"))) {
|
||||
if (++i >= argc || !HipTest::parseInt(argv[i], &p_tests)) {
|
||||
failed("Bad tests argument");
|
||||
}
|
||||
|
||||
} else if (!strcmp(arg, "--debug") || (!strcmp(arg, "-d"))) {
|
||||
if (++i >= argc || !HipTest::parseInt(argv[i], &debug_test)) {
|
||||
failed("Bad tests argument");
|
||||
}
|
||||
} else {
|
||||
if (failOnUndefinedArg) {
|
||||
failed("Bad argument '%s'", arg);
|
||||
} else {
|
||||
argv[extraArgs++] = argv[i];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
return extraArgs;
|
||||
}
|
||||
|
||||
|
||||
unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N) {
|
||||
int device;
|
||||
HIPCHECK(hipGetDevice(&device));
|
||||
hipDeviceProp_t props;
|
||||
HIPCHECK(hipGetDeviceProperties(&props, device));
|
||||
|
||||
unsigned blocks = props.multiProcessorCount * blocksPerCU;
|
||||
if (blocks * threadsPerBlock > N) {
|
||||
blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
|
||||
}
|
||||
|
||||
return blocks;
|
||||
}
|
||||
|
||||
} // namespace HipTest
|
||||
@@ -1,573 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* File is intended to C and CPP compliant hence any CPP specic changes
|
||||
* should be added into CPP section
|
||||
*
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#ifdef __cplusplus
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#if __CUDACC__
|
||||
#include <sys/time.h>
|
||||
#else
|
||||
#include <chrono>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// ************************ GCC section **************************
|
||||
#include <stddef.h>
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip/hip_runtime_api.h"
|
||||
|
||||
#define HC __attribute__((hc))
|
||||
|
||||
#define KNRM "\x1B[0m"
|
||||
#define KRED "\x1B[31m"
|
||||
#define KGRN "\x1B[32m"
|
||||
#define KYEL "\x1B[33m"
|
||||
#define KBLU "\x1B[34m"
|
||||
#define KMAG "\x1B[35m"
|
||||
#define KCYN "\x1B[36m"
|
||||
#define KWHT "\x1B[37m"
|
||||
|
||||
// HIP Skip Return code set at cmake
|
||||
#define HIP_SKIP_RETURN_CODE 127
|
||||
#define HIP_ENABLE_SKIP_TESTS 0
|
||||
|
||||
// Recommended thresholds for Tests
|
||||
#define MAX_THREADS 100
|
||||
|
||||
inline bool hip_skip_tests_enabled() { return HIP_ENABLE_SKIP_TESTS; }
|
||||
|
||||
inline int hip_skip_retcode() {
|
||||
// HIP Skip Return code set at cmake
|
||||
return HIP_SKIP_RETURN_CODE;
|
||||
}
|
||||
|
||||
// This must be called in the end of main() to indicate test passed with success.
|
||||
// If it's called somewhere else, compiling issues or unexpected result will arise.
|
||||
#define passed() \
|
||||
printf("%sPASSED!%s\n", KGRN, KNRM); \
|
||||
return 0;
|
||||
|
||||
// The real "assert" would have written to stderr. But it is
|
||||
// sufficient to just fflush here without getting pedantic. This also
|
||||
// ensures that we don't lose any earlier writes to stdout.
|
||||
#define failed(...) \
|
||||
printf("%serror: ", KRED); \
|
||||
printf(__VA_ARGS__); \
|
||||
printf("\n"); \
|
||||
printf("error: TEST FAILED\n%s", KNRM); \
|
||||
fflush(NULL); \
|
||||
abort();
|
||||
|
||||
#define warn(...) \
|
||||
printf("%swarn: ", KYEL); \
|
||||
printf(__VA_ARGS__); \
|
||||
printf("\n"); \
|
||||
printf("warn: TEST WARNING\n%s", KNRM);
|
||||
|
||||
#define HIP_PRINT_STATUS(status) \
|
||||
std::cout << hipGetErrorName(status) << " at line: " << __LINE__ << std::endl;
|
||||
|
||||
#define HIPCHECK(error) \
|
||||
{ \
|
||||
hipError_t localError = error; \
|
||||
if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \
|
||||
printf("%serror: '%s'(%d) from %s at %s:%d%s\n", KRED, hipGetErrorString(localError), \
|
||||
localError, #error, __FILE__, __LINE__, KNRM); \
|
||||
failed("API returned error code."); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define HIPASSERT(condition) \
|
||||
if (!(condition)) { \
|
||||
failed("%sassertion %s at %s:%d%s \n", KRED, #condition, __FILE__, __LINE__, KNRM); \
|
||||
}
|
||||
|
||||
|
||||
#define HIPCHECK_API(API_CALL, EXPECTED_ERROR) \
|
||||
{ \
|
||||
hipError_t _e = (API_CALL); \
|
||||
if (_e != (EXPECTED_ERROR)) { \
|
||||
failed("%sAPI '%s' returned %d(%s) but test expected %d(%s) at %s:%d%s \n", KRED, #API_CALL, \
|
||||
_e, hipGetErrorName(_e), EXPECTED_ERROR, hipGetErrorName(EXPECTED_ERROR), __FILE__, \
|
||||
__LINE__, KNRM); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define HIPCHECK_RETURN_ONFAIL(func) \
|
||||
do { \
|
||||
hipError_t herror = (func); \
|
||||
if (herror != hipSuccess) { \
|
||||
return herror; \
|
||||
} \
|
||||
} while (0);
|
||||
|
||||
#ifdef _WIN64
|
||||
#include <tchar.h>
|
||||
#define aligned_alloc(x, y) _aligned_malloc(y, x)
|
||||
#define aligned_free(x) _aligned_free(x)
|
||||
#define popen(x, y) _popen(x, y)
|
||||
#define pclose(x) _pclose(x)
|
||||
#define setenv(x, y, z) _putenv_s(x, y)
|
||||
#define unsetenv _putenv
|
||||
#define fileno(x) _fileno(x)
|
||||
#define dup(x) _dup(x)
|
||||
#define dup2(x, y) _dup2(x, y)
|
||||
#define pipe(x, y, z) _pipe(x, y, z)
|
||||
#define sleep(x) _sleep(x)
|
||||
#else
|
||||
#define aligned_free(x) free(x)
|
||||
#endif
|
||||
|
||||
// standard command-line variables:
|
||||
extern size_t N;
|
||||
extern char memsetval;
|
||||
extern int memsetD32val;
|
||||
extern short memsetD16val;
|
||||
extern char memsetD8val;
|
||||
extern int iterations;
|
||||
extern unsigned blocksPerCU;
|
||||
extern unsigned threadsPerBlock;
|
||||
extern int textureFilterMode;
|
||||
extern int p_gpuDevice;
|
||||
extern unsigned p_verbose;
|
||||
extern int p_tests;
|
||||
extern int debug_test;
|
||||
extern const char* HIP_VISIBLE_DEVICES_STR;
|
||||
extern const char* CUDA_VISIBLE_DEVICES_STR;
|
||||
extern const char* PATH_SEPERATOR_STR;
|
||||
extern const char* NULL_DEVICE;
|
||||
|
||||
// ********************* CPP section *********************
|
||||
#ifdef __cplusplus
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC
|
||||
#define TYPENAME(T) typeid(T).name()
|
||||
#else
|
||||
#define TYPENAME(T) "?"
|
||||
#endif
|
||||
|
||||
#ifdef _WIN64
|
||||
int rand_r(unsigned int* s);
|
||||
#endif
|
||||
|
||||
// Get Optimal Thread count size
|
||||
size_t getHostThreadCount(const size_t memPerThread = 200 /* MB */, const size_t maxThreads = 0);
|
||||
|
||||
namespace HipTest {
|
||||
|
||||
// Returns the current system time in microseconds
|
||||
inline long long get_time() {
|
||||
#if __CUDACC__
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, 0);
|
||||
return (tv.tv_sec * 1000000) + tv.tv_usec;
|
||||
#else
|
||||
return std::chrono::high_resolution_clock::now().time_since_epoch() /
|
||||
std::chrono::microseconds(1);
|
||||
#endif
|
||||
}
|
||||
|
||||
double elapsed_time(long long startTimeUs, long long stopTimeUs);
|
||||
|
||||
int parseSize(const char* str, size_t* output);
|
||||
int parseUInt(const char* str, unsigned int* output);
|
||||
int parseInt(const char* str, int* output);
|
||||
int parseStandardArguments(int argc, char* argv[], bool failOnUndefinedArg);
|
||||
|
||||
unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N);
|
||||
|
||||
template <typename T> // pointer type
|
||||
void checkArray(T hData, T hOutputData, size_t width, size_t height, size_t depth) {
|
||||
for (int i = 0; i < depth; i++) {
|
||||
for (int j = 0; j < height; j++) {
|
||||
for (int k = 0; k < width; k++) {
|
||||
int offset = i * width * height + j * width + k;
|
||||
if (hData[offset] != hOutputData[offset]) {
|
||||
std::cerr << '[' << i << ',' << j << ',' << k << "]:" << hData[offset] << "----"
|
||||
<< hOutputData[offset] << " ";
|
||||
failed("mistmatch at:%d %d %d", i, j, k);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T> void checkArray(T input, T output, size_t height, size_t width) {
|
||||
for (int i = 0; i < height; i++) {
|
||||
for (int j = 0; j < width; j++) {
|
||||
int offset = i * width + j;
|
||||
if (input[offset] != output[offset]) {
|
||||
std::cerr << '[' << i << ',' << j << ',' << "]:" << input[offset] << "----"
|
||||
<< output[offset] << " ";
|
||||
failed("mistmatch at:%d %d", i, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T> __global__ void vectorADD(const T* A_d, const T* B_d, T* C_d, size_t NELEM) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (size_t i = offset; i < NELEM; i += stride) {
|
||||
C_d[i] = A_d[i] + B_d[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void vectorADDReverse(const T* A_d, const T* B_d, T* C_d, size_t NELEM) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) {
|
||||
C_d[i] = A_d[i] + B_d[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T> __global__ void addCount(const T* A_d, T* C_d, size_t NELEM, int count) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
// Deliberately do this in an inefficient way to increase kernel runtime
|
||||
for (int i = 0; i < count; i++) {
|
||||
for (size_t i = offset; i < NELEM; i += stride) {
|
||||
C_d[i] = A_d[i] + (T)count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void addCountReverse(const T* A_d, T* C_d, int64_t NELEM, int count) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
// Deliberately do this in an inefficient way to increase kernel runtime
|
||||
for (int i = 0; i < count; i++) {
|
||||
for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) {
|
||||
C_d[i] = A_d[i] + (T)count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T> __global__ void memsetReverse(T* C_d, T val, int64_t NELEM) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) {
|
||||
C_d[i] = val;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T> void setDefaultData(size_t numElements, T* A_h, T* B_h, T* C_h) {
|
||||
// Initialize the host data:
|
||||
for (size_t i = 0; i < numElements; i++) {
|
||||
if (A_h) (A_h)[i] = 3.146f + i; // Pi
|
||||
if (B_h) (B_h)[i] = 1.618f + i; // Phi
|
||||
if (C_h) (C_h)[i] = 0.0f + i;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void initArraysForHost(T** A_h, T** B_h, T** C_h, size_t N, bool usePinnedHost = false) {
|
||||
size_t Nbytes = N * sizeof(T);
|
||||
|
||||
if (usePinnedHost) {
|
||||
if (A_h) {
|
||||
HIPCHECK(hipHostMalloc(reinterpret_cast<void**>(A_h), Nbytes));
|
||||
}
|
||||
if (B_h) {
|
||||
HIPCHECK(hipHostMalloc(reinterpret_cast<void**>(B_h), Nbytes));
|
||||
}
|
||||
if (C_h) {
|
||||
HIPCHECK(hipHostMalloc(reinterpret_cast<void**>(C_h), Nbytes));
|
||||
}
|
||||
} else {
|
||||
if (A_h) {
|
||||
*A_h = (T*)malloc(Nbytes);
|
||||
HIPASSERT(*A_h != NULL);
|
||||
}
|
||||
|
||||
if (B_h) {
|
||||
*B_h = (T*)malloc(Nbytes);
|
||||
HIPASSERT(*B_h != NULL);
|
||||
}
|
||||
|
||||
if (C_h) {
|
||||
*C_h = (T*)malloc(Nbytes);
|
||||
HIPASSERT(*C_h != NULL);
|
||||
}
|
||||
}
|
||||
|
||||
setDefaultData(N, A_h ? *A_h : NULL, B_h ? *B_h : NULL, C_h ? *C_h : NULL);
|
||||
}
|
||||
|
||||
|
||||
template <typename T> void initArrays(T** A_d, T** B_d, T** C_d, T** A_h, T** B_h, T** C_h,
|
||||
size_t N, bool usePinnedHost = false) {
|
||||
size_t Nbytes = N * sizeof(T);
|
||||
|
||||
if (A_d) {
|
||||
HIPCHECK(hipMalloc(A_d, Nbytes));
|
||||
}
|
||||
if (B_d) {
|
||||
HIPCHECK(hipMalloc(B_d, Nbytes));
|
||||
}
|
||||
if (C_d) {
|
||||
HIPCHECK(hipMalloc(C_d, Nbytes));
|
||||
}
|
||||
|
||||
initArraysForHost(A_h, B_h, C_h, N, usePinnedHost);
|
||||
}
|
||||
|
||||
|
||||
template <typename T> void freeArraysForHost(T* A_h, T* B_h, T* C_h, bool usePinnedHost) {
|
||||
if (usePinnedHost) {
|
||||
if (A_h) {
|
||||
HIPCHECK(hipHostFree(A_h));
|
||||
}
|
||||
if (B_h) {
|
||||
HIPCHECK(hipHostFree(B_h));
|
||||
}
|
||||
if (C_h) {
|
||||
HIPCHECK(hipHostFree(C_h));
|
||||
}
|
||||
} else {
|
||||
if (A_h) {
|
||||
free(A_h);
|
||||
}
|
||||
if (B_h) {
|
||||
free(B_h);
|
||||
}
|
||||
if (C_h) {
|
||||
free(C_h);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void freeArrays(T* A_d, T* B_d, T* C_d, T* A_h, T* B_h, T* C_h, bool usePinnedHost) {
|
||||
if (A_d) {
|
||||
HIPCHECK(hipFree(A_d));
|
||||
}
|
||||
if (B_d) {
|
||||
HIPCHECK(hipFree(B_d));
|
||||
}
|
||||
if (C_d) {
|
||||
HIPCHECK(hipFree(C_d));
|
||||
}
|
||||
|
||||
freeArraysForHost(A_h, B_h, C_h, usePinnedHost);
|
||||
}
|
||||
|
||||
#if defined(__HIP_PLATFORM_AMD__)
|
||||
template <typename T> void initArrays2DPitch(T** A_d, T** B_d, T** C_d, size_t* pitch_A,
|
||||
size_t* pitch_B, size_t* pitch_C, size_t numW,
|
||||
size_t numH) {
|
||||
if (A_d) {
|
||||
HIPCHECK(hipMallocPitch((void**)A_d, pitch_A, numW * sizeof(T), numH));
|
||||
}
|
||||
if (B_d) {
|
||||
HIPCHECK(hipMallocPitch((void**)B_d, pitch_B, numW * sizeof(T), numH));
|
||||
}
|
||||
if (C_d) {
|
||||
HIPCHECK(hipMallocPitch((void**)C_d, pitch_C, numW * sizeof(T), numH));
|
||||
}
|
||||
|
||||
HIPASSERT(*pitch_A == *pitch_B);
|
||||
HIPASSERT(*pitch_A == *pitch_C)
|
||||
}
|
||||
|
||||
inline void initHIPArrays(hipArray** A_d, hipArray** B_d, hipArray** C_d,
|
||||
const hipChannelFormatDesc* desc, const size_t numW, const size_t numH,
|
||||
const unsigned int flags) {
|
||||
if (A_d) {
|
||||
HIPCHECK(hipMallocArray(A_d, desc, numW, numH, flags));
|
||||
}
|
||||
if (B_d) {
|
||||
HIPCHECK(hipMallocArray(B_d, desc, numW, numH, flags));
|
||||
}
|
||||
if (C_d) {
|
||||
HIPCHECK(hipMallocArray(C_d, desc, numW, numH, flags));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Assumes C_h contains vector add of A_h + B_h
|
||||
// Calls the test "failed" macro if a mismatch is detected.
|
||||
template <typename T> size_t checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N,
|
||||
bool expectMatch = true, bool reportMismatch = true) {
|
||||
size_t mismatchCount = 0;
|
||||
size_t firstMismatch = 0;
|
||||
size_t mismatchesToPrint = 10;
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
T expected = A_h[i] + B_h[i];
|
||||
if (result_H[i] != expected) {
|
||||
if (mismatchCount == 0) {
|
||||
firstMismatch = i;
|
||||
}
|
||||
mismatchCount++;
|
||||
if ((mismatchCount <= mismatchesToPrint) && expectMatch) {
|
||||
std::cout << std::fixed << std::setprecision(32);
|
||||
std::cout << "At " << i << std::endl;
|
||||
std::cout << " Computed:" << result_H[i] << std::endl;
|
||||
std::cout << " Expected:" << expected << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (reportMismatch) {
|
||||
if (expectMatch) {
|
||||
if (mismatchCount) {
|
||||
failed("%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch);
|
||||
}
|
||||
} else {
|
||||
if (mismatchCount == 0) {
|
||||
failed("expected mismatches but did not detect any!");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return mismatchCount;
|
||||
}
|
||||
|
||||
|
||||
// Assumes C_h contains vector add of A_h + B_h
|
||||
// Calls the test "failed" macro if a mismatch is detected.
|
||||
template <typename T>
|
||||
void checkTest(T* expected_H, T* result_H, size_t N, bool expectMatch = true) {
|
||||
size_t mismatchCount = 0;
|
||||
size_t firstMismatch = 0;
|
||||
size_t mismatchesToPrint = 10;
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
if (result_H[i] != expected_H[i]) {
|
||||
if (mismatchCount == 0) {
|
||||
firstMismatch = i;
|
||||
}
|
||||
mismatchCount++;
|
||||
if ((mismatchCount <= mismatchesToPrint) && expectMatch) {
|
||||
std::cout << std::fixed << std::setprecision(32);
|
||||
std::cout << "At " << i << std::endl;
|
||||
std::cout << " Computed:" << result_H[i] << std::endl;
|
||||
std::cout << " Expected:" << expected_H[i] << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (expectMatch) {
|
||||
if (mismatchCount) {
|
||||
fprintf(stderr, "%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch);
|
||||
// failed("%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch);
|
||||
}
|
||||
} else {
|
||||
if (mismatchCount == 0) {
|
||||
failed("expected mismatches but did not detect any!");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
struct Pinned {
|
||||
static const bool isPinned = true;
|
||||
static const char* str() { return "Pinned"; };
|
||||
|
||||
static void* Alloc(size_t sizeBytes) {
|
||||
void* p;
|
||||
HIPCHECK(hipHostMalloc((void**)&p, sizeBytes));
|
||||
return p;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
//---
|
||||
struct Unpinned {
|
||||
static const bool isPinned = false;
|
||||
static const char* str() { return "Unpinned"; };
|
||||
|
||||
static void* Alloc(size_t sizeBytes) {
|
||||
void* p = malloc(sizeBytes);
|
||||
HIPASSERT(p);
|
||||
return p;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
struct Memcpy {
|
||||
static const char* str() { return "Memcpy"; };
|
||||
};
|
||||
|
||||
struct MemcpyAsync {
|
||||
static const char* str() { return "MemcpyAsync"; };
|
||||
};
|
||||
|
||||
|
||||
template <typename C> struct MemTraits;
|
||||
|
||||
|
||||
template <> struct MemTraits<Memcpy> {
|
||||
static void Copy(void* dest, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
||||
hipStream_t stream) {
|
||||
HIPCHECK(hipMemcpy(dest, src, sizeBytes, kind));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <> struct MemTraits<MemcpyAsync> {
|
||||
static void Copy(void* dest, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
||||
hipStream_t stream) {
|
||||
HIPCHECK(hipMemcpyAsync(dest, src, sizeBytes, kind, stream));
|
||||
}
|
||||
};
|
||||
|
||||
inline bool isImageSupported() {
|
||||
int imageSupport = 1;
|
||||
#ifdef __HIP_PLATFORM_AMD__
|
||||
HIPCHECK(hipDeviceGetAttribute(&imageSupport, hipDeviceAttributeImageSupport, p_gpuDevice));
|
||||
#endif
|
||||
return imageSupport != 0;
|
||||
}
|
||||
|
||||
}; // namespace HipTest
|
||||
|
||||
// This must be called in the beginning of image test app's main() to indicate whether image
|
||||
// is supported.
|
||||
#define checkImageSupport() \
|
||||
if (!HipTest::isImageSupported()) { \
|
||||
printf("Texture is not support on the device. Skipped.\n"); \
|
||||
passed(); \
|
||||
}
|
||||
#endif //__cplusplus
|
||||
|
||||
// Function to determine if the device is of gfx11 architecture
|
||||
bool IsGfx11();
|
||||
@@ -1,97 +0,0 @@
|
||||
#include "timer.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#define VC_EXTRALEAN
|
||||
#include <windows.h>
|
||||
#pragma comment(lib, "user32")
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
#include <time.h>
|
||||
#define NANOSECONDS_PER_SEC 1000000000
|
||||
#endif
|
||||
|
||||
CPerfCounter::CPerfCounter() : _clocks(0), _start(0) {
|
||||
#ifdef _WIN32
|
||||
|
||||
QueryPerformanceFrequency((LARGE_INTEGER*)&_freq);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
_freq = NANOSECONDS_PER_SEC;
|
||||
#endif
|
||||
}
|
||||
|
||||
CPerfCounter::~CPerfCounter() {
|
||||
// EMPTY!
|
||||
}
|
||||
|
||||
void CPerfCounter::Start(void) {
|
||||
#ifdef _WIN32
|
||||
|
||||
if (_start) {
|
||||
MessageBox(NULL, "Bad Perf Counter Start", "Error", MB_OK);
|
||||
exit(0);
|
||||
}
|
||||
QueryPerformanceCounter((LARGE_INTEGER*)&_start);
|
||||
|
||||
#endif
|
||||
#ifdef __linux__
|
||||
|
||||
struct timespec s;
|
||||
clock_gettime(CLOCK_MONOTONIC, &s);
|
||||
_start = (i64)s.tv_sec * NANOSECONDS_PER_SEC + (i64)s.tv_nsec;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void CPerfCounter::Stop(void) {
|
||||
i64 n;
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
if (!_start) {
|
||||
MessageBox(NULL, "Bad Perf Counter Stop", "Error", MB_OK);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
QueryPerformanceCounter((LARGE_INTEGER*)&n);
|
||||
|
||||
#endif
|
||||
#ifdef __linux__
|
||||
|
||||
struct timespec s;
|
||||
clock_gettime(CLOCK_MONOTONIC, &s);
|
||||
n = (i64)s.tv_sec * NANOSECONDS_PER_SEC + (i64)s.tv_nsec;
|
||||
|
||||
#endif
|
||||
|
||||
n -= _start;
|
||||
_start = 0;
|
||||
_clocks += n;
|
||||
}
|
||||
|
||||
void CPerfCounter::Reset(void) {
|
||||
#ifdef _WIN32
|
||||
if (_start) {
|
||||
MessageBox(NULL, "Bad Perf Counter Reset", "Error", MB_OK);
|
||||
exit(0);
|
||||
}
|
||||
#endif
|
||||
_clocks = 0;
|
||||
}
|
||||
|
||||
double CPerfCounter::GetElapsedTime(void) {
|
||||
#ifdef _WIN32
|
||||
if (_start) {
|
||||
MessageBox(NULL, "Trying to get time while still running.", "Error", MB_OK);
|
||||
exit(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
return (double)_clocks / (double)_freq;
|
||||
}
|
||||
@@ -1,26 +0,0 @@
|
||||
#ifndef _TIMER_H_
|
||||
#define _TIMER_H_
|
||||
|
||||
#ifdef _WIN32
|
||||
typedef __int64 i64;
|
||||
#endif
|
||||
#ifdef __linux__
|
||||
typedef long long i64;
|
||||
#endif
|
||||
|
||||
class CPerfCounter {
|
||||
public:
|
||||
CPerfCounter();
|
||||
~CPerfCounter();
|
||||
void Start(void);
|
||||
void Stop(void);
|
||||
void Reset(void);
|
||||
double GetElapsedTime(void);
|
||||
|
||||
private:
|
||||
i64 _freq;
|
||||
i64 _clocks;
|
||||
i64 _start;
|
||||
};
|
||||
|
||||
#endif // _TIMER_H_
|
||||
مرجع در شماره جدید
Block a user