Simplify hipDispatchLatency sample (#1793)
* Use hipExtLaunchKernelGGL in dispatchlatency sample * Let it run on NVCC path too * Refactoring * Add test_kernel source * Remove ResultDB * Remove error checks
This commit is contained in:
@@ -2,19 +2,18 @@ HIP_PATH?= $(wildcard /opt/rocm/hip)
|
||||
ifeq (,$(HIP_PATH))
|
||||
HIP_PATH=../../..
|
||||
endif
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc -std=c++11
|
||||
|
||||
EXE=hipDispatchLatency
|
||||
|
||||
CXXFLAGS = -O3
|
||||
|
||||
all: ${EXE}
|
||||
all: test_kernel.code ${EXE}
|
||||
|
||||
$(EXE): hipDispatchLatency.cpp ResultDatabase.cpp
|
||||
$(HIPCC) $(CXXFLAGS) hipDispatchLatency.cpp ResultDatabase.cpp -o $@
|
||||
|
||||
install: $(EXE)
|
||||
cp $(EXE) $(HIP_PATH)/bin
|
||||
$(EXE): hipDispatchLatency.cpp
|
||||
$(HIPCC) $(CXXFLAGS) hipDispatchLatency.cpp -o $@
|
||||
|
||||
test_kernel.code: test_kernel.cpp
|
||||
$(HIP_PATH)/bin/hipcc --genco $(GENCO_FLAGS) $^ -o $@
|
||||
clean:
|
||||
rm -f *.o $(EXE)
|
||||
|
||||
@@ -1,473 +0,0 @@
|
||||
#include "ResultDatabase.h"
|
||||
|
||||
#include <cfloat>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <iomanip>
|
||||
|
||||
using namespace std;
|
||||
|
||||
#define SORT_BY_NAME 0
|
||||
#define SORT_RETAIN_ATTS_ORDER 1
|
||||
|
||||
|
||||
bool ResultDatabase::Result::operator<(const Result& rhs) const {
|
||||
if (test < rhs.test) return true;
|
||||
if (test > rhs.test) return false;
|
||||
#if (SORT_RETAIN_ATTS_ORDER == 0)
|
||||
// For ties, sort by the value of the attribute:
|
||||
if (atts < rhs.atts) return true;
|
||||
if (atts > rhs.atts) return false;
|
||||
#endif
|
||||
return false; // less-operator returns false on equal
|
||||
}
|
||||
|
||||
double ResultDatabase::Result::GetMin() const {
|
||||
double r = FLT_MAX;
|
||||
for (int i = 0; i < value.size(); i++) {
|
||||
r = min(r, value[i]);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
double ResultDatabase::Result::GetMax() const {
|
||||
double r = -FLT_MAX;
|
||||
for (int i = 0; i < value.size(); i++) {
|
||||
r = max(r, value[i]);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
double ResultDatabase::Result::GetMedian() const { return GetPercentile(50); }
|
||||
|
||||
double ResultDatabase::Result::GetPercentile(double q) const {
|
||||
int n = value.size();
|
||||
if (n == 0) return FLT_MAX;
|
||||
if (n == 1) return value[0];
|
||||
|
||||
if (q <= 0) return value[0];
|
||||
if (q >= 100) return value[n - 1];
|
||||
|
||||
double index = ((n + 1.) * q / 100.) - 1;
|
||||
|
||||
vector<double> sorted = value;
|
||||
sort(sorted.begin(), sorted.end());
|
||||
|
||||
if (n == 2) return (sorted[0] * (1 - q / 100.) + sorted[1] * (q / 100.));
|
||||
|
||||
int index_lo = int(index);
|
||||
double frac = index - index_lo;
|
||||
if (frac == 0) return sorted[index_lo];
|
||||
|
||||
double lo = sorted[index_lo];
|
||||
double hi = sorted[index_lo + 1];
|
||||
return lo + (hi - lo) * frac;
|
||||
}
|
||||
|
||||
double ResultDatabase::Result::GetMean() const {
|
||||
double r = 0;
|
||||
for (int i = 0; i < value.size(); i++) {
|
||||
r += value[i];
|
||||
}
|
||||
return r / double(value.size());
|
||||
}
|
||||
|
||||
double ResultDatabase::Result::GetStdDev() const {
|
||||
double r = 0;
|
||||
double u = GetMean();
|
||||
if (u == FLT_MAX) return FLT_MAX;
|
||||
for (int i = 0; i < value.size(); i++) {
|
||||
r += (value[i] - u) * (value[i] - u);
|
||||
}
|
||||
r = sqrt(r / value.size());
|
||||
return r;
|
||||
}
|
||||
|
||||
|
||||
void ResultDatabase::AddResults(const string& test, const string& atts, const string& unit,
|
||||
const vector<double>& values) {
|
||||
for (int i = 0; i < values.size(); i++) {
|
||||
AddResult(test, atts, unit, values[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static string RemoveAllButLeadingSpaces(const string& a) {
|
||||
string b;
|
||||
int n = a.length();
|
||||
int i = 0;
|
||||
while (i < n && a[i] == ' ') {
|
||||
b += a[i];
|
||||
++i;
|
||||
}
|
||||
for (; i < n; i++) {
|
||||
if (a[i] != ' ' && a[i] != '\t') b += a[i];
|
||||
}
|
||||
return b;
|
||||
}
|
||||
|
||||
void ResultDatabase::AddResult(const string& test_orig, const string& atts_orig,
|
||||
const string& unit_orig, double value) {
|
||||
string test = RemoveAllButLeadingSpaces(test_orig);
|
||||
string atts = RemoveAllButLeadingSpaces(atts_orig);
|
||||
string unit = RemoveAllButLeadingSpaces(unit_orig);
|
||||
int index;
|
||||
for (index = 0; index < results.size(); index++) {
|
||||
if (results[index].test == test && results[index].atts == atts) {
|
||||
if (results[index].unit != unit) throw "Internal error: mixed units";
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (index >= results.size()) {
|
||||
Result r;
|
||||
r.test = test;
|
||||
r.atts = atts;
|
||||
r.unit = unit;
|
||||
results.push_back(r);
|
||||
}
|
||||
|
||||
results[index].value.push_back(value);
|
||||
}
|
||||
|
||||
// ****************************************************************************
|
||||
// Method: ResultDatabase::DumpDetailed
|
||||
//
|
||||
// Purpose:
|
||||
// Writes the full results, including all trials.
|
||||
//
|
||||
// Arguments:
|
||||
// out where to print
|
||||
//
|
||||
// Programmer: Jeremy Meredith
|
||||
// Creation: August 14, 2009
|
||||
//
|
||||
// Modifications:
|
||||
// Jeremy Meredith, Wed Nov 10 14:25:17 EST 2010
|
||||
// Renamed to DumpDetailed to make room for a DumpSummary.
|
||||
//
|
||||
// Jeremy Meredith, Thu Nov 11 11:39:57 EST 2010
|
||||
// Added note about (*) missing value tag.
|
||||
//
|
||||
// Jeremy Meredith, Tue Nov 23 13:57:02 EST 2010
|
||||
// Changed note about missing values to be worded a little better.
|
||||
//
|
||||
// ****************************************************************************
|
||||
void ResultDatabase::DumpDetailed(ostream& out) {
|
||||
vector<Result> sorted(results);
|
||||
|
||||
#if SORT_BY_NAME
|
||||
stable_sort(sorted.begin(), sorted.end());
|
||||
#endif
|
||||
|
||||
const int testNameW = 24;
|
||||
const int attW = 12;
|
||||
const int fieldW = 11;
|
||||
out << std::fixed << right << std::setprecision(4);
|
||||
|
||||
int maxtrials = 1;
|
||||
for (int i = 0; i < sorted.size(); i++) {
|
||||
if (sorted[i].value.size() > maxtrials) maxtrials = sorted[i].value.size();
|
||||
}
|
||||
|
||||
// TODO: in big parallel runs, the "trials" are the procs
|
||||
// and we really don't want to print them all out....
|
||||
out << setw(testNameW) << "test\t" << setw(attW) << "atts\t" << setw(fieldW) << "median\t"
|
||||
<< "mean\t"
|
||||
<< "stddev\t"
|
||||
<< "min\t"
|
||||
<< "max\t";
|
||||
for (int i = 0; i < maxtrials; i++) out << "trial" << i << "\t";
|
||||
out << endl;
|
||||
|
||||
for (int i = 0; i < sorted.size(); i++) {
|
||||
Result& r = sorted[i];
|
||||
out << setw(testNameW) << r.test + "\t";
|
||||
out << setw(attW) << r.atts + "\t";
|
||||
out << setw(fieldW) << r.unit + "\t";
|
||||
if (r.GetMedian() == FLT_MAX)
|
||||
out << "N/A\t";
|
||||
else
|
||||
out << r.GetMedian() << "\t";
|
||||
if (r.GetMean() == FLT_MAX)
|
||||
out << "N/A\t";
|
||||
else
|
||||
out << r.GetMean() << "\t";
|
||||
if (r.GetStdDev() == FLT_MAX)
|
||||
out << "N/A\t";
|
||||
else
|
||||
out << r.GetStdDev() << "\t";
|
||||
if (r.GetMin() == FLT_MAX)
|
||||
out << "N/A\t";
|
||||
else
|
||||
out << r.GetMin() << "\t";
|
||||
if (r.GetMax() == FLT_MAX)
|
||||
out << "N/A\t";
|
||||
else
|
||||
out << r.GetMax() << "\t";
|
||||
for (int j = 0; j < r.value.size(); j++) {
|
||||
if (r.value[j] == FLT_MAX)
|
||||
out << "N/A\t";
|
||||
else
|
||||
out << r.value[j] << "\t";
|
||||
}
|
||||
|
||||
out << endl;
|
||||
}
|
||||
if (0) {
|
||||
out << endl
|
||||
<< "Note: Any results marked with (*) had missing values." << endl
|
||||
<< " This can occur on systems with a mixture of" << endl
|
||||
<< " device types or architectural capabilities." << endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// ****************************************************************************
|
||||
// Method: ResultDatabase::DumpDetailed
|
||||
//
|
||||
// Purpose:
|
||||
// Writes the summary results (min/max/stddev/med/mean), but not
|
||||
// every individual trial.
|
||||
//
|
||||
// Arguments:
|
||||
// out where to print
|
||||
//
|
||||
// Programmer: Jeremy Meredith
|
||||
// Creation: November 10, 2010
|
||||
//
|
||||
// Modifications:
|
||||
// Jeremy Meredith, Thu Nov 11 11:39:57 EST 2010
|
||||
// Added note about (*) missing value tag.
|
||||
//
|
||||
// ****************************************************************************
|
||||
void ResultDatabase::DumpSummary(ostream& out) {
|
||||
vector<Result> sorted(results);
|
||||
|
||||
#if SORT_BY_NAME
|
||||
stable_sort(sorted.begin(), sorted.end());
|
||||
#endif
|
||||
|
||||
const int testNameW = 32;
|
||||
const int attW = 12;
|
||||
const int fieldW = 9;
|
||||
out << std::fixed << right << std::setprecision(2);
|
||||
|
||||
// TODO: in big parallel runs, the "trials" are the procs
|
||||
// and we really don't want to print them all out....
|
||||
out << setw(testNameW) << "test\t" << setw(attW) << "atts\t" << setw(fieldW) << "units\t"
|
||||
<< "median\t"
|
||||
<< "mean\t"
|
||||
<< "stddev\t"
|
||||
<< "min\t"
|
||||
<< "max\t";
|
||||
out << endl;
|
||||
|
||||
for (int i = 0; i < sorted.size(); i++) {
|
||||
Result& r = sorted[i];
|
||||
out << setw(testNameW) << r.test + "\t";
|
||||
out << setw(attW) << r.atts + "\t";
|
||||
out << setw(fieldW) << r.unit + "\t";
|
||||
if (r.GetMedian() == FLT_MAX)
|
||||
out << "N/A\t";
|
||||
else
|
||||
out << r.GetMedian() << "\t";
|
||||
if (r.GetMean() == FLT_MAX)
|
||||
out << "N/A\t";
|
||||
else
|
||||
out << r.GetMean() << "\t";
|
||||
if (r.GetStdDev() == FLT_MAX)
|
||||
out << "N/A\t";
|
||||
else
|
||||
out << r.GetStdDev() << "\t";
|
||||
if (r.GetMin() == FLT_MAX)
|
||||
out << "N/A\t";
|
||||
else
|
||||
out << r.GetMin() << "\t";
|
||||
if (r.GetMax() == FLT_MAX)
|
||||
out << "N/A\t";
|
||||
else
|
||||
out << r.GetMax() << "\t";
|
||||
|
||||
out << endl;
|
||||
}
|
||||
if (0) {
|
||||
out << endl
|
||||
<< "Note: results marked with (*) had missing values such as" << endl
|
||||
<< "might occur with a mixture of architectural capabilities." << endl;
|
||||
}
|
||||
}
|
||||
|
||||
// ****************************************************************************
|
||||
// Method: ResultDatabase::ClearAllResults
|
||||
//
|
||||
// Purpose:
|
||||
// Clears all existing results from the ResultDatabase; used for multiple passes
|
||||
// of the same test or multiple tests.
|
||||
//
|
||||
// Arguments:
|
||||
//
|
||||
// Programmer: Jeffrey Young
|
||||
// Creation: September 10th, 2014
|
||||
//
|
||||
// Modifications:
|
||||
//
|
||||
//
|
||||
// ****************************************************************************
|
||||
void ResultDatabase::ClearAllResults() { results.clear(); }
|
||||
|
||||
// ****************************************************************************
|
||||
// Method: ResultDatabase::DumpCsv
|
||||
//
|
||||
// Purpose:
|
||||
// Writes either detailed or summary results (min/max/stddev/med/mean), but not
|
||||
// every individual trial.
|
||||
//
|
||||
// Arguments:
|
||||
// out file to print CSV results
|
||||
//
|
||||
// Programmer: Jeffrey Young
|
||||
// Creation: August 28th, 2014
|
||||
//
|
||||
// Modifications:
|
||||
//
|
||||
// ****************************************************************************
|
||||
void ResultDatabase::DumpCsv(string fileName) {
|
||||
bool emptyFile;
|
||||
vector<Result> sorted(results);
|
||||
|
||||
#if SORT_BY_NAME
|
||||
stable_sort(sorted.begin(), sorted.end());
|
||||
#endif
|
||||
|
||||
// Check to see if the file is empty - if so, add the headers
|
||||
emptyFile = this->IsFileEmpty(fileName);
|
||||
|
||||
// Open file and append by default
|
||||
ofstream out;
|
||||
out.open(fileName.c_str(), std::ofstream::out | std::ofstream::app);
|
||||
|
||||
// Add headers only for empty files
|
||||
if (emptyFile) {
|
||||
// TODO: in big parallel runs, the "trials" are the procs
|
||||
// and we really don't want to print them all out....
|
||||
out << "test, "
|
||||
<< "atts, "
|
||||
<< "units, "
|
||||
<< "median, "
|
||||
<< "mean, "
|
||||
<< "stddev, "
|
||||
<< "min, "
|
||||
<< "max, ";
|
||||
out << endl;
|
||||
}
|
||||
|
||||
for (int i = 0; i < sorted.size(); i++) {
|
||||
Result& r = sorted[i];
|
||||
out << r.test << ", ";
|
||||
out << r.atts << ", ";
|
||||
out << r.unit << ", ";
|
||||
if (r.GetMedian() == FLT_MAX)
|
||||
out << "N/A, ";
|
||||
else
|
||||
out << r.GetMedian() << ", ";
|
||||
if (r.GetMean() == FLT_MAX)
|
||||
out << "N/A, ";
|
||||
else
|
||||
out << r.GetMean() << ", ";
|
||||
if (r.GetStdDev() == FLT_MAX)
|
||||
out << "N/A, ";
|
||||
else
|
||||
out << r.GetStdDev() << ", ";
|
||||
if (r.GetMin() == FLT_MAX)
|
||||
out << "N/A, ";
|
||||
else
|
||||
out << r.GetMin() << ", ";
|
||||
if (r.GetMax() == FLT_MAX)
|
||||
out << "N/A, ";
|
||||
else
|
||||
out << r.GetMax() << ", ";
|
||||
|
||||
out << endl;
|
||||
}
|
||||
out << endl;
|
||||
|
||||
out.close();
|
||||
}
|
||||
|
||||
// ****************************************************************************
|
||||
// Method: ResultDatabase::IsFileEmpty
|
||||
//
|
||||
// Purpose:
|
||||
// Returns whether a file is empty - used as a helper for CSV printing
|
||||
//
|
||||
// Arguments:
|
||||
// file The input file to check for emptiness
|
||||
//
|
||||
// Programmer: Jeffrey Young
|
||||
// Creation: August 28th, 2014
|
||||
//
|
||||
// Modifications:
|
||||
//
|
||||
// ****************************************************************************
|
||||
|
||||
bool ResultDatabase::IsFileEmpty(string fileName) {
|
||||
bool fileEmpty;
|
||||
|
||||
ifstream file(fileName.c_str());
|
||||
|
||||
// If the file doesn't exist it is by definition empty
|
||||
if (!file.good()) {
|
||||
return true;
|
||||
} else {
|
||||
fileEmpty = (bool)(file.peek() == ifstream::traits_type::eof());
|
||||
file.close();
|
||||
|
||||
return fileEmpty;
|
||||
}
|
||||
|
||||
// Otherwise, return false
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// ****************************************************************************
|
||||
// Method: ResultDatabase::GetResultsForTest
|
||||
//
|
||||
// Purpose:
|
||||
// Returns a vector of results for just one test name.
|
||||
//
|
||||
// Arguments:
|
||||
// test the name of the test results to search for
|
||||
//
|
||||
// Programmer: Jeremy Meredith
|
||||
// Creation: December 3, 2010
|
||||
//
|
||||
// Modifications:
|
||||
//
|
||||
// ****************************************************************************
|
||||
vector<ResultDatabase::Result> ResultDatabase::GetResultsForTest(const string& test) {
|
||||
// get only the given test results
|
||||
vector<Result> retval;
|
||||
for (int i = 0; i < results.size(); i++) {
|
||||
Result& r = results[i];
|
||||
if (r.test == test) retval.push_back(r);
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
// ****************************************************************************
|
||||
// Method: ResultDatabase::GetResults
|
||||
//
|
||||
// Purpose:
|
||||
// Returns all the results.
|
||||
//
|
||||
// Arguments:
|
||||
//
|
||||
// Programmer: Jeremy Meredith
|
||||
// Creation: December 3, 2010
|
||||
//
|
||||
// Modifications:
|
||||
//
|
||||
// ****************************************************************************
|
||||
const vector<ResultDatabase::Result>& ResultDatabase::GetResults() const { return results; }
|
||||
@@ -1,89 +0,0 @@
|
||||
#ifndef RESULT_DATABASE_H
|
||||
#define RESULT_DATABASE_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cfloat>
|
||||
using std::ifstream;
|
||||
using std::ofstream;
|
||||
using std::ostream;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
|
||||
// ****************************************************************************
|
||||
// Class: ResultDatabase
|
||||
//
|
||||
// Purpose:
|
||||
// Track numerical results as they are generated.
|
||||
// Print statistics of raw results.
|
||||
//
|
||||
// Programmer: Jeremy Meredith
|
||||
// Creation: June 12, 2009
|
||||
//
|
||||
// Modifications:
|
||||
// Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010
|
||||
// Split timing reports into detailed and summary. E.g. for serial code,
|
||||
// we might report all trial values, but skip them in parallel.
|
||||
//
|
||||
// Jeremy Meredith, Thu Nov 11 11:40:18 EST 2010
|
||||
// Added check for missing value tag.
|
||||
//
|
||||
// Jeremy Meredith, Mon Nov 22 13:37:10 EST 2010
|
||||
// Added percentile statistic.
|
||||
//
|
||||
// Jeremy Meredith, Fri Dec 3 16:30:31 EST 2010
|
||||
// Added a method to extract a subset of results based on test name. Also,
|
||||
// the Result class is now public, so that clients can use them directly.
|
||||
// Added a GetResults method as well, and made several functions const.
|
||||
//
|
||||
// ****************************************************************************
|
||||
class ResultDatabase {
|
||||
public:
|
||||
//
|
||||
// A performance result for a single SHOC benchmark run.
|
||||
//
|
||||
struct Result {
|
||||
string test; // e.g. "readback"
|
||||
string atts; // e.g. "pagelocked 4k^2"
|
||||
string unit; // e.g. "MB/sec"
|
||||
vector<double> value; // e.g. "837.14"
|
||||
double GetMin() const;
|
||||
double GetMax() const;
|
||||
double GetMedian() const;
|
||||
double GetPercentile(double q) const;
|
||||
double GetMean() const;
|
||||
double GetStdDev() const;
|
||||
|
||||
bool operator<(const Result& rhs) const;
|
||||
|
||||
bool HadAnyFLTMAXValues() const {
|
||||
for (int i = 0; i < value.size(); ++i) {
|
||||
if (value[i] >= FLT_MAX) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
protected:
|
||||
vector<Result> results;
|
||||
|
||||
public:
|
||||
void AddResult(const string& test, const string& atts, const string& unit, double value);
|
||||
void AddResults(const string& test, const string& atts, const string& unit,
|
||||
const vector<double>& values);
|
||||
vector<Result> GetResultsForTest(const string& test);
|
||||
const vector<Result>& GetResults() const;
|
||||
void ClearAllResults();
|
||||
void DumpDetailed(ostream&);
|
||||
void DumpSummary(ostream&);
|
||||
void DumpCsv(string fileName);
|
||||
|
||||
private:
|
||||
bool IsFileEmpty(string fileName);
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
@@ -1,16 +1,13 @@
|
||||
/*
|
||||
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
@@ -21,142 +18,134 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
#include "hip/hip_ext.h"
|
||||
#endif
|
||||
#include <iostream>
|
||||
#include <time.h>
|
||||
#include "ResultDatabase.h"
|
||||
|
||||
#define PRINT_PROGRESS 0
|
||||
|
||||
#define check(cmd) \
|
||||
{ \
|
||||
hipError_t status = cmd; \
|
||||
if (status != hipSuccess) { \
|
||||
printf("error: '%s'(%d) from %s at %s:%d\n", hipGetErrorString(status), status, #cmd, \
|
||||
__FILE__, __LINE__); \
|
||||
abort(); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define LEN 1024 * 1024
|
||||
#include <chrono>
|
||||
#include <algorithm>
|
||||
|
||||
#define NUM_GROUPS 1
|
||||
#define GROUP_SIZE 64
|
||||
#define TEST_ITERS 20
|
||||
#define DISPATCHES_PER_TEST 100
|
||||
#define GROUP_SIZE 1
|
||||
#define WARMUP_RUN_COUNT 10
|
||||
#define TIMING_RUN_COUNT 100
|
||||
#define TOTAL_RUN_COUNT WARMUP_RUN_COUNT + TIMING_RUN_COUNT
|
||||
#define BATCH_SIZE 1000
|
||||
|
||||
const unsigned p_tests = 0xfffffff;
|
||||
#define FILE_NAME "test_kernel.code"
|
||||
#define KERNEL_NAME "test"
|
||||
|
||||
__global__ void EmptyKernel() { }
|
||||
|
||||
// HCC optimizes away fully NULL kernel calls, so run one that is nearly null:
|
||||
__global__ void NearlyNull(float* Ad) {
|
||||
if (Ad) {
|
||||
Ad[0] = 42;
|
||||
}
|
||||
void print_timing(std::string test, const std::array<float, TOTAL_RUN_COUNT> &results, int batch = 1) {
|
||||
|
||||
float total_us = 0.0f, mean_us = 0.0f, stddev_us = 0.0f;
|
||||
|
||||
// skip warm-up runs
|
||||
auto start_iter = std::next(results.begin(), WARMUP_RUN_COUNT);
|
||||
auto end_iter = results.end();
|
||||
|
||||
// mean
|
||||
std::for_each(start_iter, end_iter, [&](const float &run_ms) {
|
||||
total_us += (run_ms * 1000) / batch;
|
||||
});
|
||||
mean_us = total_us / TIMING_RUN_COUNT;
|
||||
|
||||
// stddev
|
||||
total_us = 0;
|
||||
std::for_each(start_iter, end_iter, [&](const float &run_ms) {
|
||||
float dev_us = ((run_ms * 1000) / batch) - mean_us;
|
||||
total_us += dev_us * dev_us;
|
||||
});
|
||||
stddev_us = sqrt(total_us / TIMING_RUN_COUNT);
|
||||
|
||||
// display
|
||||
printf("\n %s: %.1f us, std: %.1f us\n", test.c_str(), mean_us, stddev_us);
|
||||
}
|
||||
|
||||
|
||||
ResultDatabase resultDB;
|
||||
|
||||
|
||||
void stopTest(hipEvent_t start, hipEvent_t stop, const char* msg, int iters) {
|
||||
float mS = 0;
|
||||
check(hipEventRecord(stop));
|
||||
check(hipDeviceSynchronize());
|
||||
check(hipEventElapsedTime(&mS, start, stop));
|
||||
resultDB.AddResult(std::string(msg), "", "uS", mS * 1000 / iters);
|
||||
if (PRINT_PROGRESS & 0x1) {
|
||||
std::cout << msg << "\t\t" << mS * 1000 / iters << " uS" << std::endl;
|
||||
}
|
||||
if (PRINT_PROGRESS & 0x2) {
|
||||
resultDB.DumpSummary(std::cout);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main() {
|
||||
hipError_t err;
|
||||
float* Ad;
|
||||
check(hipMalloc(&Ad, 4));
|
||||
|
||||
|
||||
hipStream_t stream;
|
||||
check(hipStreamCreate(&stream));
|
||||
|
||||
|
||||
hipEvent_t start, sync, stop;
|
||||
check(hipEventCreate(&start));
|
||||
check(hipEventCreateWithFlags(&sync, hipEventBlockingSync));
|
||||
check(hipEventCreate(&stop));
|
||||
|
||||
|
||||
int main() {
|
||||
hipStream_t stream0 = 0;
|
||||
hipDevice_t device;
|
||||
hipDeviceGet(&device, 0);
|
||||
hipCtx_t context;
|
||||
hipCtxCreate(&context, 0, device);
|
||||
hipModule_t module;
|
||||
hipFunction_t function;
|
||||
hipModuleLoad(&module, FILE_NAME);
|
||||
hipModuleGetFunction(&function, module, KERNEL_NAME);
|
||||
void* params = nullptr;
|
||||
|
||||
std::array<float, TOTAL_RUN_COUNT> results;
|
||||
hipEvent_t start, stop;
|
||||
hipEventCreate(&start);
|
||||
hipEventCreate(&stop);
|
||||
|
||||
/************************************************************************************/
|
||||
/* HIP kernel launch enqueue rate: */
|
||||
/* Measure time taken to enqueue a kernel on the GPU */
|
||||
/************************************************************************************/
|
||||
|
||||
if (p_tests & 0x1) {
|
||||
hipEventRecord(start);
|
||||
hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad);
|
||||
stopTest(start, stop, "FirstKernelLaunch", 1);
|
||||
// Timing hipModuleLaunchKernel
|
||||
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, ¶ms, nullptr);
|
||||
auto stop = std::chrono::high_resolution_clock::now();
|
||||
results[i] = std::chrono::duration<float, std::milli>(stop - start).count();
|
||||
}
|
||||
print_timing("hipModuleLaunchKernel enqueue rate", results);
|
||||
|
||||
|
||||
if (p_tests & 0x2) {
|
||||
hipEventRecord(start);
|
||||
hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad);
|
||||
stopTest(start, stop, "SecondKernelLaunch", 1);
|
||||
// Timing hipLaunchKernelGGL
|
||||
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
hipLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0);
|
||||
auto stop = std::chrono::high_resolution_clock::now();
|
||||
results[i] = std::chrono::duration<float, std::milli>(stop - start).count();
|
||||
}
|
||||
print_timing("hipLaunchKernelGGL enqueue rate", results);
|
||||
|
||||
/***********************************************************************************/
|
||||
/* Single dispatch execution latency using HIP events: */
|
||||
/* Measures latency to start & finish executing a kernel with GPU-scope visibility */
|
||||
/***********************************************************************************/
|
||||
|
||||
if (p_tests & 0x4) {
|
||||
for (int t = 0; t < TEST_ITERS; t++) {
|
||||
hipEventRecord(start);
|
||||
for (int i = 0; i < DISPATCHES_PER_TEST; i++) {
|
||||
hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad);
|
||||
hipEventRecord(sync);
|
||||
hipEventSynchronize(sync);
|
||||
}
|
||||
stopTest(start, stop, "NullStreamASyncDispatchWait", DISPATCHES_PER_TEST);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (p_tests & 0x10) {
|
||||
for (int t = 0; t < TEST_ITERS; t++) {
|
||||
hipEventRecord(start);
|
||||
for (int i = 0; i < DISPATCHES_PER_TEST; i++) {
|
||||
hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream, Ad);
|
||||
hipEventRecord(sync);
|
||||
hipEventSynchronize(sync);
|
||||
}
|
||||
stopTest(start, stop, "StreamASyncDispatchWait", DISPATCHES_PER_TEST);
|
||||
}
|
||||
}
|
||||
|
||||
#if 1
|
||||
|
||||
if (p_tests & 0x40) {
|
||||
for (int t = 0; t < TEST_ITERS; t++) {
|
||||
hipEventRecord(start);
|
||||
for (int i = 0; i < DISPATCHES_PER_TEST; i++) {
|
||||
hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad);
|
||||
}
|
||||
stopTest(start, stop, "NullStreamASyncDispatchNoWait", DISPATCHES_PER_TEST);
|
||||
}
|
||||
}
|
||||
|
||||
if (p_tests & 0x80) {
|
||||
for (int t = 0; t < TEST_ITERS; t++) {
|
||||
hipEventRecord(start);
|
||||
for (int i = 0; i < DISPATCHES_PER_TEST; i++) {
|
||||
hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream, Ad);
|
||||
}
|
||||
stopTest(start, stop, "StreamASyncDispatchNoWait", DISPATCHES_PER_TEST);
|
||||
}
|
||||
//Timing directly the dispatch
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
|
||||
hipExtLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, start, stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&results[i], start, stop);
|
||||
}
|
||||
print_timing("Timing directly single dispatch latency", results);
|
||||
#endif
|
||||
resultDB.DumpSummary(std::cout);
|
||||
|
||||
//Timing around the dispatch
|
||||
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
|
||||
hipEventRecord(start, 0);
|
||||
hipLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0);
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&results[i], start, stop);
|
||||
}
|
||||
print_timing("Timing around single dispatch latency", results);
|
||||
|
||||
check(hipEventDestroy(start));
|
||||
check(hipEventDestroy(sync));
|
||||
check(hipEventDestroy(stop));
|
||||
/*********************************************************************************/
|
||||
/* Batch dispatch execution latency using HIP events: */
|
||||
/* Measures latency to start & finish executing each dispatch in a batch */
|
||||
/*********************************************************************************/
|
||||
|
||||
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
|
||||
hipEventRecord(start, 0);
|
||||
for (int j = 0; j < BATCH_SIZE; j++) {
|
||||
hipLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0);
|
||||
}
|
||||
hipEventRecord(stop, 0);
|
||||
hipEventSynchronize(stop);
|
||||
hipEventElapsedTime(&results[i], start, stop);
|
||||
}
|
||||
print_timing("Batch dispatch latency", results, BATCH_SIZE);
|
||||
|
||||
hipEventDestroy(start);
|
||||
hipEventDestroy(stop);
|
||||
hipCtxDestroy(context);
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
/*
|
||||
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
extern "C" __global__ void test() {
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user