Simplify hipDispatchLatency sample (#1793)

* Use hipExtLaunchKernelGGL in dispatchlatency sample

* Let it run on NVCC path too

* Refactoring

* Add test_kernel source

* Remove ResultDB

* Remove error checks
This commit is contained in:
Rahul Garg
2020-01-22 14:37:47 -08:00
committed by GitHub
parent 3bc69394d0
commit 2eb7ae4f0c
5 changed files with 140 additions and 690 deletions
+6 -7
View File
@@ -2,19 +2,18 @@ HIP_PATH?= $(wildcard /opt/rocm/hip)
ifeq (,$(HIP_PATH))
HIP_PATH=../../..
endif
HIPCC=$(HIP_PATH)/bin/hipcc
HIPCC=$(HIP_PATH)/bin/hipcc -std=c++11
EXE=hipDispatchLatency
CXXFLAGS = -O3
all: ${EXE}
all: test_kernel.code ${EXE}
$(EXE): hipDispatchLatency.cpp ResultDatabase.cpp
$(HIPCC) $(CXXFLAGS) hipDispatchLatency.cpp ResultDatabase.cpp -o $@
install: $(EXE)
cp $(EXE) $(HIP_PATH)/bin
$(EXE): hipDispatchLatency.cpp
$(HIPCC) $(CXXFLAGS) hipDispatchLatency.cpp -o $@
test_kernel.code: test_kernel.cpp
$(HIP_PATH)/bin/hipcc --genco $(GENCO_FLAGS) $^ -o $@
clean:
rm -f *.o $(EXE)
@@ -1,473 +0,0 @@
#include "ResultDatabase.h"
#include <cfloat>
#include <algorithm>
#include <cmath>
#include <iomanip>
using namespace std;
#define SORT_BY_NAME 0
#define SORT_RETAIN_ATTS_ORDER 1
bool ResultDatabase::Result::operator<(const Result& rhs) const {
if (test < rhs.test) return true;
if (test > rhs.test) return false;
#if (SORT_RETAIN_ATTS_ORDER == 0)
// For ties, sort by the value of the attribute:
if (atts < rhs.atts) return true;
if (atts > rhs.atts) return false;
#endif
return false; // less-operator returns false on equal
}
double ResultDatabase::Result::GetMin() const {
double r = FLT_MAX;
for (int i = 0; i < value.size(); i++) {
r = min(r, value[i]);
}
return r;
}
double ResultDatabase::Result::GetMax() const {
double r = -FLT_MAX;
for (int i = 0; i < value.size(); i++) {
r = max(r, value[i]);
}
return r;
}
double ResultDatabase::Result::GetMedian() const { return GetPercentile(50); }
double ResultDatabase::Result::GetPercentile(double q) const {
int n = value.size();
if (n == 0) return FLT_MAX;
if (n == 1) return value[0];
if (q <= 0) return value[0];
if (q >= 100) return value[n - 1];
double index = ((n + 1.) * q / 100.) - 1;
vector<double> sorted = value;
sort(sorted.begin(), sorted.end());
if (n == 2) return (sorted[0] * (1 - q / 100.) + sorted[1] * (q / 100.));
int index_lo = int(index);
double frac = index - index_lo;
if (frac == 0) return sorted[index_lo];
double lo = sorted[index_lo];
double hi = sorted[index_lo + 1];
return lo + (hi - lo) * frac;
}
double ResultDatabase::Result::GetMean() const {
double r = 0;
for (int i = 0; i < value.size(); i++) {
r += value[i];
}
return r / double(value.size());
}
double ResultDatabase::Result::GetStdDev() const {
double r = 0;
double u = GetMean();
if (u == FLT_MAX) return FLT_MAX;
for (int i = 0; i < value.size(); i++) {
r += (value[i] - u) * (value[i] - u);
}
r = sqrt(r / value.size());
return r;
}
void ResultDatabase::AddResults(const string& test, const string& atts, const string& unit,
const vector<double>& values) {
for (int i = 0; i < values.size(); i++) {
AddResult(test, atts, unit, values[i]);
}
}
static string RemoveAllButLeadingSpaces(const string& a) {
string b;
int n = a.length();
int i = 0;
while (i < n && a[i] == ' ') {
b += a[i];
++i;
}
for (; i < n; i++) {
if (a[i] != ' ' && a[i] != '\t') b += a[i];
}
return b;
}
void ResultDatabase::AddResult(const string& test_orig, const string& atts_orig,
const string& unit_orig, double value) {
string test = RemoveAllButLeadingSpaces(test_orig);
string atts = RemoveAllButLeadingSpaces(atts_orig);
string unit = RemoveAllButLeadingSpaces(unit_orig);
int index;
for (index = 0; index < results.size(); index++) {
if (results[index].test == test && results[index].atts == atts) {
if (results[index].unit != unit) throw "Internal error: mixed units";
break;
}
}
if (index >= results.size()) {
Result r;
r.test = test;
r.atts = atts;
r.unit = unit;
results.push_back(r);
}
results[index].value.push_back(value);
}
// ****************************************************************************
// Method: ResultDatabase::DumpDetailed
//
// Purpose:
// Writes the full results, including all trials.
//
// Arguments:
// out where to print
//
// Programmer: Jeremy Meredith
// Creation: August 14, 2009
//
// Modifications:
// Jeremy Meredith, Wed Nov 10 14:25:17 EST 2010
// Renamed to DumpDetailed to make room for a DumpSummary.
//
// Jeremy Meredith, Thu Nov 11 11:39:57 EST 2010
// Added note about (*) missing value tag.
//
// Jeremy Meredith, Tue Nov 23 13:57:02 EST 2010
// Changed note about missing values to be worded a little better.
//
// ****************************************************************************
void ResultDatabase::DumpDetailed(ostream& out) {
vector<Result> sorted(results);
#if SORT_BY_NAME
stable_sort(sorted.begin(), sorted.end());
#endif
const int testNameW = 24;
const int attW = 12;
const int fieldW = 11;
out << std::fixed << right << std::setprecision(4);
int maxtrials = 1;
for (int i = 0; i < sorted.size(); i++) {
if (sorted[i].value.size() > maxtrials) maxtrials = sorted[i].value.size();
}
// TODO: in big parallel runs, the "trials" are the procs
// and we really don't want to print them all out....
out << setw(testNameW) << "test\t" << setw(attW) << "atts\t" << setw(fieldW) << "median\t"
<< "mean\t"
<< "stddev\t"
<< "min\t"
<< "max\t";
for (int i = 0; i < maxtrials; i++) out << "trial" << i << "\t";
out << endl;
for (int i = 0; i < sorted.size(); i++) {
Result& r = sorted[i];
out << setw(testNameW) << r.test + "\t";
out << setw(attW) << r.atts + "\t";
out << setw(fieldW) << r.unit + "\t";
if (r.GetMedian() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMedian() << "\t";
if (r.GetMean() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMean() << "\t";
if (r.GetStdDev() == FLT_MAX)
out << "N/A\t";
else
out << r.GetStdDev() << "\t";
if (r.GetMin() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMin() << "\t";
if (r.GetMax() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMax() << "\t";
for (int j = 0; j < r.value.size(); j++) {
if (r.value[j] == FLT_MAX)
out << "N/A\t";
else
out << r.value[j] << "\t";
}
out << endl;
}
if (0) {
out << endl
<< "Note: Any results marked with (*) had missing values." << endl
<< " This can occur on systems with a mixture of" << endl
<< " device types or architectural capabilities." << endl;
}
}
// ****************************************************************************
// Method: ResultDatabase::DumpDetailed
//
// Purpose:
// Writes the summary results (min/max/stddev/med/mean), but not
// every individual trial.
//
// Arguments:
// out where to print
//
// Programmer: Jeremy Meredith
// Creation: November 10, 2010
//
// Modifications:
// Jeremy Meredith, Thu Nov 11 11:39:57 EST 2010
// Added note about (*) missing value tag.
//
// ****************************************************************************
void ResultDatabase::DumpSummary(ostream& out) {
vector<Result> sorted(results);
#if SORT_BY_NAME
stable_sort(sorted.begin(), sorted.end());
#endif
const int testNameW = 32;
const int attW = 12;
const int fieldW = 9;
out << std::fixed << right << std::setprecision(2);
// TODO: in big parallel runs, the "trials" are the procs
// and we really don't want to print them all out....
out << setw(testNameW) << "test\t" << setw(attW) << "atts\t" << setw(fieldW) << "units\t"
<< "median\t"
<< "mean\t"
<< "stddev\t"
<< "min\t"
<< "max\t";
out << endl;
for (int i = 0; i < sorted.size(); i++) {
Result& r = sorted[i];
out << setw(testNameW) << r.test + "\t";
out << setw(attW) << r.atts + "\t";
out << setw(fieldW) << r.unit + "\t";
if (r.GetMedian() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMedian() << "\t";
if (r.GetMean() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMean() << "\t";
if (r.GetStdDev() == FLT_MAX)
out << "N/A\t";
else
out << r.GetStdDev() << "\t";
if (r.GetMin() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMin() << "\t";
if (r.GetMax() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMax() << "\t";
out << endl;
}
if (0) {
out << endl
<< "Note: results marked with (*) had missing values such as" << endl
<< "might occur with a mixture of architectural capabilities." << endl;
}
}
// ****************************************************************************
// Method: ResultDatabase::ClearAllResults
//
// Purpose:
// Clears all existing results from the ResultDatabase; used for multiple passes
// of the same test or multiple tests.
//
// Arguments:
//
// Programmer: Jeffrey Young
// Creation: September 10th, 2014
//
// Modifications:
//
//
// ****************************************************************************
void ResultDatabase::ClearAllResults() { results.clear(); }
// ****************************************************************************
// Method: ResultDatabase::DumpCsv
//
// Purpose:
// Writes either detailed or summary results (min/max/stddev/med/mean), but not
// every individual trial.
//
// Arguments:
// out file to print CSV results
//
// Programmer: Jeffrey Young
// Creation: August 28th, 2014
//
// Modifications:
//
// ****************************************************************************
void ResultDatabase::DumpCsv(string fileName) {
bool emptyFile;
vector<Result> sorted(results);
#if SORT_BY_NAME
stable_sort(sorted.begin(), sorted.end());
#endif
// Check to see if the file is empty - if so, add the headers
emptyFile = this->IsFileEmpty(fileName);
// Open file and append by default
ofstream out;
out.open(fileName.c_str(), std::ofstream::out | std::ofstream::app);
// Add headers only for empty files
if (emptyFile) {
// TODO: in big parallel runs, the "trials" are the procs
// and we really don't want to print them all out....
out << "test, "
<< "atts, "
<< "units, "
<< "median, "
<< "mean, "
<< "stddev, "
<< "min, "
<< "max, ";
out << endl;
}
for (int i = 0; i < sorted.size(); i++) {
Result& r = sorted[i];
out << r.test << ", ";
out << r.atts << ", ";
out << r.unit << ", ";
if (r.GetMedian() == FLT_MAX)
out << "N/A, ";
else
out << r.GetMedian() << ", ";
if (r.GetMean() == FLT_MAX)
out << "N/A, ";
else
out << r.GetMean() << ", ";
if (r.GetStdDev() == FLT_MAX)
out << "N/A, ";
else
out << r.GetStdDev() << ", ";
if (r.GetMin() == FLT_MAX)
out << "N/A, ";
else
out << r.GetMin() << ", ";
if (r.GetMax() == FLT_MAX)
out << "N/A, ";
else
out << r.GetMax() << ", ";
out << endl;
}
out << endl;
out.close();
}
// ****************************************************************************
// Method: ResultDatabase::IsFileEmpty
//
// Purpose:
// Returns whether a file is empty - used as a helper for CSV printing
//
// Arguments:
// file The input file to check for emptiness
//
// Programmer: Jeffrey Young
// Creation: August 28th, 2014
//
// Modifications:
//
// ****************************************************************************
bool ResultDatabase::IsFileEmpty(string fileName) {
bool fileEmpty;
ifstream file(fileName.c_str());
// If the file doesn't exist it is by definition empty
if (!file.good()) {
return true;
} else {
fileEmpty = (bool)(file.peek() == ifstream::traits_type::eof());
file.close();
return fileEmpty;
}
// Otherwise, return false
return false;
}
// ****************************************************************************
// Method: ResultDatabase::GetResultsForTest
//
// Purpose:
// Returns a vector of results for just one test name.
//
// Arguments:
// test the name of the test results to search for
//
// Programmer: Jeremy Meredith
// Creation: December 3, 2010
//
// Modifications:
//
// ****************************************************************************
vector<ResultDatabase::Result> ResultDatabase::GetResultsForTest(const string& test) {
// get only the given test results
vector<Result> retval;
for (int i = 0; i < results.size(); i++) {
Result& r = results[i];
if (r.test == test) retval.push_back(r);
}
return retval;
}
// ****************************************************************************
// Method: ResultDatabase::GetResults
//
// Purpose:
// Returns all the results.
//
// Arguments:
//
// Programmer: Jeremy Meredith
// Creation: December 3, 2010
//
// Modifications:
//
// ****************************************************************************
const vector<ResultDatabase::Result>& ResultDatabase::GetResults() const { return results; }
@@ -1,89 +0,0 @@
#ifndef RESULT_DATABASE_H
#define RESULT_DATABASE_H
#include <string>
#include <vector>
#include <iostream>
#include <fstream>
#include <cfloat>
using std::ifstream;
using std::ofstream;
using std::ostream;
using std::string;
using std::vector;
// ****************************************************************************
// Class: ResultDatabase
//
// Purpose:
// Track numerical results as they are generated.
// Print statistics of raw results.
//
// Programmer: Jeremy Meredith
// Creation: June 12, 2009
//
// Modifications:
// Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010
// Split timing reports into detailed and summary. E.g. for serial code,
// we might report all trial values, but skip them in parallel.
//
// Jeremy Meredith, Thu Nov 11 11:40:18 EST 2010
// Added check for missing value tag.
//
// Jeremy Meredith, Mon Nov 22 13:37:10 EST 2010
// Added percentile statistic.
//
// Jeremy Meredith, Fri Dec 3 16:30:31 EST 2010
// Added a method to extract a subset of results based on test name. Also,
// the Result class is now public, so that clients can use them directly.
// Added a GetResults method as well, and made several functions const.
//
// ****************************************************************************
class ResultDatabase {
public:
//
// A performance result for a single SHOC benchmark run.
//
struct Result {
string test; // e.g. "readback"
string atts; // e.g. "pagelocked 4k^2"
string unit; // e.g. "MB/sec"
vector<double> value; // e.g. "837.14"
double GetMin() const;
double GetMax() const;
double GetMedian() const;
double GetPercentile(double q) const;
double GetMean() const;
double GetStdDev() const;
bool operator<(const Result& rhs) const;
bool HadAnyFLTMAXValues() const {
for (int i = 0; i < value.size(); ++i) {
if (value[i] >= FLT_MAX) return true;
}
return false;
}
};
protected:
vector<Result> results;
public:
void AddResult(const string& test, const string& atts, const string& unit, double value);
void AddResults(const string& test, const string& atts, const string& unit,
const vector<double>& values);
vector<Result> GetResultsForTest(const string& test);
const vector<Result>& GetResults() const;
void ClearAllResults();
void DumpDetailed(ostream&);
void DumpSummary(ostream&);
void DumpCsv(string fileName);
private:
bool IsFileEmpty(string fileName);
};
#endif
@@ -1,16 +1,13 @@
/*
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -21,142 +18,134 @@ THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#ifdef __HIP_PLATFORM_HCC__
#include "hip/hip_ext.h"
#endif
#include <iostream>
#include <time.h>
#include "ResultDatabase.h"
#define PRINT_PROGRESS 0
#define check(cmd) \
{ \
hipError_t status = cmd; \
if (status != hipSuccess) { \
printf("error: '%s'(%d) from %s at %s:%d\n", hipGetErrorString(status), status, #cmd, \
__FILE__, __LINE__); \
abort(); \
} \
}
#define LEN 1024 * 1024
#include <chrono>
#include <algorithm>
#define NUM_GROUPS 1
#define GROUP_SIZE 64
#define TEST_ITERS 20
#define DISPATCHES_PER_TEST 100
#define GROUP_SIZE 1
#define WARMUP_RUN_COUNT 10
#define TIMING_RUN_COUNT 100
#define TOTAL_RUN_COUNT WARMUP_RUN_COUNT + TIMING_RUN_COUNT
#define BATCH_SIZE 1000
const unsigned p_tests = 0xfffffff;
#define FILE_NAME "test_kernel.code"
#define KERNEL_NAME "test"
__global__ void EmptyKernel() { }
// HCC optimizes away fully NULL kernel calls, so run one that is nearly null:
__global__ void NearlyNull(float* Ad) {
if (Ad) {
Ad[0] = 42;
}
void print_timing(std::string test, const std::array<float, TOTAL_RUN_COUNT> &results, int batch = 1) {
float total_us = 0.0f, mean_us = 0.0f, stddev_us = 0.0f;
// skip warm-up runs
auto start_iter = std::next(results.begin(), WARMUP_RUN_COUNT);
auto end_iter = results.end();
// mean
std::for_each(start_iter, end_iter, [&](const float &run_ms) {
total_us += (run_ms * 1000) / batch;
});
mean_us = total_us / TIMING_RUN_COUNT;
// stddev
total_us = 0;
std::for_each(start_iter, end_iter, [&](const float &run_ms) {
float dev_us = ((run_ms * 1000) / batch) - mean_us;
total_us += dev_us * dev_us;
});
stddev_us = sqrt(total_us / TIMING_RUN_COUNT);
// display
printf("\n %s: %.1f us, std: %.1f us\n", test.c_str(), mean_us, stddev_us);
}
ResultDatabase resultDB;
void stopTest(hipEvent_t start, hipEvent_t stop, const char* msg, int iters) {
float mS = 0;
check(hipEventRecord(stop));
check(hipDeviceSynchronize());
check(hipEventElapsedTime(&mS, start, stop));
resultDB.AddResult(std::string(msg), "", "uS", mS * 1000 / iters);
if (PRINT_PROGRESS & 0x1) {
std::cout << msg << "\t\t" << mS * 1000 / iters << " uS" << std::endl;
}
if (PRINT_PROGRESS & 0x2) {
resultDB.DumpSummary(std::cout);
}
}
int main() {
hipError_t err;
float* Ad;
check(hipMalloc(&Ad, 4));
hipStream_t stream;
check(hipStreamCreate(&stream));
hipEvent_t start, sync, stop;
check(hipEventCreate(&start));
check(hipEventCreateWithFlags(&sync, hipEventBlockingSync));
check(hipEventCreate(&stop));
int main() {
hipStream_t stream0 = 0;
hipDevice_t device;
hipDeviceGet(&device, 0);
hipCtx_t context;
hipCtxCreate(&context, 0, device);
hipModule_t module;
hipFunction_t function;
hipModuleLoad(&module, FILE_NAME);
hipModuleGetFunction(&function, module, KERNEL_NAME);
void* params = nullptr;
std::array<float, TOTAL_RUN_COUNT> results;
hipEvent_t start, stop;
hipEventCreate(&start);
hipEventCreate(&stop);
/************************************************************************************/
/* HIP kernel launch enqueue rate: */
/* Measure time taken to enqueue a kernel on the GPU */
/************************************************************************************/
if (p_tests & 0x1) {
hipEventRecord(start);
hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad);
stopTest(start, stop, "FirstKernelLaunch", 1);
// Timing hipModuleLaunchKernel
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
auto start = std::chrono::high_resolution_clock::now();
hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, &params, nullptr);
auto stop = std::chrono::high_resolution_clock::now();
results[i] = std::chrono::duration<float, std::milli>(stop - start).count();
}
print_timing("hipModuleLaunchKernel enqueue rate", results);
if (p_tests & 0x2) {
hipEventRecord(start);
hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad);
stopTest(start, stop, "SecondKernelLaunch", 1);
// Timing hipLaunchKernelGGL
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
auto start = std::chrono::high_resolution_clock::now();
hipLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0);
auto stop = std::chrono::high_resolution_clock::now();
results[i] = std::chrono::duration<float, std::milli>(stop - start).count();
}
print_timing("hipLaunchKernelGGL enqueue rate", results);
/***********************************************************************************/
/* Single dispatch execution latency using HIP events: */
/* Measures latency to start & finish executing a kernel with GPU-scope visibility */
/***********************************************************************************/
if (p_tests & 0x4) {
for (int t = 0; t < TEST_ITERS; t++) {
hipEventRecord(start);
for (int i = 0; i < DISPATCHES_PER_TEST; i++) {
hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad);
hipEventRecord(sync);
hipEventSynchronize(sync);
}
stopTest(start, stop, "NullStreamASyncDispatchWait", DISPATCHES_PER_TEST);
}
}
if (p_tests & 0x10) {
for (int t = 0; t < TEST_ITERS; t++) {
hipEventRecord(start);
for (int i = 0; i < DISPATCHES_PER_TEST; i++) {
hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream, Ad);
hipEventRecord(sync);
hipEventSynchronize(sync);
}
stopTest(start, stop, "StreamASyncDispatchWait", DISPATCHES_PER_TEST);
}
}
#if 1
if (p_tests & 0x40) {
for (int t = 0; t < TEST_ITERS; t++) {
hipEventRecord(start);
for (int i = 0; i < DISPATCHES_PER_TEST; i++) {
hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad);
}
stopTest(start, stop, "NullStreamASyncDispatchNoWait", DISPATCHES_PER_TEST);
}
}
if (p_tests & 0x80) {
for (int t = 0; t < TEST_ITERS; t++) {
hipEventRecord(start);
for (int i = 0; i < DISPATCHES_PER_TEST; i++) {
hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream, Ad);
}
stopTest(start, stop, "StreamASyncDispatchNoWait", DISPATCHES_PER_TEST);
}
//Timing directly the dispatch
#ifdef __HIP_PLATFORM_HCC__
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
hipExtLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, start, stop, 0);
hipEventSynchronize(stop);
hipEventElapsedTime(&results[i], start, stop);
}
print_timing("Timing directly single dispatch latency", results);
#endif
resultDB.DumpSummary(std::cout);
//Timing around the dispatch
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
hipEventRecord(start, 0);
hipLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0);
hipEventRecord(stop, 0);
hipEventSynchronize(stop);
hipEventElapsedTime(&results[i], start, stop);
}
print_timing("Timing around single dispatch latency", results);
check(hipEventDestroy(start));
check(hipEventDestroy(sync));
check(hipEventDestroy(stop));
/*********************************************************************************/
/* Batch dispatch execution latency using HIP events: */
/* Measures latency to start & finish executing each dispatch in a batch */
/*********************************************************************************/
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
hipEventRecord(start, 0);
for (int j = 0; j < BATCH_SIZE; j++) {
hipLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0);
}
hipEventRecord(stop, 0);
hipEventSynchronize(stop);
hipEventElapsedTime(&results[i], start, stop);
}
print_timing("Batch dispatch latency", results, BATCH_SIZE);
hipEventDestroy(start);
hipEventDestroy(stop);
hipCtxDestroy(context);
}
@@ -0,0 +1,24 @@
/*
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
extern "C" __global__ void test() {
}