diff --git a/samples/1_Utils/hipDispatchLatency/Makefile b/samples/1_Utils/hipDispatchLatency/Makefile index 3b69c4a335..0616f01f0d 100644 --- a/samples/1_Utils/hipDispatchLatency/Makefile +++ b/samples/1_Utils/hipDispatchLatency/Makefile @@ -2,19 +2,18 @@ HIP_PATH?= $(wildcard /opt/rocm/hip) ifeq (,$(HIP_PATH)) HIP_PATH=../../.. endif -HIPCC=$(HIP_PATH)/bin/hipcc +HIPCC=$(HIP_PATH)/bin/hipcc -std=c++11 EXE=hipDispatchLatency CXXFLAGS = -O3 -all: ${EXE} +all: test_kernel.code ${EXE} -$(EXE): hipDispatchLatency.cpp ResultDatabase.cpp - $(HIPCC) $(CXXFLAGS) hipDispatchLatency.cpp ResultDatabase.cpp -o $@ - -install: $(EXE) - cp $(EXE) $(HIP_PATH)/bin +$(EXE): hipDispatchLatency.cpp + $(HIPCC) $(CXXFLAGS) hipDispatchLatency.cpp -o $@ +test_kernel.code: test_kernel.cpp + $(HIP_PATH)/bin/hipcc --genco $(GENCO_FLAGS) $^ -o $@ clean: rm -f *.o $(EXE) diff --git a/samples/1_Utils/hipDispatchLatency/ResultDatabase.cpp b/samples/1_Utils/hipDispatchLatency/ResultDatabase.cpp deleted file mode 100644 index b769ca4b32..0000000000 --- a/samples/1_Utils/hipDispatchLatency/ResultDatabase.cpp +++ /dev/null @@ -1,473 +0,0 @@ -#include "ResultDatabase.h" - -#include -#include -#include -#include - -using namespace std; - -#define SORT_BY_NAME 0 -#define SORT_RETAIN_ATTS_ORDER 1 - - -bool ResultDatabase::Result::operator<(const Result& rhs) const { - if (test < rhs.test) return true; - if (test > rhs.test) return false; -#if (SORT_RETAIN_ATTS_ORDER == 0) - // For ties, sort by the value of the attribute: - if (atts < rhs.atts) return true; - if (atts > rhs.atts) return false; -#endif - return false; // less-operator returns false on equal -} - -double ResultDatabase::Result::GetMin() const { - double r = FLT_MAX; - for (int i = 0; i < value.size(); i++) { - r = min(r, value[i]); - } - return r; -} - -double ResultDatabase::Result::GetMax() const { - double r = -FLT_MAX; - for (int i = 0; i < value.size(); i++) { - r = max(r, value[i]); - } - return r; -} - -double ResultDatabase::Result::GetMedian() const { return GetPercentile(50); } - -double ResultDatabase::Result::GetPercentile(double q) const { - int n = value.size(); - if (n == 0) return FLT_MAX; - if (n == 1) return value[0]; - - if (q <= 0) return value[0]; - if (q >= 100) return value[n - 1]; - - double index = ((n + 1.) * q / 100.) - 1; - - vector sorted = value; - sort(sorted.begin(), sorted.end()); - - if (n == 2) return (sorted[0] * (1 - q / 100.) + sorted[1] * (q / 100.)); - - int index_lo = int(index); - double frac = index - index_lo; - if (frac == 0) return sorted[index_lo]; - - double lo = sorted[index_lo]; - double hi = sorted[index_lo + 1]; - return lo + (hi - lo) * frac; -} - -double ResultDatabase::Result::GetMean() const { - double r = 0; - for (int i = 0; i < value.size(); i++) { - r += value[i]; - } - return r / double(value.size()); -} - -double ResultDatabase::Result::GetStdDev() const { - double r = 0; - double u = GetMean(); - if (u == FLT_MAX) return FLT_MAX; - for (int i = 0; i < value.size(); i++) { - r += (value[i] - u) * (value[i] - u); - } - r = sqrt(r / value.size()); - return r; -} - - -void ResultDatabase::AddResults(const string& test, const string& atts, const string& unit, - const vector& values) { - for (int i = 0; i < values.size(); i++) { - AddResult(test, atts, unit, values[i]); - } -} - -static string RemoveAllButLeadingSpaces(const string& a) { - string b; - int n = a.length(); - int i = 0; - while (i < n && a[i] == ' ') { - b += a[i]; - ++i; - } - for (; i < n; i++) { - if (a[i] != ' ' && a[i] != '\t') b += a[i]; - } - return b; -} - -void ResultDatabase::AddResult(const string& test_orig, const string& atts_orig, - const string& unit_orig, double value) { - string test = RemoveAllButLeadingSpaces(test_orig); - string atts = RemoveAllButLeadingSpaces(atts_orig); - string unit = RemoveAllButLeadingSpaces(unit_orig); - int index; - for (index = 0; index < results.size(); index++) { - if (results[index].test == test && results[index].atts == atts) { - if (results[index].unit != unit) throw "Internal error: mixed units"; - - break; - } - } - - if (index >= results.size()) { - Result r; - r.test = test; - r.atts = atts; - r.unit = unit; - results.push_back(r); - } - - results[index].value.push_back(value); -} - -// **************************************************************************** -// Method: ResultDatabase::DumpDetailed -// -// Purpose: -// Writes the full results, including all trials. -// -// Arguments: -// out where to print -// -// Programmer: Jeremy Meredith -// Creation: August 14, 2009 -// -// Modifications: -// Jeremy Meredith, Wed Nov 10 14:25:17 EST 2010 -// Renamed to DumpDetailed to make room for a DumpSummary. -// -// Jeremy Meredith, Thu Nov 11 11:39:57 EST 2010 -// Added note about (*) missing value tag. -// -// Jeremy Meredith, Tue Nov 23 13:57:02 EST 2010 -// Changed note about missing values to be worded a little better. -// -// **************************************************************************** -void ResultDatabase::DumpDetailed(ostream& out) { - vector sorted(results); - -#if SORT_BY_NAME - stable_sort(sorted.begin(), sorted.end()); -#endif - - const int testNameW = 24; - const int attW = 12; - const int fieldW = 11; - out << std::fixed << right << std::setprecision(4); - - int maxtrials = 1; - for (int i = 0; i < sorted.size(); i++) { - if (sorted[i].value.size() > maxtrials) maxtrials = sorted[i].value.size(); - } - - // TODO: in big parallel runs, the "trials" are the procs - // and we really don't want to print them all out.... - out << setw(testNameW) << "test\t" << setw(attW) << "atts\t" << setw(fieldW) << "median\t" - << "mean\t" - << "stddev\t" - << "min\t" - << "max\t"; - for (int i = 0; i < maxtrials; i++) out << "trial" << i << "\t"; - out << endl; - - for (int i = 0; i < sorted.size(); i++) { - Result& r = sorted[i]; - out << setw(testNameW) << r.test + "\t"; - out << setw(attW) << r.atts + "\t"; - out << setw(fieldW) << r.unit + "\t"; - if (r.GetMedian() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMedian() << "\t"; - if (r.GetMean() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMean() << "\t"; - if (r.GetStdDev() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetStdDev() << "\t"; - if (r.GetMin() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMin() << "\t"; - if (r.GetMax() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMax() << "\t"; - for (int j = 0; j < r.value.size(); j++) { - if (r.value[j] == FLT_MAX) - out << "N/A\t"; - else - out << r.value[j] << "\t"; - } - - out << endl; - } - if (0) { - out << endl - << "Note: Any results marked with (*) had missing values." << endl - << " This can occur on systems with a mixture of" << endl - << " device types or architectural capabilities." << endl; - } -} - - -// **************************************************************************** -// Method: ResultDatabase::DumpDetailed -// -// Purpose: -// Writes the summary results (min/max/stddev/med/mean), but not -// every individual trial. -// -// Arguments: -// out where to print -// -// Programmer: Jeremy Meredith -// Creation: November 10, 2010 -// -// Modifications: -// Jeremy Meredith, Thu Nov 11 11:39:57 EST 2010 -// Added note about (*) missing value tag. -// -// **************************************************************************** -void ResultDatabase::DumpSummary(ostream& out) { - vector sorted(results); - -#if SORT_BY_NAME - stable_sort(sorted.begin(), sorted.end()); -#endif - - const int testNameW = 32; - const int attW = 12; - const int fieldW = 9; - out << std::fixed << right << std::setprecision(2); - - // TODO: in big parallel runs, the "trials" are the procs - // and we really don't want to print them all out.... - out << setw(testNameW) << "test\t" << setw(attW) << "atts\t" << setw(fieldW) << "units\t" - << "median\t" - << "mean\t" - << "stddev\t" - << "min\t" - << "max\t"; - out << endl; - - for (int i = 0; i < sorted.size(); i++) { - Result& r = sorted[i]; - out << setw(testNameW) << r.test + "\t"; - out << setw(attW) << r.atts + "\t"; - out << setw(fieldW) << r.unit + "\t"; - if (r.GetMedian() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMedian() << "\t"; - if (r.GetMean() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMean() << "\t"; - if (r.GetStdDev() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetStdDev() << "\t"; - if (r.GetMin() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMin() << "\t"; - if (r.GetMax() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMax() << "\t"; - - out << endl; - } - if (0) { - out << endl - << "Note: results marked with (*) had missing values such as" << endl - << "might occur with a mixture of architectural capabilities." << endl; - } -} - -// **************************************************************************** -// Method: ResultDatabase::ClearAllResults -// -// Purpose: -// Clears all existing results from the ResultDatabase; used for multiple passes -// of the same test or multiple tests. -// -// Arguments: -// -// Programmer: Jeffrey Young -// Creation: September 10th, 2014 -// -// Modifications: -// -// -// **************************************************************************** -void ResultDatabase::ClearAllResults() { results.clear(); } - -// **************************************************************************** -// Method: ResultDatabase::DumpCsv -// -// Purpose: -// Writes either detailed or summary results (min/max/stddev/med/mean), but not -// every individual trial. -// -// Arguments: -// out file to print CSV results -// -// Programmer: Jeffrey Young -// Creation: August 28th, 2014 -// -// Modifications: -// -// **************************************************************************** -void ResultDatabase::DumpCsv(string fileName) { - bool emptyFile; - vector sorted(results); - -#if SORT_BY_NAME - stable_sort(sorted.begin(), sorted.end()); -#endif - - // Check to see if the file is empty - if so, add the headers - emptyFile = this->IsFileEmpty(fileName); - - // Open file and append by default - ofstream out; - out.open(fileName.c_str(), std::ofstream::out | std::ofstream::app); - - // Add headers only for empty files - if (emptyFile) { - // TODO: in big parallel runs, the "trials" are the procs - // and we really don't want to print them all out.... - out << "test, " - << "atts, " - << "units, " - << "median, " - << "mean, " - << "stddev, " - << "min, " - << "max, "; - out << endl; - } - - for (int i = 0; i < sorted.size(); i++) { - Result& r = sorted[i]; - out << r.test << ", "; - out << r.atts << ", "; - out << r.unit << ", "; - if (r.GetMedian() == FLT_MAX) - out << "N/A, "; - else - out << r.GetMedian() << ", "; - if (r.GetMean() == FLT_MAX) - out << "N/A, "; - else - out << r.GetMean() << ", "; - if (r.GetStdDev() == FLT_MAX) - out << "N/A, "; - else - out << r.GetStdDev() << ", "; - if (r.GetMin() == FLT_MAX) - out << "N/A, "; - else - out << r.GetMin() << ", "; - if (r.GetMax() == FLT_MAX) - out << "N/A, "; - else - out << r.GetMax() << ", "; - - out << endl; - } - out << endl; - - out.close(); -} - -// **************************************************************************** -// Method: ResultDatabase::IsFileEmpty -// -// Purpose: -// Returns whether a file is empty - used as a helper for CSV printing -// -// Arguments: -// file The input file to check for emptiness -// -// Programmer: Jeffrey Young -// Creation: August 28th, 2014 -// -// Modifications: -// -// **************************************************************************** - -bool ResultDatabase::IsFileEmpty(string fileName) { - bool fileEmpty; - - ifstream file(fileName.c_str()); - - // If the file doesn't exist it is by definition empty - if (!file.good()) { - return true; - } else { - fileEmpty = (bool)(file.peek() == ifstream::traits_type::eof()); - file.close(); - - return fileEmpty; - } - - // Otherwise, return false - return false; -} - - -// **************************************************************************** -// Method: ResultDatabase::GetResultsForTest -// -// Purpose: -// Returns a vector of results for just one test name. -// -// Arguments: -// test the name of the test results to search for -// -// Programmer: Jeremy Meredith -// Creation: December 3, 2010 -// -// Modifications: -// -// **************************************************************************** -vector ResultDatabase::GetResultsForTest(const string& test) { - // get only the given test results - vector retval; - for (int i = 0; i < results.size(); i++) { - Result& r = results[i]; - if (r.test == test) retval.push_back(r); - } - return retval; -} - -// **************************************************************************** -// Method: ResultDatabase::GetResults -// -// Purpose: -// Returns all the results. -// -// Arguments: -// -// Programmer: Jeremy Meredith -// Creation: December 3, 2010 -// -// Modifications: -// -// **************************************************************************** -const vector& ResultDatabase::GetResults() const { return results; } diff --git a/samples/1_Utils/hipDispatchLatency/ResultDatabase.h b/samples/1_Utils/hipDispatchLatency/ResultDatabase.h deleted file mode 100644 index ca6a00fc91..0000000000 --- a/samples/1_Utils/hipDispatchLatency/ResultDatabase.h +++ /dev/null @@ -1,89 +0,0 @@ -#ifndef RESULT_DATABASE_H -#define RESULT_DATABASE_H - -#include -#include -#include -#include -#include -using std::ifstream; -using std::ofstream; -using std::ostream; -using std::string; -using std::vector; - - -// **************************************************************************** -// Class: ResultDatabase -// -// Purpose: -// Track numerical results as they are generated. -// Print statistics of raw results. -// -// Programmer: Jeremy Meredith -// Creation: June 12, 2009 -// -// Modifications: -// Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010 -// Split timing reports into detailed and summary. E.g. for serial code, -// we might report all trial values, but skip them in parallel. -// -// Jeremy Meredith, Thu Nov 11 11:40:18 EST 2010 -// Added check for missing value tag. -// -// Jeremy Meredith, Mon Nov 22 13:37:10 EST 2010 -// Added percentile statistic. -// -// Jeremy Meredith, Fri Dec 3 16:30:31 EST 2010 -// Added a method to extract a subset of results based on test name. Also, -// the Result class is now public, so that clients can use them directly. -// Added a GetResults method as well, and made several functions const. -// -// **************************************************************************** -class ResultDatabase { - public: - // - // A performance result for a single SHOC benchmark run. - // - struct Result { - string test; // e.g. "readback" - string atts; // e.g. "pagelocked 4k^2" - string unit; // e.g. "MB/sec" - vector value; // e.g. "837.14" - double GetMin() const; - double GetMax() const; - double GetMedian() const; - double GetPercentile(double q) const; - double GetMean() const; - double GetStdDev() const; - - bool operator<(const Result& rhs) const; - - bool HadAnyFLTMAXValues() const { - for (int i = 0; i < value.size(); ++i) { - if (value[i] >= FLT_MAX) return true; - } - return false; - } - }; - - protected: - vector results; - - public: - void AddResult(const string& test, const string& atts, const string& unit, double value); - void AddResults(const string& test, const string& atts, const string& unit, - const vector& values); - vector GetResultsForTest(const string& test); - const vector& GetResults() const; - void ClearAllResults(); - void DumpDetailed(ostream&); - void DumpSummary(ostream&); - void DumpCsv(string fileName); - - private: - bool IsFileEmpty(string fileName); -}; - - -#endif diff --git a/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp b/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp index 9d384c7d6a..625d8cd742 100644 --- a/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp +++ b/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp @@ -1,16 +1,13 @@ /* Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. - Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -21,142 +18,134 @@ THE SOFTWARE. */ #include "hip/hip_runtime.h" +#ifdef __HIP_PLATFORM_HCC__ +#include "hip/hip_ext.h" +#endif #include -#include -#include "ResultDatabase.h" - -#define PRINT_PROGRESS 0 - -#define check(cmd) \ - { \ - hipError_t status = cmd; \ - if (status != hipSuccess) { \ - printf("error: '%s'(%d) from %s at %s:%d\n", hipGetErrorString(status), status, #cmd, \ - __FILE__, __LINE__); \ - abort(); \ - } \ - } - -#define LEN 1024 * 1024 +#include +#include #define NUM_GROUPS 1 -#define GROUP_SIZE 64 -#define TEST_ITERS 20 -#define DISPATCHES_PER_TEST 100 +#define GROUP_SIZE 1 +#define WARMUP_RUN_COUNT 10 +#define TIMING_RUN_COUNT 100 +#define TOTAL_RUN_COUNT WARMUP_RUN_COUNT + TIMING_RUN_COUNT +#define BATCH_SIZE 1000 -const unsigned p_tests = 0xfffffff; +#define FILE_NAME "test_kernel.code" +#define KERNEL_NAME "test" +__global__ void EmptyKernel() { } -// HCC optimizes away fully NULL kernel calls, so run one that is nearly null: -__global__ void NearlyNull(float* Ad) { - if (Ad) { - Ad[0] = 42; - } +void print_timing(std::string test, const std::array &results, int batch = 1) { + + float total_us = 0.0f, mean_us = 0.0f, stddev_us = 0.0f; + + // skip warm-up runs + auto start_iter = std::next(results.begin(), WARMUP_RUN_COUNT); + auto end_iter = results.end(); + + // mean + std::for_each(start_iter, end_iter, [&](const float &run_ms) { + total_us += (run_ms * 1000) / batch; + }); + mean_us = total_us / TIMING_RUN_COUNT; + + // stddev + total_us = 0; + std::for_each(start_iter, end_iter, [&](const float &run_ms) { + float dev_us = ((run_ms * 1000) / batch) - mean_us; + total_us += dev_us * dev_us; + }); + stddev_us = sqrt(total_us / TIMING_RUN_COUNT); + + // display + printf("\n %s: %.1f us, std: %.1f us\n", test.c_str(), mean_us, stddev_us); } - -ResultDatabase resultDB; - - -void stopTest(hipEvent_t start, hipEvent_t stop, const char* msg, int iters) { - float mS = 0; - check(hipEventRecord(stop)); - check(hipDeviceSynchronize()); - check(hipEventElapsedTime(&mS, start, stop)); - resultDB.AddResult(std::string(msg), "", "uS", mS * 1000 / iters); - if (PRINT_PROGRESS & 0x1) { - std::cout << msg << "\t\t" << mS * 1000 / iters << " uS" << std::endl; - } - if (PRINT_PROGRESS & 0x2) { - resultDB.DumpSummary(std::cout); - } -} - - -int main() { - hipError_t err; - float* Ad; - check(hipMalloc(&Ad, 4)); - - - hipStream_t stream; - check(hipStreamCreate(&stream)); - - - hipEvent_t start, sync, stop; - check(hipEventCreate(&start)); - check(hipEventCreateWithFlags(&sync, hipEventBlockingSync)); - check(hipEventCreate(&stop)); - - +int main() { hipStream_t stream0 = 0; + hipDevice_t device; + hipDeviceGet(&device, 0); + hipCtx_t context; + hipCtxCreate(&context, 0, device); + hipModule_t module; + hipFunction_t function; + hipModuleLoad(&module, FILE_NAME); + hipModuleGetFunction(&function, module, KERNEL_NAME); + void* params = nullptr; + + std::array results; + hipEvent_t start, stop; + hipEventCreate(&start); + hipEventCreate(&stop); + /************************************************************************************/ + /* HIP kernel launch enqueue rate: */ + /* Measure time taken to enqueue a kernel on the GPU */ + /************************************************************************************/ - if (p_tests & 0x1) { - hipEventRecord(start); - hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad); - stopTest(start, stop, "FirstKernelLaunch", 1); + // Timing hipModuleLaunchKernel + for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) { + auto start = std::chrono::high_resolution_clock::now(); + hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, 0, ¶ms, nullptr); + auto stop = std::chrono::high_resolution_clock::now(); + results[i] = std::chrono::duration(stop - start).count(); } + print_timing("hipModuleLaunchKernel enqueue rate", results); - - if (p_tests & 0x2) { - hipEventRecord(start); - hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad); - stopTest(start, stop, "SecondKernelLaunch", 1); + // Timing hipLaunchKernelGGL + for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) { + auto start = std::chrono::high_resolution_clock::now(); + hipLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0); + auto stop = std::chrono::high_resolution_clock::now(); + results[i] = std::chrono::duration(stop - start).count(); } + print_timing("hipLaunchKernelGGL enqueue rate", results); + /***********************************************************************************/ + /* Single dispatch execution latency using HIP events: */ + /* Measures latency to start & finish executing a kernel with GPU-scope visibility */ + /***********************************************************************************/ - if (p_tests & 0x4) { - for (int t = 0; t < TEST_ITERS; t++) { - hipEventRecord(start); - for (int i = 0; i < DISPATCHES_PER_TEST; i++) { - hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad); - hipEventRecord(sync); - hipEventSynchronize(sync); - } - stopTest(start, stop, "NullStreamASyncDispatchWait", DISPATCHES_PER_TEST); - } - } - - - if (p_tests & 0x10) { - for (int t = 0; t < TEST_ITERS; t++) { - hipEventRecord(start); - for (int i = 0; i < DISPATCHES_PER_TEST; i++) { - hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream, Ad); - hipEventRecord(sync); - hipEventSynchronize(sync); - } - stopTest(start, stop, "StreamASyncDispatchWait", DISPATCHES_PER_TEST); - } - } - -#if 1 - - if (p_tests & 0x40) { - for (int t = 0; t < TEST_ITERS; t++) { - hipEventRecord(start); - for (int i = 0; i < DISPATCHES_PER_TEST; i++) { - hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, Ad); - } - stopTest(start, stop, "NullStreamASyncDispatchNoWait", DISPATCHES_PER_TEST); - } - } - - if (p_tests & 0x80) { - for (int t = 0; t < TEST_ITERS; t++) { - hipEventRecord(start); - for (int i = 0; i < DISPATCHES_PER_TEST; i++) { - hipLaunchKernelGGL(NearlyNull, dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream, Ad); - } - stopTest(start, stop, "StreamASyncDispatchNoWait", DISPATCHES_PER_TEST); - } + //Timing directly the dispatch +#ifdef __HIP_PLATFORM_HCC__ + for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) { + hipExtLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0, start, stop, 0); + hipEventSynchronize(stop); + hipEventElapsedTime(&results[i], start, stop); } + print_timing("Timing directly single dispatch latency", results); #endif - resultDB.DumpSummary(std::cout); + //Timing around the dispatch + for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) { + hipEventRecord(start, 0); + hipLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0); + hipEventRecord(stop, 0); + hipEventSynchronize(stop); + hipEventElapsedTime(&results[i], start, stop); + } + print_timing("Timing around single dispatch latency", results); - check(hipEventDestroy(start)); - check(hipEventDestroy(sync)); - check(hipEventDestroy(stop)); + /*********************************************************************************/ + /* Batch dispatch execution latency using HIP events: */ + /* Measures latency to start & finish executing each dispatch in a batch */ + /*********************************************************************************/ + + for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) { + hipEventRecord(start, 0); + for (int j = 0; j < BATCH_SIZE; j++) { + hipLaunchKernelGGL((EmptyKernel), dim3(NUM_GROUPS), dim3(GROUP_SIZE), 0, stream0); + } + hipEventRecord(stop, 0); + hipEventSynchronize(stop); + hipEventElapsedTime(&results[i], start, stop); + } + print_timing("Batch dispatch latency", results, BATCH_SIZE); + + hipEventDestroy(start); + hipEventDestroy(stop); + hipCtxDestroy(context); } + diff --git a/samples/1_Utils/hipDispatchLatency/test_kernel.cpp b/samples/1_Utils/hipDispatchLatency/test_kernel.cpp new file mode 100644 index 0000000000..23ef426730 --- /dev/null +++ b/samples/1_Utils/hipDispatchLatency/test_kernel.cpp @@ -0,0 +1,24 @@ +/* +Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "hip/hip_runtime.h" + +extern "C" __global__ void test() { +} +