diff --git a/.gitignore b/.gitignore index f3f605803c..4a76ca2cf4 100644 --- a/.gitignore +++ b/.gitignore @@ -13,5 +13,4 @@ samples/0_Intro/module_api/vcpy_isa.hsaco samples/0_Intro/module_api/vcpy_kernel.co samples/0_Intro/module_api/vcpy_kernel.code samples/1_Utils/hipInfo/hipInfo -samples/1_Utils/hipBusBandwidth/hipBusBandwidth samples/1_Utils/hipDispatchLatency/hipDispatchLatency diff --git a/samples/1_Utils/hipBusBandwidth/CMakeLists.txt b/samples/1_Utils/hipBusBandwidth/CMakeLists.txt deleted file mode 100644 index bfebb89e56..0000000000 --- a/samples/1_Utils/hipBusBandwidth/CMakeLists.txt +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - -project(hipBusBandwidth) - -cmake_minimum_required(VERSION 3.10) - -if (NOT DEFINED ROCM_PATH ) - set ( ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory." ) -endif () - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH}) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_BUILD_TYPE Release) - -# Create the excutable -add_executable(hipBusBandwidth hipBusBandwidth.cpp ResultDatabase.cpp) - -# Link with HIP -target_link_libraries(hipBusBandwidth hip::host) \ No newline at end of file diff --git a/samples/1_Utils/hipBusBandwidth/LICENSE.txt b/samples/1_Utils/hipBusBandwidth/LICENSE.txt deleted file mode 100644 index 5d0d603232..0000000000 --- a/samples/1_Utils/hipBusBandwidth/LICENSE.txt +++ /dev/null @@ -1,27 +0,0 @@ - -Copyright (c) 2011, UT-Battelle, LLC -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. -* Neither the name of Oak Ridge National Laboratory, nor UT-Battelle, LLC, nor - the names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/samples/1_Utils/hipBusBandwidth/Makefile b/samples/1_Utils/hipBusBandwidth/Makefile deleted file mode 100644 index 5aad9411e3..0000000000 --- a/samples/1_Utils/hipBusBandwidth/Makefile +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -ifeq ($(OS),Windows_NT) - $(error Makefile is not supported on windows platform. Please use cmake instead to build sample.) -endif -ROCM_PATH?= $(wildcard /opt/rocm/) -HIP_PATH?= $(wildcard $(ROCM_PATH)/hip) -ifeq (,$(HIP_PATH)) - HIP_PATH=../../.. -endif -HIPCC=$(HIP_PATH)/bin/hipcc - -EXE=hipBusBandwidth -CXXFLAGS = -O3 - -all: install - -$(EXE): hipBusBandwidth.cpp ResultDatabase.cpp - $(HIPCC) $(CXXFLAGS) $^ -o $@ - -install: $(EXE) - cp $(EXE) $(HIP_PATH)/bin - - -clean: - rm -f *.o $(EXE) diff --git a/samples/1_Utils/hipBusBandwidth/ResultDatabase.cpp b/samples/1_Utils/hipBusBandwidth/ResultDatabase.cpp deleted file mode 100644 index e094f70d07..0000000000 --- a/samples/1_Utils/hipBusBandwidth/ResultDatabase.cpp +++ /dev/null @@ -1,462 +0,0 @@ -#include "ResultDatabase.h" - -#include -#include -#include -#include - -using namespace std; - -#define SORT_RETAIN_ATTS_ORDER 1 - - -bool ResultDatabase::Result::operator<(const Result& rhs) const { - if (test < rhs.test) return true; - if (test > rhs.test) return false; -#if (SORT_RETAIN_ATTS_ORDER == 0) - // For ties, sort by the value of the attribute: - if (atts < rhs.atts) return true; - if (atts > rhs.atts) return false; -#endif - return false; // less-operator returns false on equal -} - -double ResultDatabase::Result::GetMin() const { - double r = FLT_MAX; - for (int i = 0; i < value.size(); i++) { - r = min(r, value[i]); - } - return r; -} - -double ResultDatabase::Result::GetMax() const { - double r = -FLT_MAX; - for (int i = 0; i < value.size(); i++) { - r = max(r, value[i]); - } - return r; -} - -double ResultDatabase::Result::GetMedian() const { return GetPercentile(50); } - -double ResultDatabase::Result::GetPercentile(double q) const { - int n = value.size(); - if (n == 0) return FLT_MAX; - if (n == 1) return value[0]; - - if (q <= 0) return value[0]; - if (q >= 100) return value[n - 1]; - - double index = ((n + 1.) * q / 100.) - 1; - - vector sorted = value; - sort(sorted.begin(), sorted.end()); - - if (n == 2) return (sorted[0] * (1 - q / 100.) + sorted[1] * (q / 100.)); - - int index_lo = int(index); - double frac = index - index_lo; - if (frac == 0) return sorted[index_lo]; - - double lo = sorted[index_lo]; - double hi = sorted[index_lo + 1]; - return lo + (hi - lo) * frac; -} - -double ResultDatabase::Result::GetMean() const { - double r = 0; - for (int i = 0; i < value.size(); i++) { - r += value[i]; - } - return r / double(value.size()); -} - -double ResultDatabase::Result::GetStdDev() const { - double r = 0; - double u = GetMean(); - if (u == FLT_MAX) return FLT_MAX; - for (int i = 0; i < value.size(); i++) { - r += (value[i] - u) * (value[i] - u); - } - r = sqrt(r / value.size()); - return r; -} - - -void ResultDatabase::AddResults(const string& test, const string& atts, const string& unit, - const vector& values) { - for (int i = 0; i < values.size(); i++) { - AddResult(test, atts, unit, values[i]); - } -} - -static string RemoveAllButLeadingSpaces(const string& a) { - string b; - int n = a.length(); - int i = 0; - while (i < n && a[i] == ' ') { - b += a[i]; - ++i; - } - for (; i < n; i++) { - if (a[i] != ' ' && a[i] != '\t') b += a[i]; - } - return b; -} - -void ResultDatabase::AddResult(const string& test_orig, const string& atts_orig, - const string& unit_orig, double value) { - string test = RemoveAllButLeadingSpaces(test_orig); - string atts = RemoveAllButLeadingSpaces(atts_orig); - string unit = RemoveAllButLeadingSpaces(unit_orig); - int index; - for (index = 0; index < results.size(); index++) { - if (results[index].test == test && results[index].atts == atts) { - if (results[index].unit != unit) throw "Internal error: mixed units"; - - break; - } - } - - if (index >= results.size()) { - Result r; - r.test = test; - r.atts = atts; - r.unit = unit; - results.push_back(r); - } - - results[index].value.push_back(value); -} - -// **************************************************************************** -// Method: ResultDatabase::DumpDetailed -// -// Purpose: -// Writes the full results, including all trials. -// -// Arguments: -// out where to print -// -// Programmer: Jeremy Meredith -// Creation: August 14, 2009 -// -// Modifications: -// Jeremy Meredith, Wed Nov 10 14:25:17 EST 2010 -// Renamed to DumpDetailed to make room for a DumpSummary. -// -// Jeremy Meredith, Thu Nov 11 11:39:57 EST 2010 -// Added note about (*) missing value tag. -// -// Jeremy Meredith, Tue Nov 23 13:57:02 EST 2010 -// Changed note about missing values to be worded a little better. -// -// **************************************************************************** -void ResultDatabase::DumpDetailed(ostream& out) { - vector sorted(results); - - stable_sort(sorted.begin(), sorted.end()); - - const int testNameW = 24; - const int attW = 12; - const int fieldW = 11; - out << std::fixed << right << std::setprecision(4); - - int maxtrials = 1; - for (int i = 0; i < sorted.size(); i++) { - if (sorted[i].value.size() > maxtrials) maxtrials = sorted[i].value.size(); - } - - // TODO: in big parallel runs, the "trials" are the procs - // and we really don't want to print them all out.... - out << setw(testNameW) << "test\t" << setw(attW) << "atts\t" << setw(fieldW) << "median\t" - << "mean\t" - << "stddev\t" - << "min\t" - << "max\t"; - for (int i = 0; i < maxtrials; i++) out << "trial" << i << "\t"; - out << endl; - - for (int i = 0; i < sorted.size(); i++) { - Result& r = sorted[i]; - out << setw(testNameW) << r.test + "\t"; - out << setw(attW) << r.atts + "\t"; - out << setw(fieldW) << r.unit + "\t"; - if (r.GetMedian() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMedian() << "\t"; - if (r.GetMean() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMean() << "\t"; - if (r.GetStdDev() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetStdDev() << "\t"; - if (r.GetMin() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMin() << "\t"; - if (r.GetMax() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMax() << "\t"; - for (int j = 0; j < r.value.size(); j++) { - if (r.value[j] == FLT_MAX) - out << "N/A\t"; - else - out << r.value[j] << "\t"; - } - - out << endl; - } - out << endl - << "Note: Any results marked with (*) had missing values." << endl - << " This can occur on systems with a mixture of" << endl - << " device types or architectural capabilities." << endl; -} - - -// **************************************************************************** -// Method: ResultDatabase::DumpDetailed -// -// Purpose: -// Writes the summary results (min/max/stddev/med/mean), but not -// every individual trial. -// -// Arguments: -// out where to print -// -// Programmer: Jeremy Meredith -// Creation: November 10, 2010 -// -// Modifications: -// Jeremy Meredith, Thu Nov 11 11:39:57 EST 2010 -// Added note about (*) missing value tag. -// -// **************************************************************************** -void ResultDatabase::DumpSummary(ostream& out) { - vector sorted(results); - - stable_sort(sorted.begin(), sorted.end()); - - const int testNameW = 24; - const int attW = 12; - const int fieldW = 9; - out << std::fixed << right << std::setprecision(4); - - // TODO: in big parallel runs, the "trials" are the procs - // and we really don't want to print them all out.... - out << setw(testNameW) << "test\t" << setw(attW) << "atts\t" << setw(fieldW) << "units\t" - << "median\t" - << "mean\t" - << "stddev\t" - << "min\t" - << "max\t"; - out << endl; - - for (int i = 0; i < sorted.size(); i++) { - Result& r = sorted[i]; - out << setw(testNameW) << r.test + "\t"; - out << setw(attW) << r.atts + "\t"; - out << setw(fieldW) << r.unit + "\t"; - if (r.GetMedian() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMedian() << "\t"; - if (r.GetMean() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMean() << "\t"; - if (r.GetStdDev() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetStdDev() << "\t"; - if (r.GetMin() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMin() << "\t"; - if (r.GetMax() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMax() << "\t"; - - out << endl; - } - out << endl - << "Note: results marked with (*) had missing values such as" << endl - << "might occur with a mixture of architectural capabilities." << endl; -} - -// **************************************************************************** -// Method: ResultDatabase::ClearAllResults -// -// Purpose: -// Clears all existing results from the ResultDatabase; used for multiple passes -// of the same test or multiple tests. -// -// Arguments: -// -// Programmer: Jeffrey Young -// Creation: September 10th, 2014 -// -// Modifications: -// -// -// **************************************************************************** -void ResultDatabase::ClearAllResults() { results.clear(); } - -// **************************************************************************** -// Method: ResultDatabase::DumpCsv -// -// Purpose: -// Writes either detailed or summary results (min/max/stddev/med/mean), but not -// every individual trial. -// -// Arguments: -// out file to print CSV results -// -// Programmer: Jeffrey Young -// Creation: August 28th, 2014 -// -// Modifications: -// -// **************************************************************************** -void ResultDatabase::DumpCsv(string fileName) { - bool emptyFile; - vector sorted(results); - - stable_sort(sorted.begin(), sorted.end()); - - // Check to see if the file is empty - if so, add the headers - emptyFile = this->IsFileEmpty(fileName); - - // Open file and append by default - ofstream out; - out.open(fileName.c_str(), std::ofstream::out | std::ofstream::app); - - // Add headers only for empty files - if (emptyFile) { - // TODO: in big parallel runs, the "trials" are the procs - // and we really don't want to print them all out.... - out << "test, " - << "atts, " - << "units, " - << "median, " - << "mean, " - << "stddev, " - << "min, " - << "max, "; - out << endl; - } - - for (int i = 0; i < sorted.size(); i++) { - Result& r = sorted[i]; - out << r.test << ", "; - out << r.atts << ", "; - out << r.unit << ", "; - if (r.GetMedian() == FLT_MAX) - out << "N/A, "; - else - out << r.GetMedian() << ", "; - if (r.GetMean() == FLT_MAX) - out << "N/A, "; - else - out << r.GetMean() << ", "; - if (r.GetStdDev() == FLT_MAX) - out << "N/A, "; - else - out << r.GetStdDev() << ", "; - if (r.GetMin() == FLT_MAX) - out << "N/A, "; - else - out << r.GetMin() << ", "; - if (r.GetMax() == FLT_MAX) - out << "N/A, "; - else - out << r.GetMax() << ", "; - - out << endl; - } - out << endl; - - out.close(); -} - -// **************************************************************************** -// Method: ResultDatabase::IsFileEmpty -// -// Purpose: -// Returns whether a file is empty - used as a helper for CSV printing -// -// Arguments: -// file The input file to check for emptiness -// -// Programmer: Jeffrey Young -// Creation: August 28th, 2014 -// -// Modifications: -// -// **************************************************************************** - -bool ResultDatabase::IsFileEmpty(string fileName) { - - ifstream file(fileName.c_str()); - - // If the file doesn't exist it is by definition empty - if (!file.good()) { - return true; - } else { - bool fileEmpty; - fileEmpty = (bool)(file.peek() == ifstream::traits_type::eof()); - file.close(); - - return fileEmpty; - } - - // Otherwise, return false - return false; -} - - -// **************************************************************************** -// Method: ResultDatabase::GetResultsForTest -// -// Purpose: -// Returns a vector of results for just one test name. -// -// Arguments: -// test the name of the test results to search for -// -// Programmer: Jeremy Meredith -// Creation: December 3, 2010 -// -// Modifications: -// -// **************************************************************************** -vector ResultDatabase::GetResultsForTest(const string& test) { - // get only the given test results - vector retval; - for (int i = 0; i < results.size(); i++) { - Result& r = results[i]; - if (r.test == test) retval.push_back(r); - } - return retval; -} - -// **************************************************************************** -// Method: ResultDatabase::GetResults -// -// Purpose: -// Returns all the results. -// -// Arguments: -// -// Programmer: Jeremy Meredith -// Creation: December 3, 2010 -// -// Modifications: -// -// **************************************************************************** -const vector& ResultDatabase::GetResults() const { return results; } diff --git a/samples/1_Utils/hipBusBandwidth/ResultDatabase.h b/samples/1_Utils/hipBusBandwidth/ResultDatabase.h deleted file mode 100644 index ca6a00fc91..0000000000 --- a/samples/1_Utils/hipBusBandwidth/ResultDatabase.h +++ /dev/null @@ -1,89 +0,0 @@ -#ifndef RESULT_DATABASE_H -#define RESULT_DATABASE_H - -#include -#include -#include -#include -#include -using std::ifstream; -using std::ofstream; -using std::ostream; -using std::string; -using std::vector; - - -// **************************************************************************** -// Class: ResultDatabase -// -// Purpose: -// Track numerical results as they are generated. -// Print statistics of raw results. -// -// Programmer: Jeremy Meredith -// Creation: June 12, 2009 -// -// Modifications: -// Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010 -// Split timing reports into detailed and summary. E.g. for serial code, -// we might report all trial values, but skip them in parallel. -// -// Jeremy Meredith, Thu Nov 11 11:40:18 EST 2010 -// Added check for missing value tag. -// -// Jeremy Meredith, Mon Nov 22 13:37:10 EST 2010 -// Added percentile statistic. -// -// Jeremy Meredith, Fri Dec 3 16:30:31 EST 2010 -// Added a method to extract a subset of results based on test name. Also, -// the Result class is now public, so that clients can use them directly. -// Added a GetResults method as well, and made several functions const. -// -// **************************************************************************** -class ResultDatabase { - public: - // - // A performance result for a single SHOC benchmark run. - // - struct Result { - string test; // e.g. "readback" - string atts; // e.g. "pagelocked 4k^2" - string unit; // e.g. "MB/sec" - vector value; // e.g. "837.14" - double GetMin() const; - double GetMax() const; - double GetMedian() const; - double GetPercentile(double q) const; - double GetMean() const; - double GetStdDev() const; - - bool operator<(const Result& rhs) const; - - bool HadAnyFLTMAXValues() const { - for (int i = 0; i < value.size(); ++i) { - if (value[i] >= FLT_MAX) return true; - } - return false; - } - }; - - protected: - vector results; - - public: - void AddResult(const string& test, const string& atts, const string& unit, double value); - void AddResults(const string& test, const string& atts, const string& unit, - const vector& values); - vector GetResultsForTest(const string& test); - const vector& GetResults() const; - void ClearAllResults(); - void DumpDetailed(ostream&); - void DumpSummary(ostream&); - void DumpCsv(string fileName); - - private: - bool IsFileEmpty(string fileName); -}; - - -#endif diff --git a/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp b/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp deleted file mode 100644 index 5a7e45f6bf..0000000000 --- a/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp +++ /dev/null @@ -1,1072 +0,0 @@ -#include -#include -#include -#include -#include "hip/hip_runtime.h" - -#include "ResultDatabase.h" - -enum MallocMode { MallocPinned, MallocUnpinned, MallocRegistered }; - -// Cmdline parms: -bool p_verbose = false; -MallocMode p_malloc_mode = MallocPinned; -int p_numa_ctl = -1; -int p_iterations = 0; -int p_beatsperiteration = 1; -int p_device = 0; -int p_detailed = 0; -bool p_async = 0; -int p_alignedhost = - 0; // align host allocs to this granularity, in bytes. 64 or 4096 are good values to try. -int p_onesize = 0; - -bool p_h2d = true; -bool p_d2h = true; -bool p_bidir = true; -bool p_p2p = false; - - -//#define NO_CHECK - - -#define CHECK_HIP_ERROR() \ - { \ - hipError_t err = hipGetLastError(); \ - if (err != hipSuccess) { \ - printf( \ - "error=%d name=%s at " \ - "ln: %d\n ", \ - err, hipGetErrorString(err), __LINE__); \ - exit(EXIT_FAILURE); \ - } \ - } - - -std::string mallocModeString(int mallocMode) { - switch (mallocMode) { - case MallocPinned: - return "pinned"; - case MallocUnpinned: - return "unpinned"; - case MallocRegistered: - return "registered"; - default: - return "mallocmode-UNKNOWN"; - }; -}; - -// **************************************************************************** -int sizeToBytes(int size) { return (size < 0) ? -size : size * 1024; } - - -// **************************************************************************** -std::string sizeToString(int size) { - using namespace std; - stringstream ss; - if (size < 0) { - // char (-) lexically sorts before " " so will cause Byte values to be displayed before kB. - ss << "+" << setfill('0') << setw(3) << -size << "By"; - } else { - ss << size << "kB"; - } - return ss.str(); -} - - -// **************************************************************************** -hipError_t memcopy(void* dst, const void* src, size_t sizeBytes, enum hipMemcpyKind kind) { - if (p_async) { - return hipMemcpyAsync(dst, src, sizeBytes, kind, NULL); - } else { - return hipMemcpy(dst, src, sizeBytes, kind); - } -} - - -// **************************************************************************** -// -sizes are in bytes, +sizes are in kb, last size must be largest -int sizes[] = {-64, -256, -512, 1, 2, 4, 8, 16, 32, 64, 128, 256, - 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288}; -int nSizes = sizeof(sizes) / sizeof(int); -// iterations to be run for the corresponding sizes, less number as the size increases -int iterations[] = {1000, 1000, 1000, 1000, 500, 500, 500, 500, 500, 200, 200, 200, - 200, 200, 100, 100, 100, 100, 50, 50, 50, 20, 20}; - -// **************************************************************************** -// Function: RunBenchmark_H2D -// -// Purpose: -// Measures the bandwidth of the bus connecting the host processor to the -// OpenCL device. This benchmark repeatedly transfers data chunks of various -// sizes across the bus to the OpenCL device, and calculates the bandwidth. -// -// -// Arguments: -// -// Returns: nothing -// -// Programmer: Jeremy Meredith -// Creation: September 08, 2009 -// -// Modifications: -// Jeremy Meredith, Wed Dec 1 17:05:27 EST 2010 -// Added calculation of latency estimate. -// Ben Sander - moved to standalone test -// -// **************************************************************************** -void RunBenchmark_H2D(ResultDatabase& resultDB) { - long long numMaxFloats = 1024 * (sizes[nSizes - 1]) / 4; - - hipSetDevice(p_device); - - // Create some host memory pattern - float* hostMem = NULL; - if (p_malloc_mode == MallocPinned) { - hipHostMalloc((void**)&hostMem, sizeof(float) * numMaxFloats); - while (hipGetLastError() != hipSuccess) { - // drop the size and try again - if (p_verbose) std::cout << " - dropping size allocating pinned mem\n"; - --nSizes; - if (nSizes < 1) { - std::cerr << "Error: Couldn't allocate any pinned buffer\n"; - return; - } - numMaxFloats = 1024 * (sizes[nSizes - 1]) / 4; - hipHostMalloc((void**)&hostMem, sizeof(float) * numMaxFloats); - } - } else if (p_malloc_mode == MallocUnpinned) { - if (p_alignedhost) { - #ifdef _WIN32 - hostMem = (float*)_aligned_malloc(numMaxFloats * sizeof(float),p_alignedhost); - #else - hostMem = (float*)aligned_alloc(p_alignedhost, numMaxFloats * sizeof(float)); - #endif - } else { - hostMem = new float[numMaxFloats]; - } - } else if (p_malloc_mode == MallocRegistered) { - if (p_numa_ctl == -1) { - hostMem = (float*)malloc(numMaxFloats * sizeof(float)); - } - - hipHostRegister(hostMem, numMaxFloats * sizeof(float), 0); - CHECK_HIP_ERROR(); - } else { - assert(0); - } - - for (int i = 0; i < numMaxFloats; i++) { - hostMem[i] = i % 77; - } - - float* device; - hipMalloc((void**)&device, sizeof(float) * numMaxFloats); - while (hipGetLastError() != hipSuccess) { - // drop the size and try again - if (p_verbose) std::cout << " - dropping size allocating device mem\n"; - --nSizes; - if (nSizes < 1) { - std::cerr << "Error: Couldn't allocate any device buffer\n"; - return; - } - numMaxFloats = 1024 * (sizes[nSizes - 1]) / 4; - hipMalloc((void**)&device, sizeof(float) * numMaxFloats); - } - - - hipEvent_t start, stop; - hipEventCreate(&start); - hipEventCreate(&stop); - CHECK_HIP_ERROR(); - - // store the times temporarily to estimate latency - // float times[nSizes]; - for (int i = 0; i < nSizes; i++) { - int sizeIndex, iterIndex; - sizeIndex = i; - iterIndex = i; - - const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex]; - const int nbytes = sizeToBytes(thisSize); - const int niter = p_iterations ? p_iterations : iterations[iterIndex]; - for (int pass = 0; pass < niter; pass++) { - - hipEventRecord(start, 0); - for (int j = 0; j < p_beatsperiteration; j++) { - memcopy(device, hostMem, nbytes, hipMemcpyHostToDevice); - } - hipEventRecord(stop, 0); - hipEventSynchronize(stop); - float t = 0; - hipEventElapsedTime(&t, start, stop); - // times[sizeIndex] = t; - // Convert to GB/sec - if (p_verbose) { - std::cerr << "size " << sizeToString(thisSize) << " took " << t << " ms\n"; - } - - double speed = - (double(double(sizeToBytes(thisSize)/1000) * p_beatsperiteration) / 1000) / t; - char sizeStr[256]; - if (p_beatsperiteration > 1) { - sprintf(sizeStr, "%9sx%d", sizeToString(thisSize).c_str(), p_beatsperiteration); - } else { - sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); - } - resultDB.AddResult(std::string("H2D_Bandwidth") + "_" + mallocModeString(p_malloc_mode), - sizeStr, "GB/sec", speed); - resultDB.AddResult(std::string("H2D_Time") + mallocModeString(p_malloc_mode), sizeStr, "ms", t); - - } - if (p_onesize) { - break; - } - } - - if (p_onesize) { - numMaxFloats = sizeToBytes(p_onesize) / sizeof(float); - } - -#ifndef NO_CHECK - - // Check. First reset the host memory, then copy-back result. Then compare against original - // ref value. - for (int i = 0; i < numMaxFloats; i++) { - hostMem[i] = 0; - } - hipMemcpy(hostMem, device, numMaxFloats * sizeof(float), hipMemcpyDeviceToHost); - for (int i = 0; i < numMaxFloats; i++) { - float ref = i % 77; - if (ref != hostMem[i]) { - printf("error: H2D. i=%d reference:%6.f != copyback:%6.2f\n", i, ref, hostMem[i]); - } - } -#endif - - - // Cleanup - hipFree((void*)device); - CHECK_HIP_ERROR(); - switch (p_malloc_mode) { - case MallocPinned: - hipHostFree((void*)hostMem); - CHECK_HIP_ERROR(); - break; - - case MallocUnpinned: - if (p_alignedhost) { - free(hostMem); - } else { - delete[] hostMem; - } - break; - - case MallocRegistered: - hipHostUnregister(hostMem); - CHECK_HIP_ERROR(); - free(hostMem); - break; - default: - assert(0); - } - - - hipEventDestroy(start); - hipEventDestroy(stop); -} - - -// **************************************************************************** -void RunBenchmark_D2H(ResultDatabase& resultDB) { - long long numMaxFloats = 1024 * (sizes[nSizes - 1]) / 4; - - // Create some host memory pattern - float* hostMem1; - float* hostMem2; - if (p_malloc_mode == MallocPinned) { - hipHostMalloc((void**)&hostMem1, sizeof(float) * numMaxFloats); - hipError_t err1 = hipGetLastError(); - hipHostMalloc((void**)&hostMem2, sizeof(float) * numMaxFloats); - hipError_t err2 = hipGetLastError(); - while (err1 != hipSuccess || err2 != hipSuccess) { - // free the first buffer if only the second failed - if (err1 == hipSuccess) hipHostFree((void*)hostMem1); - - // drop the size and try again - if (p_verbose) std::cout << " - dropping size allocating pinned mem\n"; - --nSizes; - if (nSizes < 1) { - std::cerr << "Error: Couldn't allocate any pinned buffer\n"; - return; - } - numMaxFloats = 1024 * (sizes[nSizes - 1]) / 4; - hipHostMalloc((void**)&hostMem1, sizeof(float) * numMaxFloats); - err1 = hipGetLastError(); - hipHostMalloc((void**)&hostMem2, sizeof(float) * numMaxFloats); - err2 = hipGetLastError(); - } - } else if (p_malloc_mode == MallocUnpinned) { - hostMem1 = new float[numMaxFloats]; - hostMem2 = new float[numMaxFloats]; - } else if (p_malloc_mode == MallocRegistered) { - if (p_numa_ctl == -1) { - hostMem1 = (float*)malloc(numMaxFloats * sizeof(float)); - hostMem2 = (float*)malloc(numMaxFloats * sizeof(float)); - } - - hipHostRegister(hostMem1, numMaxFloats * sizeof(float), 0); - CHECK_HIP_ERROR(); - hipHostRegister(hostMem2, numMaxFloats * sizeof(float), 0); - CHECK_HIP_ERROR(); - } else { - assert(0); - } - - - for (int i = 0; i < numMaxFloats; i++) hostMem1[i] = i % 77; - - float* device; - hipMalloc((void**)&device, sizeof(float) * numMaxFloats); - while (hipGetLastError() != hipSuccess) { - // drop the size and try again - if (p_verbose) std::cout << " - dropping size allocating device mem\n"; - --nSizes; - if (nSizes < 1) { - std::cerr << "Error: Couldn't allocate any device buffer\n"; - return; - } - numMaxFloats = 1024 * (sizes[nSizes - 1]) / 4; - hipMalloc((void**)&device, sizeof(float) * numMaxFloats); - } - - hipMemcpy(device, hostMem1, numMaxFloats * sizeof(float), hipMemcpyHostToDevice); - hipDeviceSynchronize(); - - hipEvent_t start, stop; - hipEventCreate(&start); - hipEventCreate(&stop); - CHECK_HIP_ERROR(); - - // store the times temporarily to estimate latency - // float times[nSizes]; - for (int i = 0; i < nSizes; i++) { - int sizeIndex, iterIndex; - sizeIndex = i; - iterIndex = i; - - const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex]; - const int nbytes = sizeToBytes(thisSize); - const int niter = p_iterations ? p_iterations : iterations[iterIndex]; - for (int pass = 0; pass < niter; pass++) { - - hipEventRecord(start, 0); - for (int j = 0; j < p_beatsperiteration; j++) { - memcopy(hostMem2, device, nbytes, hipMemcpyDeviceToHost); - } - hipEventRecord(stop, 0); - hipEventSynchronize(stop); - float t = 0; - hipEventElapsedTime(&t, start, stop); - // times[sizeIndex] = t; - // Convert to GB/sec - if (p_verbose) { - std::cerr << "size " << sizeToString(thisSize) << " took " << t << " ms\n"; - } - - double speed = - (double(double(sizeToBytes(thisSize)/1000) * p_beatsperiteration) / 1000) / t; - char sizeStr[256]; - sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); - if (p_beatsperiteration > 1) { - sprintf(sizeStr, "%9sx%d", sizeToString(thisSize).c_str(), p_beatsperiteration); - } else { - sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); - } - resultDB.AddResult(std::string("D2H_Bandwidth") + "_" + mallocModeString(p_malloc_mode), - sizeStr, "GB/sec", speed); - resultDB.AddResult(std::string("D2H_Time") + "_" + mallocModeString(p_malloc_mode), - sizeStr, "ms", t); - - } - if (p_onesize) { - break; - } - } - - if (p_onesize) { - numMaxFloats = sizeToBytes(p_onesize) / sizeof(float); - } - // Check. First reset the host memory, then copy-back result. Then compare against original - // ref value. - for (int i = 0; i < numMaxFloats; i++) { - float ref = i % 77; - if (ref != hostMem2[i]) { - printf("error: D2H. i=%d reference:%6.f != copyback:%6.2f\n", i, ref, hostMem2[i]); - } - } - - // Cleanup - hipFree((void*)device); - CHECK_HIP_ERROR(); - - switch (p_malloc_mode) { - case MallocPinned: - hipHostFree((void*)hostMem1); - CHECK_HIP_ERROR(); - hipHostFree((void*)hostMem2); - CHECK_HIP_ERROR(); - break; - case MallocUnpinned: - delete[] hostMem1; - delete[] hostMem2; - break; - case MallocRegistered: - hipHostUnregister(hostMem1); - CHECK_HIP_ERROR(); - free(hostMem1); - hipHostUnregister(hostMem2); - free(hostMem2); - break; - default: - assert(0); - } - - hipEventDestroy(start); - hipEventDestroy(stop); -} - - -void RunBenchmark_Bidir(ResultDatabase& resultDB) { - long long numMaxFloats = 1024 * (sizes[nSizes - 1]) / 4; - - hipSetDevice(p_device); - - hipStream_t stream[2]; - - - // Create some host memory pattern - float* hostMem[2] = {NULL, NULL}; - if (p_malloc_mode == MallocPinned) { - while (1) { - hipError_t e1 = hipHostMalloc((void**)&hostMem[0], sizeof(float) * numMaxFloats); - hipError_t e2 = hipHostMalloc((void**)&hostMem[1], sizeof(float) * numMaxFloats); - - if ((e1 == hipSuccess) && (e2 == hipSuccess)) { - break; - } else { - // drop the size and try again - if (p_verbose) std::cout << " - dropping size allocating pinned mem\n"; - --nSizes; - if (nSizes < 1) { - std::cerr << "Error: Couldn't allocate any pinned buffer\n"; - return; - } - numMaxFloats = 1024 * (sizes[nSizes - 1]) / 4; - } - } - } else if (p_malloc_mode == MallocUnpinned) { - hostMem[0] = new float[numMaxFloats]; - hostMem[1] = new float[numMaxFloats]; - } else if (p_malloc_mode == MallocRegistered) { - if (p_numa_ctl == -1) { - hostMem[0] = (float*)malloc(numMaxFloats * sizeof(float)); - hostMem[1] = (float*)malloc(numMaxFloats * sizeof(float)); - } - hipHostRegister(hostMem[0], numMaxFloats * sizeof(float), 0); - CHECK_HIP_ERROR(); - hipHostRegister(hostMem[1], numMaxFloats * sizeof(float), 0); - CHECK_HIP_ERROR(); - } else { - assert(0); - } - - - for (int i = 0; i < numMaxFloats; i++) { - hostMem[0][i] = i % 77; - } - - float* deviceMem[2]; - while (1) { - hipError_t e1 = hipMalloc((void**)&deviceMem[0], sizeof(float) * numMaxFloats); - hipError_t e2 = hipMalloc((void**)&deviceMem[1], sizeof(float) * numMaxFloats); - - if ((e1 == hipSuccess) && (e2 == hipSuccess)) { - break; - } else { - if (e1) { - // First alloc succeeded, so free it before trying again - hipFree(&deviceMem[0]); - } - // drop the size and try again - if (p_verbose) std::cout << " - dropping size allocating device mem\n"; - --nSizes; - if (nSizes < 1) { - std::cerr << "Error: Couldn't allocate any device buffer\n"; - return; - } - numMaxFloats = 1024 * (sizes[nSizes - 1]) / 4; - } - }; - - - hipMemset(deviceMem[1], 0xFA, numMaxFloats); - - - hipEvent_t start, stop; - hipEventCreate(&start); - hipEventCreate(&stop); - CHECK_HIP_ERROR(); - hipStreamCreate(&stream[0]); - hipStreamCreate(&stream[1]); - - // store the times temporarily to estimate latency - // float times[nSizes]; - for (int i = 0; i < nSizes; i++) { - int sizeIndex, iterIndex; - sizeIndex = i; - iterIndex = i; - - const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex]; - const int nbytes = sizeToBytes(thisSize); - const int niter = p_iterations ? p_iterations : iterations[iterIndex]; - for (int pass = 0; pass < niter; pass++) { - - hipEventRecord(start, 0); - hipMemcpyAsync(deviceMem[0], hostMem[0], nbytes, hipMemcpyHostToDevice, stream[0]); - hipMemcpyAsync(hostMem[1], deviceMem[1], nbytes, hipMemcpyDeviceToHost, stream[1]); - hipEventRecord(stop, 0); - hipEventSynchronize(stop); - float t = 0; - hipEventElapsedTime(&t, start, stop); - - // Convert to GB/sec - if (p_verbose) { - std::cerr << "size " << sizeToString(thisSize) << " took " << t << " ms\n"; - } - - double speed = (double(sizeToBytes(2 * thisSize)) / (1000 * 1000)) / t; - char sizeStr[256]; - sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); - resultDB.AddResult( - std::string("Bidir_Bandwidth") + "_" + mallocModeString(p_malloc_mode), sizeStr, - "GB/sec", speed); - resultDB.AddResult(std::string("Bidir_Time") + "_" + mallocModeString(p_malloc_mode), - sizeStr, "ms", t); - } - if (p_onesize) { - break; - } - } - - // Cleanup - hipFree((void*)deviceMem[0]); - hipFree((void*)deviceMem[1]); - CHECK_HIP_ERROR(); - switch (p_malloc_mode) { - case MallocPinned: - hipHostFree((void*)hostMem[0]); - hipHostFree((void*)hostMem[1]); - CHECK_HIP_ERROR(); - break; - case MallocUnpinned: - delete[] hostMem[0]; - delete[] hostMem[1]; - break; - case MallocRegistered: - for (int i = 0; i < 2; i++) { - hipHostUnregister(hostMem[i]); - CHECK_HIP_ERROR(); - free(hostMem[i]); - } - break; - default: - assert(0); - }; - - hipEventDestroy(start); - hipEventDestroy(stop); - hipStreamDestroy(stream[0]); - hipStreamDestroy(stream[1]); -} - - -#define failed(...) \ - printf("error: "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - exit(EXIT_FAILURE); - -int parseInt(const char* str, int* output) { - char* next; - *output = strtol(str, &next, 0); - return !strlen(next); -} - - -void checkPeer2PeerSupport() { - int deviceCnt; - hipGetDeviceCount(&deviceCnt); - std::cout << "Total no. of available gpu #" << deviceCnt << "\n" << std::endl; - - for (int deviceId = 0; deviceId < deviceCnt; deviceId++) { - hipDeviceProp_t props; - hipGetDeviceProperties(&props, deviceId); - std::cout << "for gpu#" << deviceId << " " << props.name << std::endl; - std::cout << " peer2peer supported : "; - int PeerCnt = 0; - for (int i = 0; i < deviceCnt; i++) { - int isPeer; - hipDeviceCanAccessPeer(&isPeer, i, deviceId); - if (isPeer) { - std::cout << "gpu#" << i << " "; - ++PeerCnt; - } - } - if (PeerCnt == 0) - std::cout << "NONE" - << " "; - - std::cout << std::endl; - std::cout << " peer2peer not supported : "; - int nonPeerCnt = 0; - for (int i = 0; i < deviceCnt; i++) { - int isPeer; - hipDeviceCanAccessPeer(&isPeer, i, deviceId); - if (!isPeer && (i != deviceId)) { - std::cout << "gpu#" << i << " "; - ++nonPeerCnt; - } - } - if (nonPeerCnt == 0) - std::cout << "NONE" - << " "; - - std::cout << "\n" << std::endl; - } - - std::cout << "\nNote: For non-supported peer2peer devices, memcopy will use/follow the normal " - "behaviour (GPU1-->host then host-->GPU2)\n\n" - << std::endl; -} - -void enablePeer2Peer(int currentGpu, int peerGpu) { - int canAccessPeer; - - hipSetDevice(currentGpu); - hipDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu); - - if (canAccessPeer == 1) { - hipDeviceEnablePeerAccess(peerGpu, 0); - } -} - -void disablePeer2Peer(int currentGpu, int peerGpu) { - int canAccessPeer; - - hipSetDevice(currentGpu); - hipDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu); - - if (canAccessPeer == 1) { - hipDeviceDisablePeerAccess(peerGpu); - } -} - -std::string gpuIDToString(int gpuID) { - using namespace std; - stringstream ss; - ss << gpuID; - return ss.str(); -} - -void RunBenchmark_P2P_Unidir(ResultDatabase& resultDB) { - int gpuCount; - hipGetDeviceCount(&gpuCount); - - int currentGpu, peerGpu; - - long long numMaxFloats = 1024 * (sizes[nSizes - 1]) / 4; - - for (currentGpu = 0; currentGpu < gpuCount; currentGpu++) { - for (peerGpu = 0; peerGpu < gpuCount; peerGpu++) { - if (currentGpu == peerGpu) continue; - - float *currentGpuMem, *peerGpuMem; - - hipSetDevice(currentGpu); - hipMalloc((void**)¤tGpuMem, sizeof(float) * numMaxFloats); - - hipSetDevice(peerGpu); - hipMalloc((void**)&peerGpuMem, sizeof(float) * numMaxFloats); - - enablePeer2Peer(currentGpu, peerGpu); - - hipEvent_t start, stop; - hipEventCreate(&start); - hipEventCreate(&stop); - CHECK_HIP_ERROR(); - - // store the times temporarily to estimate latency - // float times[nSizes]; - for (int i = 0; i < nSizes; i++) { - int sizeIndex, iterIndex; - sizeIndex = i; - iterIndex = i; - - const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex]; - const int nbytes = sizeToBytes(thisSize); - const int niter = p_iterations ? p_iterations : iterations[iterIndex]; - for (int pass = 0; pass < niter; pass++) { - - hipDeviceSynchronize(); - - hipEventRecord(start, 0); - - for (int j = 0; j < p_beatsperiteration; j++) { - hipMemcpy(peerGpuMem, currentGpuMem, nbytes, hipMemcpyDeviceToDevice); - } - - hipEventRecord(stop, 0); - - hipEventSynchronize(stop); - - float t = 0; - hipEventElapsedTime(&t, start, stop); - // times[sizeIndex] = t; - - // Convert to GB/sec - if (p_verbose) { - std::cerr << "size " << sizeToString(thisSize) << " took " << t << " ms\n"; - } - - double speed = - (double(double(sizeToBytes(thisSize)/1000) * p_beatsperiteration) / 1000) / t; - char sizeStr[256]; - if (p_beatsperiteration > 1) { - sprintf(sizeStr, "%9sx%d", sizeToString(thisSize).c_str(), - p_beatsperiteration); - } else { - sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); - } - - string cGpu, pGpu; - cGpu = gpuIDToString(currentGpu); - pGpu = gpuIDToString(peerGpu); - - resultDB.AddResult(std::string("p2p_uni") + "_gpu" + std::string(cGpu) + - "_gpu" + std::string(pGpu), - sizeStr, "GB/sec", speed); - resultDB.AddResult(std::string("P2P_uni") + "_gpu" + std::string(cGpu) + - "_gpu" + std::string(pGpu), - sizeStr, "ms", t); - - } - if (p_onesize) { - break; - } - } - - if (p_onesize) { - numMaxFloats = sizeToBytes(p_onesize) / sizeof(float); - } - - disablePeer2Peer(currentGpu, peerGpu); - - hipEventDestroy(start); - hipEventDestroy(stop); - - // Cleanup - hipFree((void*)currentGpuMem); - hipFree((void*)peerGpuMem); - CHECK_HIP_ERROR(); - - hipSetDevice(peerGpu); - hipDeviceReset(); - - hipSetDevice(currentGpu); - hipDeviceReset(); - } - } -} - -void RunBenchmark_P2P_Bidir(ResultDatabase& resultDB) { - int gpuCount; - hipGetDeviceCount(&gpuCount); - - hipStream_t stream[2]; - - int currentGpu, peerGpu; - - long long numMaxFloats = 1024 * (sizes[nSizes - 1]) / 4; - - for (currentGpu = 0; currentGpu < gpuCount; currentGpu++) { - for (peerGpu = 0; peerGpu < gpuCount; peerGpu++) { - if (currentGpu == peerGpu) continue; - - float *currentGpuMem[2], *peerGpuMem[2]; - - hipSetDevice(currentGpu); - hipMalloc((void**)¤tGpuMem[0], sizeof(float) * numMaxFloats); - hipMalloc((void**)¤tGpuMem[1], sizeof(float) * numMaxFloats); - enablePeer2Peer(peerGpu,currentGpu); - - hipSetDevice(peerGpu); - hipMalloc((void**)&peerGpuMem[0], sizeof(float) * numMaxFloats); - hipMalloc((void**)&peerGpuMem[1], sizeof(float) * numMaxFloats); - - enablePeer2Peer(currentGpu, peerGpu); - - hipEvent_t start, stop; - hipEventCreate(&start); - hipEventCreate(&stop); - CHECK_HIP_ERROR(); - - hipStreamCreate(&stream[0]); - hipStreamCreate(&stream[1]); - - // store the times temporarily to estimate latency - // float times[nSizes]; - for (int i = 0; i < nSizes; i++) { - int sizeIndex, iterIndex; - sizeIndex = i; - iterIndex = i; - - const int thisSize = p_onesize ? p_onesize : sizes[sizeIndex]; - const int nbytes = sizeToBytes(thisSize); - const int niter = p_iterations ? p_iterations : iterations[iterIndex]; - for (int pass = 0; pass < niter; pass++) { - - hipDeviceSynchronize(); - - hipEventRecord(start, 0); - - for (int j = 0; j < p_beatsperiteration; j++) { - hipMemcpyAsync(peerGpuMem[0], currentGpuMem[0], nbytes, - hipMemcpyDeviceToDevice, stream[0]); - hipMemcpyAsync(currentGpuMem[1], peerGpuMem[1], nbytes, - hipMemcpyDeviceToDevice, stream[1]); - } - - hipEventRecord(stop, 0); - - hipEventSynchronize(stop); - - float t = 0; - hipEventElapsedTime(&t, start, stop); - // times[sizeIndex] = t; - - // Convert to GB/sec - if (p_verbose) { - std::cerr << "size " << sizeToString(thisSize) << " took " << t << " ms\n"; - } - - double speed = - (double(double(sizeToBytes(2 * thisSize)/1000) * p_beatsperiteration) / 1000) / - t; - char sizeStr[256]; - if (p_beatsperiteration > 1) { - sprintf(sizeStr, "%9sx%d", sizeToString(thisSize).c_str(), - p_beatsperiteration); - } else { - sprintf(sizeStr, "%9s", sizeToString(thisSize).c_str()); - } - - string cGpu, pGpu; - cGpu = gpuIDToString(currentGpu); - pGpu = gpuIDToString(peerGpu); - - resultDB.AddResult(std::string("p2p_bi") + "_gpu" + std::string(cGpu) + "_gpu" + - std::string(pGpu), - sizeStr, "GB/sec", speed); - resultDB.AddResult(std::string("P2P_bi") + "_gpu" + std::string(cGpu) + "_gpu" + - std::string(pGpu), - sizeStr, "ms", t); - - } - if (p_onesize) { - break; - } - } - - if (p_onesize) { - numMaxFloats = sizeToBytes(p_onesize) / sizeof(float); - } - - disablePeer2Peer(currentGpu, peerGpu); - disablePeer2Peer(peerGpu, currentGpu); - - hipEventDestroy(start); - hipEventDestroy(stop); - - for (int i = 0; i < 2; i++) { - hipStreamDestroy(stream[i]); - - hipFree((void*)currentGpuMem[i]); - hipFree((void*)peerGpuMem[i]); - CHECK_HIP_ERROR(); - } - - hipSetDevice(peerGpu); - hipDeviceReset(); - - hipSetDevice(currentGpu); - hipDeviceReset(); - } - } -} - - -void printConfig() { - hipDeviceProp_t props; - hipGetDeviceProperties(&props, p_device); - - printf("Device:%s Mem=%.1fGB #CUs=%d Freq=%.0fMhz MallocMode=%s\n", props.name, - props.totalGlobalMem / 1024.0 / 1024.0 / 1024.0, props.multiProcessorCount, - props.clockRate / 1000.0, mallocModeString(p_malloc_mode).c_str()); -} - -void help() { - printf("Usage: hipBusBandwidth [OPTIONS]\n"); - printf(" --iterations, -i : Number of copy iterations to run.\n"); - printf( - " --beatsperiterations, -b : Number of beats (back-to-back copies of same size) per " - "iteration to run.\n"); - printf(" --device, -d : Device ID to use (0..numDevices).\n"); - printf(" --unpinned : Use unpinned host memory.\n"); - printf(" --d2h : Run only device-to-host test.\n"); - printf(" --h2d : Run only host-to-device test.\n"); - printf(" --bidir : Run only bidir copy test.\n"); - printf(" --p2p : Run only peer2peer unidir and bidir copy tests.\n"); - printf(" --verbose : Print verbose status messages as test is run.\n"); - printf(" --detailed : Print detailed report (including all trials).\n"); - printf( - " --async : Use hipMemcpyAsync(with NULL stream) for H2D/D2H. Default " - "uses hipMemcpy.\n"); - printf( - " --onesize, -o : Only run one measurement, at specified size (in KB, or if " - "negative in bytes)\n"); -}; - -int parseStandardArguments(int argc, char* argv[]) { - for (int i = 1; i < argc; i++) { - const char* arg = argv[i]; - - if (!strcmp(arg, " ")) { - // skip NULL args. - } else if (!strcmp(arg, "--iterations") || (!strcmp(arg, "-i"))) { - if (++i >= argc || !parseInt(argv[i], &p_iterations)) { - failed("Bad iterations argument"); - } - } else if (!strcmp(arg, "--beatsperiteration") || (!strcmp(arg, "-b"))) { - if (++i >= argc || !parseInt(argv[i], &p_beatsperiteration)) { - failed("Bad beatsperiteration argument"); - } - } else if (!strcmp(arg, "--device") || (!strcmp(arg, "-d"))) { - if (++i >= argc || !parseInt(argv[i], &p_device)) { - failed("Bad device argument"); - } - } else if (!strcmp(arg, "--onesize") || (!strcmp(arg, "-o"))) { - if (++i >= argc || !parseInt(argv[i], &p_onesize)) { - failed("Bad onesize argument"); - } - } else if (!strcmp(arg, "--unpinned")) { - p_malloc_mode = MallocUnpinned; - } else if (!strcmp(arg, "--registered")) { - p_malloc_mode = MallocRegistered; - } else if (!strcmp(arg, "--h2d")) { - p_h2d = true; - p_d2h = false; - p_bidir = false; - - } else if (!strcmp(arg, "--d2h")) { - p_h2d = false; - p_d2h = true; - p_bidir = false; - - } else if (!strcmp(arg, "--bidir")) { - p_h2d = false; - p_d2h = false; - p_bidir = true; - - } else if (!strcmp(arg, "--p2p")) { - p_h2d = false; - p_d2h = false; - p_bidir = false; - p_p2p = true; - - } else if (!strcmp(arg, "--help") || (!strcmp(arg, "-h"))) { - help(); - exit(EXIT_SUCCESS); - - - } else if (!strcmp(arg, "--verbose")) { - p_verbose = 1; - } else if (!strcmp(arg, "--async")) { - p_async = 1; - } else if (!strcmp(arg, "--detailed")) { - p_detailed = 1; - } else { - failed("Bad argument '%s'", arg); - } - } - - return 0; -}; - - -int main(int argc, char* argv[]) { - parseStandardArguments(argc, argv); - - if (p_p2p) { - checkPeer2PeerSupport(); - - ResultDatabase resultDB_Unidir, resultDB_Bidir; - - RunBenchmark_P2P_Unidir(resultDB_Unidir); - RunBenchmark_P2P_Bidir(resultDB_Bidir); - - resultDB_Unidir.DumpSummary(std::cout); - resultDB_Bidir.DumpSummary(std::cout); - - if (p_detailed) { - resultDB_Unidir.DumpDetailed(std::cout); - resultDB_Bidir.DumpDetailed(std::cout); - } - } else { - printConfig(); - - if (p_h2d) { - ResultDatabase resultDB; - RunBenchmark_H2D(resultDB); - - resultDB.DumpSummary(std::cout); - - if (p_detailed) { - resultDB.DumpDetailed(std::cout); - } - } - - if (p_d2h) { - ResultDatabase resultDB; - RunBenchmark_D2H(resultDB); - - resultDB.DumpSummary(std::cout); - - if (p_detailed) { - resultDB.DumpDetailed(std::cout); - } - } - - - if (p_bidir) { - ResultDatabase resultDB; - RunBenchmark_Bidir(resultDB); - - resultDB.DumpSummary(std::cout); - - if (p_detailed) { - resultDB.DumpDetailed(std::cout); - } - } - } -} diff --git a/samples/1_Utils/hipCommander/CMakeLists.txt b/samples/1_Utils/hipCommander/CMakeLists.txt deleted file mode 100644 index 2719619f36..0000000000 --- a/samples/1_Utils/hipCommander/CMakeLists.txt +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - -project(hipCommander) - -cmake_minimum_required(VERSION 3.10) - -if (NOT DEFINED ROCM_PATH ) - set ( ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory." ) -endif () - -# Search for rocm in common locations -list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH}) - -# Find hip -find_package(hip) - -# Set compiler and linker -set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE}) -set(CMAKE_BUILD_TYPE Release) - -# Create the excutable -add_executable(hipCommander hipCommander.cpp) - -# Generate code object -add_custom_target( - codeobj - ALL - COMMAND ${HIP_HIPCC_EXECUTABLE} --genco ../nullkernel.hip.cpp -o nullkernel.hsaco - COMMENT "codeobj generated" -) - -add_dependencies(hipCommander codeobj) - -# Link with HIP -target_link_libraries(hipCommander hip::host) -set_property(TARGET hipCommander PROPERTY CXX_STANDARD 11) diff --git a/samples/1_Utils/hipCommander/LICENSE.txt b/samples/1_Utils/hipCommander/LICENSE.txt deleted file mode 100644 index 5d0d603232..0000000000 --- a/samples/1_Utils/hipCommander/LICENSE.txt +++ /dev/null @@ -1,27 +0,0 @@ - -Copyright (c) 2011, UT-Battelle, LLC -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. -* Neither the name of Oak Ridge National Laboratory, nor UT-Battelle, LLC, nor - the names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - diff --git a/samples/1_Utils/hipCommander/Makefile b/samples/1_Utils/hipCommander/Makefile deleted file mode 100644 index fef6bfc946..0000000000 --- a/samples/1_Utils/hipCommander/Makefile +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. All Rights Reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -ifeq ($(OS),Windows_NT) - $(error Makefile is not supported on windows platform. Please use cmake instead to build sample.) -endif -ROCM_PATH?= $(wildcard /opt/rocm/) -HIP_PATH?= $(wildcard $(ROCM_PATH)/hip) -ifeq (,$(HIP_PATH)) - HIP_PATH=../../.. -endif -HIPCC=$(HIP_PATH)/bin/hipcc - -EXE=hipCommander -OPT=-O3 -#CXXFLAGS = -O3 -g -CXXFLAGS = $(OPT) --std=c++11 - -HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform) - -CODE_OBJECTS=nullkernel.hsaco - -all: ${EXE} ${CODE_OBJECTS} - -$(EXE): hipCommander.cpp - $(HIPCC) $(CXXFLAGS) $^ -o $@ - -nullkernel.hsaco : nullkernel.hip.cpp - $(HIPCC) --genco nullkernel.hip.cpp -o nullkernel.hsaco - - -install: $(EXE) - cp $(EXE) $(HIP_PATH)/bin - - -clean: - rm -f *.o *.co $(EXE) diff --git a/samples/1_Utils/hipCommander/ResultDatabase.cpp b/samples/1_Utils/hipCommander/ResultDatabase.cpp deleted file mode 100644 index 51ced81fae..0000000000 --- a/samples/1_Utils/hipCommander/ResultDatabase.cpp +++ /dev/null @@ -1,454 +0,0 @@ -#include "ResultDatabase.h" - -#include -#include -#include -#include - -using namespace std; - -bool ResultDatabase::Result::operator<(const Result& rhs) const { - if (test < rhs.test) return true; - if (test > rhs.test) return false; - if (atts < rhs.atts) return true; - if (atts > rhs.atts) return false; - return false; // less-operator returns false on equal -} - -double ResultDatabase::Result::GetMin() const { - double r = FLT_MAX; - for (int i = 0; i < value.size(); i++) { - r = min(r, value[i]); - } - return r; -} - -double ResultDatabase::Result::GetMax() const { - double r = -FLT_MAX; - for (int i = 0; i < value.size(); i++) { - r = max(r, value[i]); - } - return r; -} - -double ResultDatabase::Result::GetMedian() const { return GetPercentile(50); } - -double ResultDatabase::Result::GetPercentile(double q) const { - int n = value.size(); - if (n == 0) return FLT_MAX; - if (n == 1) return value[0]; - - if (q <= 0) return value[0]; - if (q >= 100) return value[n - 1]; - - double index = ((n + 1.) * q / 100.) - 1; - - vector sorted = value; - sort(sorted.begin(), sorted.end()); - - if (n == 2) return (sorted[0] * (1 - q / 100.) + sorted[1] * (q / 100.)); - - int index_lo = int(index); - double frac = index - index_lo; - if (frac == 0) return sorted[index_lo]; - - double lo = sorted[index_lo]; - double hi = sorted[index_lo + 1]; - return lo + (hi - lo) * frac; -} - -double ResultDatabase::Result::GetMean() const { - double r = 0; - for (int i = 0; i < value.size(); i++) { - r += value[i]; - } - return r / double(value.size()); -} - -double ResultDatabase::Result::GetStdDev() const { - double r = 0; - double u = GetMean(); - if (u == FLT_MAX) return FLT_MAX; - for (int i = 0; i < value.size(); i++) { - r += (value[i] - u) * (value[i] - u); - } - r = sqrt(r / value.size()); - return r; -} - - -void ResultDatabase::AddResults(const string& test, const string& atts, const string& unit, - const vector& values) { - for (int i = 0; i < values.size(); i++) { - AddResult(test, atts, unit, values[i]); - } -} - -static string RemoveAllButLeadingSpaces(const string& a) { - string b; - int n = a.length(); - int i = 0; - while (i < n && a[i] == ' ') { - b += a[i]; - ++i; - } - for (; i < n; i++) { - if (a[i] != ' ' && a[i] != '\t') b += a[i]; - } - return b; -} - -void ResultDatabase::AddResult(const string& test_orig, const string& atts_orig, - const string& unit_orig, double value) { - string test = RemoveAllButLeadingSpaces(test_orig); - string atts = RemoveAllButLeadingSpaces(atts_orig); - string unit = RemoveAllButLeadingSpaces(unit_orig); - int index; - for (index = 0; index < results.size(); index++) { - if (results[index].test == test && results[index].atts == atts) { - if (results[index].unit != unit) throw "Internal error: mixed units"; - - break; - } - } - - if (index >= results.size()) { - Result r; - r.test = test; - r.atts = atts; - r.unit = unit; - results.push_back(r); - } - - results[index].value.push_back(value); -} - -// **************************************************************************** -// Method: ResultDatabase::DumpDetailed -// -// Purpose: -// Writes the full results, including all trials. -// -// Arguments: -// out where to print -// -// Programmer: Jeremy Meredith -// Creation: August 14, 2009 -// -// Modifications: -// Jeremy Meredith, Wed Nov 10 14:25:17 EST 2010 -// Renamed to DumpDetailed to make room for a DumpSummary. -// -// Jeremy Meredith, Thu Nov 11 11:39:57 EST 2010 -// Added note about (*) missing value tag. -// -// Jeremy Meredith, Tue Nov 23 13:57:02 EST 2010 -// Changed note about missing values to be worded a little better. -// -// **************************************************************************** -void ResultDatabase::DumpDetailed(ostream& out) { - vector sorted(results); - sort(sorted.begin(), sorted.end()); - - const int testNameW = 24; - const int attW = 12; - const int fieldW = 11; - out << std::fixed << right << std::setprecision(4); - - int maxtrials = 1; - for (int i = 0; i < sorted.size(); i++) { - if (sorted[i].value.size() > maxtrials) maxtrials = sorted[i].value.size(); - } - - // TODO: in big parallel runs, the "trials" are the procs - // and we really don't want to print them all out.... - out << setw(testNameW) << "test\t" << setw(attW) << "atts\t" << setw(fieldW) << "median\t" - << "mean\t" - << "stddev\t" - << "min\t" - << "max\t"; - for (int i = 0; i < maxtrials; i++) out << "trial" << i << "\t"; - out << endl; - - for (int i = 0; i < sorted.size(); i++) { - Result& r = sorted[i]; - out << setw(testNameW) << r.test + "\t"; - out << setw(attW) << r.atts + "\t"; - out << setw(fieldW) << r.unit + "\t"; - if (r.GetMedian() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMedian() << "\t"; - if (r.GetMean() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMean() << "\t"; - if (r.GetStdDev() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetStdDev() << "\t"; - if (r.GetMin() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMin() << "\t"; - if (r.GetMax() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMax() << "\t"; - for (int j = 0; j < r.value.size(); j++) { - if (r.value[j] == FLT_MAX) - out << "N/A\t"; - else - out << r.value[j] << "\t"; - } - - out << endl; - } - out << endl - << "Note: Any results marked with (*) had missing values." << endl - << " This can occur on systems with a mixture of" << endl - << " device types or architectural capabilities." << endl; -} - - -// **************************************************************************** -// Method: ResultDatabase::DumpDetailed -// -// Purpose: -// Writes the summary results (min/max/stddev/med/mean), but not -// every individual trial. -// -// Arguments: -// out where to print -// -// Programmer: Jeremy Meredith -// Creation: November 10, 2010 -// -// Modifications: -// Jeremy Meredith, Thu Nov 11 11:39:57 EST 2010 -// Added note about (*) missing value tag. -// -// **************************************************************************** -void ResultDatabase::DumpSummary(ostream& out) { - vector sorted(results); - sort(sorted.begin(), sorted.end()); - - const int testNameW = 24; - const int attW = 12; - const int fieldW = 9; - out << std::fixed << right << std::setprecision(4); - - // TODO: in big parallel runs, the "trials" are the procs - // and we really don't want to print them all out.... - out << setw(testNameW) << "test\t" << setw(attW) << "atts\t" << setw(fieldW) << "units\t" - << "median\t" - << "mean\t" - << "stddev\t" - << "min\t" - << "max\t"; - out << endl; - - for (int i = 0; i < sorted.size(); i++) { - Result& r = sorted[i]; - out << setw(testNameW) << r.test + "\t"; - out << setw(attW) << r.atts + "\t"; - out << setw(fieldW) << r.unit + "\t"; - if (r.GetMedian() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMedian() << "\t"; - if (r.GetMean() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMean() << "\t"; - if (r.GetStdDev() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetStdDev() << "\t"; - if (r.GetMin() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMin() << "\t"; - if (r.GetMax() == FLT_MAX) - out << "N/A\t"; - else - out << r.GetMax() << "\t"; - - out << endl; - } - out << endl - << "Note: results marked with (*) had missing values such as" << endl - << "might occur with a mixture of architectural capabilities." << endl; -} - -// **************************************************************************** -// Method: ResultDatabase::ClearAllResults -// -// Purpose: -// Clears all existing results from the ResultDatabase; used for multiple passes -// of the same test or multiple tests. -// -// Arguments: -// -// Programmer: Jeffrey Young -// Creation: September 10th, 2014 -// -// Modifications: -// -// -// **************************************************************************** -void ResultDatabase::ClearAllResults() { results.clear(); } - -// **************************************************************************** -// Method: ResultDatabase::DumpCsv -// -// Purpose: -// Writes either detailed or summary results (min/max/stddev/med/mean), but not -// every individual trial. -// -// Arguments: -// out file to print CSV results -// -// Programmer: Jeffrey Young -// Creation: August 28th, 2014 -// -// Modifications: -// -// **************************************************************************** -void ResultDatabase::DumpCsv(string fileName) { - bool emptyFile; - vector sorted(results); - - sort(sorted.begin(), sorted.end()); - - // Check to see if the file is empty - if so, add the headers - emptyFile = this->IsFileEmpty(fileName); - - // Open file and append by default - ofstream out; - out.open(fileName.c_str(), std::ofstream::out | std::ofstream::app); - - // Add headers only for empty files - if (emptyFile) { - // TODO: in big parallel runs, the "trials" are the procs - // and we really don't want to print them all out.... - out << "test, " - << "atts, " - << "units, " - << "median, " - << "mean, " - << "stddev, " - << "min, " - << "max, "; - out << endl; - } - - for (int i = 0; i < sorted.size(); i++) { - Result& r = sorted[i]; - out << r.test << ", "; - out << r.atts << ", "; - out << r.unit << ", "; - if (r.GetMedian() == FLT_MAX) - out << "N/A, "; - else - out << r.GetMedian() << ", "; - if (r.GetMean() == FLT_MAX) - out << "N/A, "; - else - out << r.GetMean() << ", "; - if (r.GetStdDev() == FLT_MAX) - out << "N/A, "; - else - out << r.GetStdDev() << ", "; - if (r.GetMin() == FLT_MAX) - out << "N/A, "; - else - out << r.GetMin() << ", "; - if (r.GetMax() == FLT_MAX) - out << "N/A, "; - else - out << r.GetMax() << ", "; - - out << endl; - } - out << endl; - - out.close(); -} - -// **************************************************************************** -// Method: ResultDatabase::IsFileEmpty -// -// Purpose: -// Returns whether a file is empty - used as a helper for CSV printing -// -// Arguments: -// file The input file to check for emptiness -// -// Programmer: Jeffrey Young -// Creation: August 28th, 2014 -// -// Modifications: -// -// **************************************************************************** - -bool ResultDatabase::IsFileEmpty(string fileName) { - - ifstream file(fileName.c_str()); - - // If the file doesn't exist it is by definition empty - if (!file.good()) { - return true; - } else { - bool fileEmpty; - fileEmpty = (bool)(file.peek() == ifstream::traits_type::eof()); - file.close(); - - return fileEmpty; - } - - // Otherwise, return false - return false; -} - - -// **************************************************************************** -// Method: ResultDatabase::GetResultsForTest -// -// Purpose: -// Returns a vector of results for just one test name. -// -// Arguments: -// test the name of the test results to search for -// -// Programmer: Jeremy Meredith -// Creation: December 3, 2010 -// -// Modifications: -// -// **************************************************************************** -vector ResultDatabase::GetResultsForTest(const string& test) { - // get only the given test results - vector retval; - for (int i = 0; i < results.size(); i++) { - Result& r = results[i]; - if (r.test == test) retval.push_back(r); - } - return retval; -} - -// **************************************************************************** -// Method: ResultDatabase::GetResults -// -// Purpose: -// Returns all the results. -// -// Arguments: -// -// Programmer: Jeremy Meredith -// Creation: December 3, 2010 -// -// Modifications: -// -// **************************************************************************** -const vector& ResultDatabase::GetResults() const { return results; } diff --git a/samples/1_Utils/hipCommander/ResultDatabase.h b/samples/1_Utils/hipCommander/ResultDatabase.h deleted file mode 100644 index ca6a00fc91..0000000000 --- a/samples/1_Utils/hipCommander/ResultDatabase.h +++ /dev/null @@ -1,89 +0,0 @@ -#ifndef RESULT_DATABASE_H -#define RESULT_DATABASE_H - -#include -#include -#include -#include -#include -using std::ifstream; -using std::ofstream; -using std::ostream; -using std::string; -using std::vector; - - -// **************************************************************************** -// Class: ResultDatabase -// -// Purpose: -// Track numerical results as they are generated. -// Print statistics of raw results. -// -// Programmer: Jeremy Meredith -// Creation: June 12, 2009 -// -// Modifications: -// Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010 -// Split timing reports into detailed and summary. E.g. for serial code, -// we might report all trial values, but skip them in parallel. -// -// Jeremy Meredith, Thu Nov 11 11:40:18 EST 2010 -// Added check for missing value tag. -// -// Jeremy Meredith, Mon Nov 22 13:37:10 EST 2010 -// Added percentile statistic. -// -// Jeremy Meredith, Fri Dec 3 16:30:31 EST 2010 -// Added a method to extract a subset of results based on test name. Also, -// the Result class is now public, so that clients can use them directly. -// Added a GetResults method as well, and made several functions const. -// -// **************************************************************************** -class ResultDatabase { - public: - // - // A performance result for a single SHOC benchmark run. - // - struct Result { - string test; // e.g. "readback" - string atts; // e.g. "pagelocked 4k^2" - string unit; // e.g. "MB/sec" - vector value; // e.g. "837.14" - double GetMin() const; - double GetMax() const; - double GetMedian() const; - double GetPercentile(double q) const; - double GetMean() const; - double GetStdDev() const; - - bool operator<(const Result& rhs) const; - - bool HadAnyFLTMAXValues() const { - for (int i = 0; i < value.size(); ++i) { - if (value[i] >= FLT_MAX) return true; - } - return false; - } - }; - - protected: - vector results; - - public: - void AddResult(const string& test, const string& atts, const string& unit, double value); - void AddResults(const string& test, const string& atts, const string& unit, - const vector& values); - vector GetResultsForTest(const string& test); - const vector& GetResults() const; - void ClearAllResults(); - void DumpDetailed(ostream&); - void DumpSummary(ostream&); - void DumpCsv(string fileName); - - private: - bool IsFileEmpty(string fileName); -}; - - -#endif diff --git a/samples/1_Utils/hipCommander/c.cmd b/samples/1_Utils/hipCommander/c.cmd deleted file mode 100644 index 4cb980eccb..0000000000 --- a/samples/1_Utils/hipCommander/c.cmd +++ /dev/null @@ -1,3 +0,0 @@ -loop(1000); H2D; NullKernel; D2H; endloop; -streamsync; -printTiming(1000) diff --git a/samples/1_Utils/hipCommander/classic.cmd b/samples/1_Utils/hipCommander/classic.cmd deleted file mode 100644 index 7b1e4273c9..0000000000 --- a/samples/1_Utils/hipCommander/classic.cmd +++ /dev/null @@ -1 +0,0 @@ -H2D; NullKernel; D2H; streamsync diff --git a/samples/1_Utils/hipCommander/hipCommander.cpp b/samples/1_Utils/hipCommander/hipCommander.cpp deleted file mode 100644 index 0743641214..0000000000 --- a/samples/1_Utils/hipCommander/hipCommander.cpp +++ /dev/null @@ -1,865 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include -#ifndef _WIN32 - #include -#endif - -#include "ResultDatabase.h" -#include "nullkernel.hip.cpp" - -bool g_printedTiming = false; - -// Cmdline parms: -int p_device = 0; -const char* p_command = "setstream(1); H2D; NullKernel; D2H;"; -const char* p_file = nullptr; -unsigned p_verbose = 0x0; -unsigned p_db = 0x0; -unsigned p_blockingSync = 0x0; - -//--- -int p_iterations = 1; - -#define KNRM "\x1B[0m" -#define KRED "\x1B[31m" -#define KGRN "\x1B[32m" - - -#define failed(...) \ - printf("error: "); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - abort(); - - -#define HIPCHECK(error) \ - { \ - hipError_t localError = error; \ - if (localError != hipSuccess) { \ - printf("%serror: '%s'(%d) from %s at %s:%d%s\n", KRED, hipGetErrorString(localError), \ - localError, #error, __FILE__, __LINE__, KNRM); \ - failed("API returned error code."); \ - } \ - } -#define HIPASSERT(condition, msg) \ - if (!(condition)) { \ - failed("%sassertion %s at %s:%d: %s%s\n", KRED, #condition, __FILE__, __LINE__, msg, \ - KNRM); \ - } - - -int parseInt(const char* str, int* output) { - char* next; - *output = strtol(str, &next, 0); - return !strlen(next); -} - - -void printConfig() { - hipDeviceProp_t props; - HIPCHECK(hipGetDeviceProperties(&props, p_device)); - - printf("Device:%s Mem=%.1fGB #CUs=%d Freq=%.0fMhz\n", props.name, - props.totalGlobalMem / 1024.0 / 1024.0 / 1024.0, props.multiProcessorCount, - props.clockRate / 1000.0); -} - - -void help() { - printf("Usage: hipBusBandwidth [OPTIONS]\n"); - printf(" --file, -f : Read string of commands from file\n"); - printf(" --command, -c : String specifying commands to run.\n"); - printf(" --iterations, -i : Number of copy iterations to run.\n"); - printf(" --device, -d : Device ID to use (0..numDevices).\n"); - printf( - " --verbose, -v : Verbose printing of status. Fore more info, combine with " - "HIP_TRACE_API on ROCm\n"); -}; - - -int parseStandardArguments(int argc, char* argv[]) { - for (int i = 1; i < argc; i++) { - const char* arg = argv[i]; - - if (!strcmp(arg, " ")) { - // skip NULL args. - } else if (!strcmp(arg, "--iterations") || (!strcmp(arg, "-i"))) { - if (++i >= argc || !parseInt(argv[i], &p_iterations)) { - failed("Bad --iterations argument"); - } - - } else if (!strcmp(arg, "--device") || (!strcmp(arg, "-d"))) { - if (++i >= argc || !parseInt(argv[i], &p_device)) { - failed("Bad --device argument"); - } - - } else if (!strcmp(arg, "--file") || (!strcmp(arg, "-f"))) { - if (++i >= argc) { - failed("Bad --file argument"); - } else { - p_file = argv[i]; - } - - } else if (!strcmp(arg, "--commands") || (!strcmp(arg, "-c"))) { - if (++i >= argc) { - failed("Bad --commands argument"); - } else { - p_command = argv[i]; - } - - } else if (!strcmp(arg, "--verbose") || (!strcmp(arg, "-v"))) { - p_verbose = 1; - - } else if (!strcmp(arg, "--blockingSync") || (!strcmp(arg, "-B"))) { - p_blockingSync = 1; - - - } else if (!strcmp(arg, "--help") || (!strcmp(arg, "-h"))) { - help(); - exit(EXIT_SUCCESS); - - } else { - failed("Bad argument '%s'", arg); - } - } - - return 0; -}; - -// Returns the current system time in microseconds -inline long long get_time() { -#ifdef _WIN32 - struct timespec ts; - timespec_get(&ts, TIME_UTC); - return (ts.tv_sec * 1000000) + (ts.tv_nsec/1000); -#else - struct timeval tv; - gettimeofday(&tv, 0); - return (tv.tv_sec * 1000000) + tv.tv_usec; -#endif -} - - -class Command; - - -//================================================================================================= -// A stream of commands , specified as a string. -class CommandStream { - public: - // State that is inherited by sub-blocks: - struct CommandStreamState { - hipStream_t _currentStream; - std::vector _streams; - vector _subBlocks; - }; - - public: - CommandStream(std::string commandStreamString, int iterations); - ~CommandStream(); - - hipStream_t currentStream() const { return _state._currentStream; }; - - void print(const std::string& indent = "") const; - void printBrief(std::ostream& s = std::cout) const; - void run(); - void recordTime(); - void printTiming(int iterations = 0); - - CommandStream* currentCommandStream() { - return _parseInSubBlock ? _state._subBlocks.back() : this; - }; - - void enterSubBlock(CommandStream* commandStream) { - _parseInSubBlock = true; - _state._subBlocks.push_back(commandStream); - }; - - void exitSubBlock() { _parseInSubBlock = false; }; - - - void setParent(CommandStream* parentCmdStream) { - _parentCommandStream = parentCmdStream; - _state = parentCmdStream->_state; - }; - CommandStream* getParent() { return _parentCommandStream; }; - - void setStream(int streamIndex); - - CommandStreamState& getState() { return _state; }; - - private: - static void tokenize(const std::string& s, char delim, std::vector& tokens); - void parse(const std::string fullCmd); - - protected: - CommandStreamState _state; - - private: - // List of commands to run in this stream: - std::vector _commands; - - - // Number of iterations to run the command loop - int _iterations; - - - // Us to run the the command-stream. Only valid after run is called. - long long _startTime; - double _elapsedUs; - - // Track nested loop of command streams: - CommandStream* _parentCommandStream; - - // Track if we are parsing commands in the subblock. - bool _parseInSubBlock; -}; - - -//================================================================================================= -class Command { - public: - // @p minArgs : Minimum arguments for command. -1 = don't check. - // @p maxArgs : Minimum arguments for command. 0 means min=max, ie exact #arguments expected. - // -1 = don't check max. - Command(CommandStream* cmdStream, const std::vector& args, int minArgs = 0, - int maxArgs = 0) - : _commandStream(cmdStream), _args(args) { - int numArgs = args.size() - 1; - - if ((minArgs != -1) && (numArgs < minArgs)) { - // TODO - print full command here. - failed("Not enough arguments for command %s. (Expected %d, got %d)", args[0].c_str(), - minArgs, numArgs); - } - - // Check for an exact number of arguments: - if (maxArgs == 0) { - maxArgs = minArgs; - } - if ((maxArgs != -1) && (numArgs > maxArgs)) { - failed("Too many arguments for command %s. (Expected %d, got %d)", args[0].c_str(), - maxArgs, numArgs); - } - }; - - void printBrief(std::ostream& s = std::cout) const { s << _args[0]; } - - virtual ~Command(){}; - - virtual void print(const std::string& indent = "") const { - std::cout << indent << "["; - std::for_each(_args.begin(), _args.end(), [](const std::string& s) { std::cout << s; }); - std::cout << "]"; - }; - - - virtual void run() = 0; - - protected: - int readIntArg(int argIndex, const std::string& argName) { - // TODO - catch references to non-existant arguments here. - int argVal; - try { - argVal = std::stoi(_args[argIndex]); - } catch (std::invalid_argument) { - failed("Command %s has bad %s argument ('%s')", _args[0].c_str(), argName.c_str(), - _args[argIndex].c_str()); - } - return argVal; - } - - protected: - CommandStream* _commandStream; - std::vector _args; -}; - - -#define FILENAME "nullkernel.hsaco" -#define KERNEL_NAME "NullKernel" - -//================================================================================================= -// HCC optimizes away fully NULL kernel calls, so run one that is nearly null: -class ModuleKernelCommand : public Command { - public: - ModuleKernelCommand(CommandStream* cmdStream, const std::vector& args) - : Command(cmdStream, args), _stream(cmdStream->currentStream()) { - hipModule_t module; - HIPCHECK(hipModuleLoad(&module, FILENAME)); - HIPCHECK(hipModuleGetFunction(&_function, module, KERNEL_NAME)); - }; - ~ModuleKernelCommand(){}; - - void run() override { -#define LEN 64 - float *X = NULL; - HIPCHECK(hipMalloc((void**)&X, sizeof(float))); - struct { - float *Ad; - }args; - args.Ad = X; - size_t argSize = sizeof(args); - - void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, - HIP_LAUNCH_PARAM_BUFFER_SIZE, &argSize, HIP_LAUNCH_PARAM_END}; - - hipModuleLaunchKernel(_function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config); - }; - - - public: - hipFunction_t _function; - hipStream_t _stream; -}; - - -class KernelCommand : public Command { - public: - enum Type { Null, VectorAdd }; - KernelCommand(CommandStream* cmdStream, const std::vector& args, Type kind) - : Command(cmdStream, args), _kind(kind), _stream(cmdStream->currentStream()){}; - ~KernelCommand(){}; - - - void run() override { - static const int gridX = 64; - static const int groupX = 64; - - switch (_kind) { - case Null: - hipLaunchKernelGGL(NullKernel, dim3(gridX / groupX), dim3(gridX), 0, _stream, nullptr); - break; - case VectorAdd: - assert(0); // TODO - break; - }; - } - - private: - Type _kind; - hipStream_t _stream; -}; - -//================================================================================================= -class CopyCommand : public Command { - enum MemType { PinnedHost, UnpinnedHost, Device }; - - public: - CopyCommand(CommandStream* cmdStream, const std::vector& args, hipMemcpyKind kind, - bool isAsync, bool isPinnedHost); - - ~CopyCommand() { - if (_dst) { - dealloc(_dst, _dstType); - _dst = NULL; - }; - - if (_src) { - dealloc(_src, _srcType); - _src = NULL; - } - } - - - void run() override { - if (_isAsync) { - HIPCHECK(hipMemcpyAsync(_dst, _src, _sizeBytes, _kind, _stream)); - } else { - HIPCHECK(hipMemcpy(_dst, _src, _sizeBytes, _kind)); - } - }; - - private: - void* alloc(size_t size, MemType memType) { - void* p; - if (memType == Device) { - HIPCHECK(hipMalloc(&p, size)); - - } else if (memType == PinnedHost) { - HIPCHECK(hipHostMalloc(&p, size)); - - } else if (memType == UnpinnedHost) { - p = (char*)malloc(size); - HIPASSERT(p, "malloc failed"); - - } else { - HIPASSERT(0, "unsupported memType"); - } - - return p; - }; - - - static void dealloc(void* p, MemType memType) { - if (memType == Device) { - HIPCHECK(hipFree(p)); - } else if (memType == PinnedHost) { - HIPCHECK(hipHostFree(p)); - } else if (memType == UnpinnedHost) { - free(p); - } else { - HIPASSERT(0, "unsupported memType"); - } - } - - - private: - bool _isAsync; - hipStream_t _stream; - hipMemcpyKind _kind; - - size_t _sizeBytes; - void* _dst; - MemType _dstType; - - void* _src; - MemType _srcType; -}; - - -//================================================================================================= -class DeviceSyncCommand : public Command { - public: - DeviceSyncCommand(CommandStream* cmdStream, const std::vector& args) - : Command(cmdStream, args){}; - - void run() override { HIPCHECK(hipDeviceSynchronize()); }; -}; - - -//================================================================================================= -class StreamSyncCommand : public Command { - public: - StreamSyncCommand(CommandStream* cmdStream, const std::vector& args) - : Command(cmdStream, args), _stream(cmdStream->currentStream()){}; - - static const char* help() { return "synchronizes the current stream"; }; - - - void run() override { HIPCHECK(hipStreamSynchronize(_stream)); }; - - private: - hipStream_t _stream; -}; - - -//================================================================================================= - -//================================================================================================= -class LoopCommand : public Command { - public: - LoopCommand(CommandStream* parentCmdStream, const std::vector& args) - : Command(parentCmdStream, args, 1) { - int loopCnt; - try { - loopCnt = std::stoi(args[1]); - } catch (std::invalid_argument) { - failed("bad LOOP_CNT=%s", args[1].c_str()); - } - - _commandStream = new CommandStream("", loopCnt); - _commandStream->setParent(parentCmdStream); - parentCmdStream->enterSubBlock(_commandStream); - }; - - - void print(const std::string& indent = "") const override { - Command::print(); - _commandStream->print(indent + " "); - }; - - void run() override { _commandStream->run(); }; -}; - - -//================================================================================================= -class EndBlockCommand : public Command { - public: - EndBlockCommand(CommandStream* blockCmdStream, CommandStream* parentCmdStream, - const std::vector& args) - : Command(parentCmdStream, args, 0, 1), _blockCmdStream(blockCmdStream), _printTiming(0) { - int argCnt = args.size() - 1; - if (argCnt >= 1) { - _printTiming = readIntArg(1, "PRINT_TIMING"); - } - - if (parentCmdStream == nullptr) { - failed("%s without corresponding command to start block", args[0].c_str()); - } - parentCmdStream->exitSubBlock(); - }; - - void run() override { - if (_printTiming) { - _blockCmdStream->printTiming(); - } - }; - - private: - CommandStream* _blockCmdStream; - - // print the stream when loop exits. - int _printTiming; -}; - - -//================================================================================================= -class SetStreamCommand : public Command { - public: - SetStreamCommand(CommandStream* cmdStream, const std::vector& args) - : Command(cmdStream, args, 1) { - int streamIndex = readIntArg(1, "STREAM_INDEX"); - - cmdStream->setStream(streamIndex); - }; - - void run() override{}; -}; - - -//================================================================================================= -class PrintTimingCommand : public Command { - public: - PrintTimingCommand(CommandStream* cmdStream, const std::vector& args) - : Command(cmdStream, args, 1) { - _iterations = readIntArg(1, "ITERATIONS"); - }; - - void run() override { _commandStream->printTiming(_iterations); }; - - private: - int _iterations; -}; - - -//================================================================================================= -CopyCommand::CopyCommand(CommandStream* cmdStream, const std::vector& args, - hipMemcpyKind kind, bool isAsync, bool isPinnedHost) - : Command(cmdStream, args), - _isAsync(isAsync), - _stream(cmdStream->currentStream()), - _kind(kind) { - switch (kind) { - case hipMemcpyDeviceToHost: - _srcType = Device; - _dstType = isPinnedHost ? PinnedHost : UnpinnedHost; - break; - case hipMemcpyHostToDevice: - _srcType = isPinnedHost ? PinnedHost : UnpinnedHost; - _dstType = Device; - break; - default: - HIPASSERT(0, "Unknown hipMemcpyKind"); - }; - - _sizeBytes = 64; // TODO, support reading from arg. - - _dst = alloc(_sizeBytes, _dstType); - _src = alloc(_sizeBytes, _srcType); -}; - - -//================================================================================================= -//================================================================================================= -// Implementations: -//================================================================================================= - -//================================================================================================= -CommandStream::CommandStream(std::string commandStreamString, int iterations) - : _iterations(iterations), - _startTime(0), - _elapsedUs(0.0), - _parentCommandStream(nullptr), - _parseInSubBlock(false) { - std::vector tokens; - tokenize(commandStreamString, ';', tokens); - - setStream(0); - std::for_each(tokens.begin(), tokens.end(), [&](const std::string s) { this->parse(s); }); -} - - -CommandStream::~CommandStream() { - std::for_each(_state._streams.begin(), _state._streams.end(), [&](hipStream_t s) { - if (s) { - HIPCHECK(hipStreamDestroy(s)); - } - }); - - std::for_each(_commands.begin(), _commands.end(), [&](Command* c) { delete c; }); -} - - -void CommandStream::setStream(int streamIndex) { - if (streamIndex >= _state._streams.size()) { - _state._streams.resize(streamIndex + 1); - } - - if (streamIndex && (_state._streams[streamIndex] == nullptr)) { - // Create new stream: - hipStream_t stream; - HIPCHECK(hipStreamCreate(&stream)); - _state._streams[streamIndex] = stream; - _state._currentStream = stream; - } else { - // Use existing stream: - - _state._currentStream = _state._streams[streamIndex]; - } -} - - -void CommandStream::tokenize(const std::string& s, char delim, std::vector& tokens) { - std::stringstream ss; - ss.str(s); - std::string item; - while (getline(ss, item, delim)) { - item.erase(std::remove(item.begin(), item.end(), ' '), item.end()); // remove whitespace. - tokens.push_back(item); - } -} - -void trim(std::string* s) { - // trim whitespace from begin and end: - const char* t = "\t\n\r\f\v"; - s->erase(0, s->find_first_not_of(t)); - s->erase(s->find_last_not_of(t) + 1); -} - -void ltrim(std::string* s) { - // trim whitespace from begin and end: - const char* t = "\t\n\r\f\v"; - s->erase(0, s->find_first_not_of(t)); -} - -void CommandStream::parse(std::string fullCmd) { - // convert to lower-case: - std::transform(fullCmd.begin(), fullCmd.end(), fullCmd.begin(), ::tolower); - trim(&fullCmd); - - if (p_db) { - printf("parse: <%s>\n", fullCmd.c_str()); - } - - - std::string c; - std::vector args; - size_t leftParenZ = fullCmd.find_first_of('('); - if (leftParenZ == string::npos) { - c = fullCmd; - args.push_back(c); - } else { - c = fullCmd.substr(0, leftParenZ); - args.push_back(c); - size_t rightParenZ = fullCmd.find_first_of(')', leftParenZ); - std::string argStr = fullCmd.substr(leftParenZ + 1, rightParenZ - leftParenZ - 1); - // printf ("c=%s argstr='%s' leftParenZ=%zu rightParenZ=%zu\n", c.c_str(), argStr.c_str(), - // leftParenZ, rightParenZ); - tokenize(argStr, ',', args); - } - - - if ((args.size() == 0) || (fullCmd.c_str()[0] == '#')) { - if (p_db) { - printf(" skip comment\n"); - } - return; - } - - - Command* cmd = NULL; - CommandStream* cmdStream = currentCommandStream(); - - if (c == "h2d") { - cmd = new CopyCommand(cmdStream, args, hipMemcpyHostToDevice, true /*isAsync*/, - true /*isPinned*/); - //= h2d - //= Performs an async host-to-device copy of array A_h to A_d. - //= The size of these arrays may be set with the datasize command. - - } else if (c == "d2h") { - cmd = new CopyCommand(cmdStream, args, hipMemcpyDeviceToHost, true /*isAsync*/, - true /*isPinned*/); - //= d2h - //= Performs an async device-to-host copy of array A_d to A_h. - //= The size of these arrays may be set with the datasize command. - - } else if (c == "modulekernel") { - cmd = new ModuleKernelCommand(cmdStream, args); - - } else if (c == "nullkernel") { - cmd = new KernelCommand(cmdStream, args, KernelCommand::Null); - //= nullkernel - //= Dispatches a null kernel to the device. - - } else if (c == "vectoraddkernel") { - cmd = new KernelCommand(cmdStream, args, KernelCommand::VectorAdd); - - } else if (c == "devicesync") { - cmd = new DeviceSyncCommand(cmdStream, args); - - } else if (c == "streamsync") { - //= streamsync - //= Execute hipStreamSynchronize. - //= This will cause the host thread to wait until the current stream - //= completes all pending operations. - cmd = new StreamSyncCommand(cmdStream, args); - - } else if (c == "setstream") { - //= setstream(STREAM_INDEX); - //= Set current stream used by subsequent commands. - //= STREAM_INDEX is index starting from 0...N. - //= This function will create new stream on first call to setstream or re-use previous - //= stream if setstream has already been called with STREAM_INDEX. - //= STREAM_INDEX=0 will use the default "null" stream associated with the device, and will - //not create a new stream. = The default stream has special, conservative synchronization - //properties. - - cmd = new SetStreamCommand(cmdStream, args); - - } else if (c == "printtiming") { - cmd = new PrintTimingCommand(cmdStream, args); - - } else if (c == "loop") { - //= loop(LOOP_CNT) - //= Loop over next set of commands (until 'endloop' command) for LOOP_CNT iterations. - //= Loops can be nested. - - cmd = new LoopCommand(cmdStream, args); - - } else if (c == "endloop") { - //= endloop - //= End a looped sequence. Must be paired with a preceding loop command. - //= Command between the `loop` and `endloop` must be executed - - CommandStream* parentCmdStream = cmdStream->getParent(); - cmd = new EndBlockCommand(cmdStream, parentCmdStream, args); - cmdStream = parentCmdStream; - - } else { - std::cerr << "error: Bad command '" << fullCmd << "\n"; - HIPASSERT(0, "bad command in command-stream"); - } - - if (cmd) { - cmdStream->_commands.push_back(cmd); - } -} - - -void CommandStream::print(const std::string& indent) const { - for (auto cmdI = _commands.begin(); cmdI != _commands.end(); cmdI++) { - (*cmdI)->print(indent); - }; -} - - -void CommandStream::printBrief(std::ostream& s) const { - for (auto cmdI = _commands.begin(); cmdI != _commands.end(); cmdI++) { - (*cmdI)->printBrief(s); - s << ";"; - }; -} - -void CommandStream::run() { - _startTime = get_time(); - for (int i = 0; i < _iterations; i++) { - for (auto cmdI = _commands.begin(); cmdI != _commands.end(); cmdI++) { - if (p_verbose) { - (*cmdI)->print(); - } - (*cmdI)->run(); - } - } - - // Record time, if not already stored. (an earlier printTime command will also store the time) - recordTime(); -}; - -void CommandStream::recordTime() { - if (_elapsedUs == 0.0) { - auto stopTime = get_time(); - _elapsedUs = stopTime - _startTime; - } -} - - -void CommandStream::printTiming(int iterations) { - if ((_state._subBlocks.size() == 1) && (_commands.size() == 1)) { - // printf ("print just the loop\n"); - _state._subBlocks.front()->printTiming(iterations); - } else { - g_printedTiming = true; - - recordTime(); - if (iterations == 0) { - iterations = _iterations; - } - std::cout << "command<"; - printBrief(std::cout); - std::cout << ">,"; - printf(" iterations,%d, total_time,%6.3f, time/iteration,%6.3f\n", iterations, - _elapsedUs, _elapsedUs / iterations); - } -}; - - -//================================================================================================= -int main(int argc, char* argv[]) { - parseStandardArguments(argc, argv); - - printConfig(); - - CommandStream* cs; - - if (p_blockingSync) { -#ifdef __HIP_PLATFORM_AMD__ - printf("setting BlockingSync for AMD\n"); - #ifdef _WIN32 - _putenv_s("HIP_BLOCKING_SYNC", "1"); - #else - setenv("HIP_BLOCKING_SYNC", "1", 1); - #endif -#endif -#ifdef __HIP_PLATFORM_NVIDIA__ - printf("setting cudaDeviceBlockingSync\n"); - HIPCHECK(hipSetDeviceFlags(cudaDeviceBlockingSync)); -#endif - }; - - - if (p_file) { - // TODO - catch exception on file IO here: - std::ifstream file(p_file); - std::string str; - std::string file_contents; - while (std::getline(file, str)) { - file_contents += str; - } - - cs = new CommandStream(file_contents, p_iterations); - - } else { - cs = new CommandStream(p_command, p_iterations); - } - - cs->print(); - printf("------\n"); - - cs->run(); - if (!g_printedTiming) { - cs->printTiming(); - } - - delete cs; -} - - -// TODO - add error checking for arguments. diff --git a/samples/1_Utils/hipCommander/l2.hcm b/samples/1_Utils/hipCommander/l2.hcm deleted file mode 100644 index 6b14f7b829..0000000000 --- a/samples/1_Utils/hipCommander/l2.hcm +++ /dev/null @@ -1,3 +0,0 @@ -setstream(1); -NullKernel; streamsync; -loop(10000); H2D; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/loop.hcm b/samples/1_Utils/hipCommander/loop.hcm deleted file mode 100644 index 4cb980eccb..0000000000 --- a/samples/1_Utils/hipCommander/loop.hcm +++ /dev/null @@ -1,3 +0,0 @@ -loop(1000); H2D; NullKernel; D2H; endloop; -streamsync; -printTiming(1000) diff --git a/samples/1_Utils/hipCommander/loop2.hcm b/samples/1_Utils/hipCommander/loop2.hcm deleted file mode 100644 index ae753d0722..0000000000 --- a/samples/1_Utils/hipCommander/loop2.hcm +++ /dev/null @@ -1,2 +0,0 @@ -setstream(1); -loop(1000); NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/nullkernel.hip.cpp b/samples/1_Utils/hipCommander/nullkernel.hip.cpp deleted file mode 100644 index 8016f109c7..0000000000 --- a/samples/1_Utils/hipCommander/nullkernel.hip.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include "hip/hip_runtime.h" - -extern "C" __global__ void NullKernel(float* Ad) { - if (Ad) { - Ad[0] = 42; - } -} diff --git a/samples/1_Utils/hipCommander/nullkernel.hsaco b/samples/1_Utils/hipCommander/nullkernel.hsaco deleted file mode 100755 index da6a3e6823..0000000000 Binary files a/samples/1_Utils/hipCommander/nullkernel.hsaco and /dev/null differ diff --git a/samples/1_Utils/hipCommander/perf/latency2.hcm b/samples/1_Utils/hipCommander/perf/latency2.hcm deleted file mode 100644 index e43960dc5a..0000000000 --- a/samples/1_Utils/hipCommander/perf/latency2.hcm +++ /dev/null @@ -1,10 +0,0 @@ -setstream(1); -NullKernel; streamsync; -loop(30000); NullKernel; streamsync; endloop(1); -loop(30000); H2D; H2D; NullKernel; streamsync; endloop(1); -loop(30000); H2D; H2D; H2D; NullKernel; streamsync; endloop(1); - -loop(30000); H2D; NullKernel; D2H; streamsync; endloop(1); -loop(30000); NullKernel; D2H; streamsync; endloop(1); -loop(30000); NullKernel; D2H; D2H; streamsync; endloop(1); -loop(30000); NullKernel; D2H; D2H; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/latency_hostsync.hcm b/samples/1_Utils/hipCommander/perf/latency_hostsync.hcm deleted file mode 100644 index f042b446e3..0000000000 --- a/samples/1_Utils/hipCommander/perf/latency_hostsync.hcm +++ /dev/null @@ -1,8 +0,0 @@ -setstream(1); -NullKernel; streamsync; -loop(100000); NullKernel; streamsync; endloop(1); - -loop(100000); H2D; streamsync; NullKernel; streamsync; endloop(1); - -loop(100000); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1); - diff --git a/samples/1_Utils/hipCommander/perf/latency_nosync.hcm b/samples/1_Utils/hipCommander/perf/latency_nosync.hcm deleted file mode 100644 index 682d9d8b30..0000000000 --- a/samples/1_Utils/hipCommander/perf/latency_nosync.hcm +++ /dev/null @@ -1,5 +0,0 @@ -setstream(1); -NullKernel; streamsync; -loop(100000); NullKernel; streamsync; endloop(1); -loop(100000); H2D; NullKernel; streamsync; endloop(1); -loop(100000); H2D; NullKernel; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/latency_nullstream.hcm b/samples/1_Utils/hipCommander/perf/latency_nullstream.hcm deleted file mode 100644 index 87968a4df9..0000000000 --- a/samples/1_Utils/hipCommander/perf/latency_nullstream.hcm +++ /dev/null @@ -1,7 +0,0 @@ -setstream(0); -NullKernel; streamsync; -loop(100000); NullKernel; streamsync; endloop(1); - -loop(100000); H2D; NullKernel; streamsync; endloop(1); - -loop(100000); H2D; NullKernel; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/modulelaunch_latency.hcm b/samples/1_Utils/hipCommander/perf/modulelaunch_latency.hcm deleted file mode 100644 index 576208135c..0000000000 --- a/samples/1_Utils/hipCommander/perf/modulelaunch_latency.hcm +++ /dev/null @@ -1,5 +0,0 @@ -setstream(1); -NullKernel; streamsync; -loop(100); ModuleKernel; streamsync; endloop(1); -loop(100); streamsync; endloop(1); -loop(3000); NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2_d2h_h2d.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2_d2h_h2d.hcm deleted file mode 100644 index 640bb2be79..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2_d2h_h2d.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1); -loop(10); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1); -loop(100); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1); -loop(100); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1); -loop(1000); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1); -loop(1000); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1); -loop(10000); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1); -loop(10000); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2_d2h_kernel.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2_d2h_kernel.hcm deleted file mode 100644 index c1bc0f6702..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2_d2h_kernel.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1); -loop(10); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1); -loop(100); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1); -loop(100); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1); -loop(1000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1); -loop(1000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1); -loop(10000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1); -loop(10000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2_d2h_sync_h2d.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2_d2h_sync_h2d.hcm deleted file mode 100644 index 0e787f9bd0..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2_d2h_sync_h2d.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1); -loop(10); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1); -loop(100); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1); -loop(100); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1); -loop(1000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1); -loop(1000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1); -loop(10000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1); -loop(10000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2_d2h_sync_kernel.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2_d2h_sync_kernel.hcm deleted file mode 100644 index 8d7fddc146..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2_d2h_sync_kernel.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1); -loop(10); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1); -loop(100); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1); -loop(100); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1); -loop(1000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1); -loop(1000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1); -loop(10000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1); -loop(10000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2_h2d_d2h.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2_h2d_d2h.hcm deleted file mode 100644 index 7d845d03a4..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2_h2d_d2h.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1); -loop(10); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1); -loop(100); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1); -loop(100); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1); -loop(1000); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1); -loop(1000); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1); -loop(10000); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1); -loop(10000); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2_h2d_kernel.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2_h2d_kernel.hcm deleted file mode 100644 index 49c0d77146..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2_h2d_kernel.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1); -loop(10); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync;endloop(1); -loop(100); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync;endloop(1); -loop(100); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync; endloop(1); -loop(1000); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync; endloop(1); -loop(1000); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync; endloop(1); -loop(10000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1); -loop(10000); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2_h2d_sync_d2h.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2_h2d_sync_d2h.hcm deleted file mode 100644 index fe1f14bee5..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2_h2d_sync_d2h.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1); -loop(10); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1); -loop(100); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1); -loop(100); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1); -loop(1000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1); -loop(1000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1); -loop(10000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1); -loop(10000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2_h2d_sync_kernel.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2_h2d_sync_kernel.hcm deleted file mode 100644 index 0762001daa..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2_h2d_sync_kernel.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1); -loop(10); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1); -loop(100); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1); -loop(100); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1); -loop(1000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1); -loop(1000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1); -loop(10000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1); -loop(10000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2_kernel_d2h.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2_kernel_d2h.hcm deleted file mode 100644 index 88003ba476..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2_kernel_d2h.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1); -loop(10); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1); -loop(100); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1); -loop(100); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1); -loop(1000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1); -loop(1000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1); -loop(10000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1); -loop(10000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2_kernel_h2d.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2_kernel_h2d.hcm deleted file mode 100644 index 01913f8481..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2_kernel_h2d.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1); -loop(10); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1); -loop(100); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1); -loop(100); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1); -loop(1000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1); -loop(1000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1); -loop(10000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1); -loop(10000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2_kernel_sync_d2h.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2_kernel_sync_d2h.hcm deleted file mode 100644 index 530eb8f68e..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2_kernel_sync_d2h.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(10); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(100); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(100); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2_kernel_sync_h2d.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2_kernel_sync_h2d.hcm deleted file mode 100644 index 6d83ee87c9..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2_kernel_sync_h2d.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(10); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(100); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(100); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2_sync.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2_sync.hcm deleted file mode 100644 index 8b9e233a9e..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2_sync.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); streamsync; streamsync; endloop(1); -loop(10); streamsync; streamsync; endloop(1); -loop(100); streamsync; streamsync; endloop(1); -loop(100); streamsync; streamsync; endloop(1); -loop(1000); streamsync; streamsync; endloop(1); -loop(1000); streamsync; streamsync; endloop(1); -loop(10000); streamsync; streamsync; endloop(1); -loop(10000); streamsync; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2d2h.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2d2h.hcm deleted file mode 100644 index 83cdc4ff75..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2d2h.hcm +++ /dev/null @@ -1,10 +0,0 @@ -setstream(1); -loop(10); D2H; streamsync; D2H; streamsync; endloop(1); -loop(10); D2H; streamsync; D2H; streamsync; endloop(1); -loop(100); D2H; streamsync; D2H; streamsync; endloop(1); -loop(100); D2H; streamsync; D2H; streamsync; endloop(1); -loop(1000); D2H;streamsync; D2H; streamsync; endloop(1); -loop(1000); D2H; streamsync; D2H; streamsync; endloop(1); -loop(1000); D2H; streamsync; D2H; streamsync; endloop(1); -loop(10000); D2H; streamsync; D2H; streamsync; endloop(1); -loop(10000); D2H; streamsync; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2d2h_wosync.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2d2h_wosync.hcm deleted file mode 100644 index 4b91403582..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2d2h_wosync.hcm +++ /dev/null @@ -1,10 +0,0 @@ -setstream(1); -loop(10); D2H; D2H; streamsync; endloop(1); -loop(10); D2H; D2H; streamsync; endloop(1); -loop(100); D2H; D2H; streamsync; endloop(1); -loop(100); D2H; D2H; streamsync; endloop(1); -loop(1000); D2H; D2H; streamsync; endloop(1); -loop(1000); D2H; D2H; streamsync; endloop(1); -loop(1000); D2H; D2H; streamsync; endloop(1); -loop(10000); D2H; D2H; streamsync; endloop(1); -loop(10000); D2H; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2h2d.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2h2d.hcm deleted file mode 100644 index a2e4311bf6..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2h2d.hcm +++ /dev/null @@ -1,10 +0,0 @@ -setstream(1); -loop(10); H2D; streamsync; H2D; streamsync; endloop(1); -loop(10); H2D; streamsync; H2D; streamsync; endloop(1); -loop(100); H2D; streamsync; H2D; streamsync; endloop(1); -loop(100); H2D; streamsync; H2D; streamsync; endloop(1); -loop(1000); H2D;streamsync; H2D; streamsync; endloop(1); -loop(1000); H2D; streamsync; H2D; streamsync; endloop(1); -loop(1000); H2D; streamsync; H2D; streamsync; endloop(1); -loop(10000); H2D; streamsync; H2D; streamsync; endloop(1); -loop(10000); H2D; streamsync; H2D; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2h2d_kernel.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2h2d_kernel.hcm deleted file mode 100644 index 0c622614cc..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2h2d_kernel.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1); -loop(10); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1); -loop(100); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1); -loop(100); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1); -loop(1000); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1); -loop(1000); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1); -loop(10000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1); -loop(10000); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2h2d_kernel_wosync.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2h2d_kernel_wosync.hcm deleted file mode 100644 index d73467da10..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2h2d_kernel_wosync.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1); -loop(10); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync;endloop(1); -loop(100); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync;endloop(1); -loop(100); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync;endloop(1); -loop(1000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1); -loop(1000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1); -loop(10000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1); -loop(10000); H2D ; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2h2d_wosync.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2h2d_wosync.hcm deleted file mode 100644 index 35f5e68522..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2h2d_wosync.hcm +++ /dev/null @@ -1,10 +0,0 @@ -setstream(1); -loop(10); H2D; H2D; streamsync; endloop(1); -loop(10); H2D; H2D; streamsync; endloop(1); -loop(100); H2D; H2D; streamsync; endloop(1); -loop(100); H2D; H2D; streamsync; endloop(1); -loop(1000); H2D; H2D; streamsync; endloop(1); -loop(1000); H2D; H2D; streamsync; endloop(1); -loop(1000); H2D; H2D; streamsync; endloop(1); -loop(10000); H2D; H2D; streamsync; endloop(1); -loop(10000); H2D; H2D; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2kernels.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2kernels.hcm deleted file mode 100644 index 3b85c6bef8..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2kernels.hcm +++ /dev/null @@ -1,10 +0,0 @@ -setstream(1); -loop(10); NullKernel; streamsync; NullKernel; streamsync; endloop(1); -loop(10); NullKernel; streamsync; NullKernel; streamsync; endloop(1); -loop(100); NullKernel; streamsync; NullKernel; streamsync; endloop(1); -loop(100); NullKernel; streamsync; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; NullKernel; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; NullKernel; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_2kernels_wosync.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_2kernels_wosync.hcm deleted file mode 100644 index 584d6b8021..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_2kernels_wosync.hcm +++ /dev/null @@ -1,10 +0,0 @@ -setstream(1); -loop(10); NullKernel; NullKernel; streamsync; endloop(1); -loop(10); NullKernel; NullKernel; streamsync; endloop(1); -loop(100); NullKernel; NullKernel; streamsync; endloop(1); -loop(100); NullKernel; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; NullKernel; streamsync; endloop(1); -loop(10000); NullKernel; NullKernel; streamsync; endloop(1); -loop(10000); NullKernel; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3_d2h_h2d.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3_d2h_h2d.hcm deleted file mode 100644 index 7f0fce96c5..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3_d2h_h2d.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1); -loop(10); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1); -loop(100); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1); -loop(100); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1); -loop(1000); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1); -loop(1000); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1); -loop(10000); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1); -loop(10000); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3_d2h_kernel.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3_d2h_kernel.hcm deleted file mode 100644 index a384439b5c..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3_d2h_kernel.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1); -loop(10); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1); -loop(100); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1); -loop(100); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1); -loop(1000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1); -loop(1000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1); -loop(10000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1); -loop(10000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3_d2h_sync_h2d.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3_d2h_sync_h2d.hcm deleted file mode 100644 index 1cab6ff0d2..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3_d2h_sync_h2d.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1); -loop(10); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1); -loop(100); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1); -loop(100); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1); -loop(1000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1); -loop(1000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1); -loop(10000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1); -loop(10000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3_d2h_sync_kernel.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3_d2h_sync_kernel.hcm deleted file mode 100644 index ff5b09a3dc..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3_d2h_sync_kernel.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1); -loop(10); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1); -loop(100); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1); -loop(100); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1); -loop(1000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1); -loop(1000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1); -loop(10000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1); -loop(10000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3_h2d_d2h.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3_h2d_d2h.hcm deleted file mode 100644 index d8921a64e7..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3_h2d_d2h.hcm +++ /dev/null @@ -1,10 +0,0 @@ -setstream(1); -loop(10); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1); -loop(10); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1); -loop(100); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1); -loop(100); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1); -loop(1000); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1); -loop(1000); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1); -loop(10000); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1); -loop(10000); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1); - diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3_h2d_kernel.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3_h2d_kernel.hcm deleted file mode 100644 index 4ccbf9a83c..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3_h2d_kernel.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1); -loop(10); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1); -loop(100); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1); -loop(100); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1); -loop(1000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1); -loop(1000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1); -loop(10000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1); -loop(10000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3_h2d_sync_d2h.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3_h2d_sync_d2h.hcm deleted file mode 100644 index a3d9a282f5..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3_h2d_sync_d2h.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1); -loop(10); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1); -loop(100); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1); -loop(100); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1); -loop(1000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1); -loop(1000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1); -loop(10000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1); -loop(10000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3_h2d_sync_kernel.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3_h2d_sync_kernel.hcm deleted file mode 100644 index 56554d15fd..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3_h2d_sync_kernel.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1); -loop(10); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1); -loop(100); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1); -loop(100); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1); -loop(1000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1); -loop(1000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1); -loop(10000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1); -loop(10000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3_kernel_d2h.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3_kernel_d2h.hcm deleted file mode 100644 index a6e3a683d2..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3_kernel_d2h.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1); -loop(10); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1); -loop(100); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1); -loop(100); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1); -loop(1000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1); -loop(1000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1); -loop(10000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1); -loop(10000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3_kernel_h2d.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3_kernel_h2d.hcm deleted file mode 100644 index eae3eadc5b..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3_kernel_h2d.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1); -loop(10); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1); -loop(100); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1); -loop(100); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1); -loop(1000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1); -loop(1000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1); -loop(10000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1); -loop(10000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3_kernel_sync_d2h.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3_kernel_sync_d2h.hcm deleted file mode 100644 index 9e21709b0b..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3_kernel_sync_d2h.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(10); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(100); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(100); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3_kernel_sync_h2d.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3_kernel_sync_h2d.hcm deleted file mode 100644 index b1ef7ef9f4..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3_kernel_sync_h2d.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(10); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(100); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(100); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3_sync.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3_sync.hcm deleted file mode 100644 index bc8d21c594..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3_sync.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10);streamsync; streamsync; streamsync; endloop(1); -loop(10);streamsync; streamsync; streamsync; endloop(1); -loop(100);streamsync; streamsync; streamsync; endloop(1); -loop(100);streamsync; streamsync; streamsync; endloop(1); -loop(1000);streamsync; streamsync; streamsync; endloop(1); -loop(1000);streamsync; streamsync; streamsync; endloop(1); -loop(10000);streamsync; streamsync; streamsync; endloop(1); -loop(10000);streamsync; streamsync; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3d2h.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3d2h.hcm deleted file mode 100644 index 4e07574b99..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3d2h.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1); -loop(10); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1); -loop(100); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1); -loop(100); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1); -loop(1000); D2H;streamsync; D2H; streamsync; D2H; streamsync; endloop(1); -loop(1000); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1); -loop(10000); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1); -loop(10000); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3d2h_wosync.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3d2h_wosync.hcm deleted file mode 100644 index e96707fed9..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3d2h_wosync.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); D2H; D2H; D2H; streamsync; endloop(1); -loop(10); D2H; D2H; D2H; streamsync; endloop(1); -loop(100); D2H; D2H; D2H; streamsync; endloop(1); -loop(100); D2H; D2H; D2H; streamsync; endloop(1); -loop(1000); D2H; D2H; D2H; streamsync; endloop(1); -loop(1000); D2H; D2H; D2H; streamsync; endloop(1); -loop(10000); D2H; D2H; D2H; streamsync; endloop(1); -loop(10000); D2H; D2H; D2H;streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3h2d.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3h2d.hcm deleted file mode 100644 index 82151adb8b..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3h2d.hcm +++ /dev/null @@ -1,10 +0,0 @@ -setstream(1); -loop(10); H2D; streamsync;H2D;streamsync; H2D; streamsync; endloop(1); -loop(10); H2D; streamsync;H2D;streamsync; H2D; streamsync; endloop(1); -loop(100); H2D; streamsync;H2D; streamsync;H2D; streamsync; endloop(1); -loop(100); H2D;streamsync; H2D; streamsync;H2D; streamsync; endloop(1); -loop(1000); H2D;streamsync; H2D;streamsync; H2D; streamsync; endloop(1); -loop(1000); H2D;streamsync; H2D; streamsync;H2D; streamsync; endloop(1); -loop(1000); H2D;streamsync; H2D; streamsync;H2D; streamsync; endloop(1); -loop(10000); H2D;streamsync; H2D; streamsync;H2D; streamsync; endloop(1); -loop(10000); H2D;streamsync; H2D;streamsync; H2D; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3h2d_wosync.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3h2d_wosync.hcm deleted file mode 100644 index 7d96bfcfab..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3h2d_wosync.hcm +++ /dev/null @@ -1,10 +0,0 @@ -setstream(1); -loop(10); H2D; H2D; H2D; streamsync; endloop(1); -loop(10); H2D; H2D; H2D; streamsync; endloop(1); -loop(100); H2D; H2D; H2D; streamsync; endloop(1); -loop(100); H2D; H2D; H2D; streamsync; endloop(1); -loop(1000); H2D; H2D; H2D; streamsync; endloop(1); -loop(1000); H2D; H2D; H2D; streamsync; endloop(1); -loop(1000); H2D; H2D; H2D; streamsync; endloop(1); -loop(10000); H2D; H2D; H2D; streamsync; endloop(1); -loop(10000); H2D; H2D; H2D; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3kernels.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3kernels.hcm deleted file mode 100644 index 2e8306dfde..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3kernels.hcm +++ /dev/null @@ -1,10 +0,0 @@ -setstream(1); -loop(10); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1); -loop(10); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1); -loop(100); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1); -loop(100); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_3kernels_wosync.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_3kernels_wosync.hcm deleted file mode 100644 index 85cd0dd4d2..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_3kernels_wosync.hcm +++ /dev/null @@ -1,10 +0,0 @@ -setstream(1); -loop(10); NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(10); NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(100); NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(100); NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(10000); NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(10000); NullKernel; NullKernel; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_4kernels.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_4kernels.hcm deleted file mode 100644 index 48a8223626..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_4kernels.hcm +++ /dev/null @@ -1,10 +0,0 @@ -setstream(1); -loop(10); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(10); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(100); NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1); -loop(100); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_5kernels.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_5kernels.hcm deleted file mode 100644 index 70ad00c248..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_5kernels.hcm +++ /dev/null @@ -1,10 +0,0 @@ -setstream(1); -loop(10); NullKernel; NullKernel; NullKernel; NullKernel;NullKernel; streamsync; endloop(1); -loop(10); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(100); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1); -loop(100); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1); -loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_6kernels.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_6kernels.hcm deleted file mode 100644 index 1bbb5694b1..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_6kernels.hcm +++ /dev/null @@ -1,10 +0,0 @@ -setstream(1); -loop(10); NullKernel; NullKernel; NullKernel; NullKernel;NullKernel;NullKernel; streamsync; endloop(1); -loop(10); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1); -loop(100); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;NullKernel;streamsync; endloop(1); -loop(100); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1); -loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1); -loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1); -loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1); -loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1); -loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_d2h.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_d2h.hcm deleted file mode 100644 index 54f06a3481..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_d2h.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); D2H; streamsync; endloop(1); -loop(10); D2H; streamsync; endloop(1); -loop(100); D2H; streamsync; endloop(1); -loop(100); D2H; streamsync; endloop(1); -loop(1000); D2H; streamsync; endloop(1); -loop(1000); D2H; streamsync; endloop(1); -loop(10000); D2H; streamsync; endloop(1); -loop(10000); D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_d2h_h2d.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_d2h_h2d.hcm deleted file mode 100644 index 6667ba95fa..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_d2h_h2d.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); D2H; H2D; streamsync; endloop(1); -loop(10); D2H; H2D; streamsync; endloop(1); -loop(100); D2H; H2D; streamsync; endloop(1); -loop(100); D2H; H2D; streamsync; endloop(1); -loop(1000); D2H; H2D; streamsync; endloop(1); -loop(1000); D2H; H2D; streamsync; endloop(1); -loop(10000); D2H; H2D; streamsync; endloop(1); -loop(10000); D2H; H2D; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_d2h_kernel.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_d2h_kernel.hcm deleted file mode 100644 index fe770c5e9d..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_d2h_kernel.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); D2H; NullKernel; streamsync; endloop(1); -loop(10); D2H; NullKernel; streamsync; endloop(1); -loop(100); D2H; NullKernel; streamsync; endloop(1); -loop(100); D2H; NullKernel; streamsync; endloop(1); -loop(1000); D2H; NullKernel; streamsync; endloop(1); -loop(1000); D2H; NullKernel; streamsync; endloop(1); -loop(10000); D2H; NullKernel; streamsync; endloop(1); -loop(10000); D2H; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_d2h_sync_h2d.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_d2h_sync_h2d.hcm deleted file mode 100644 index 20ec951509..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_d2h_sync_h2d.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); D2H; streamsync; H2D; streamsync; endloop(1); -loop(10); D2H; streamsync; H2D; streamsync; endloop(1); -loop(100); D2H; streamsync; H2D; streamsync; endloop(1); -loop(100); D2H; streamsync; H2D; streamsync; endloop(1); -loop(1000); D2H; streamsync; H2D; streamsync; endloop(1); -loop(1000); D2H; streamsync; H2D; streamsync; endloop(1); -loop(10000); D2H; streamsync; H2D; streamsync; endloop(1); -loop(10000); D2H; streamsync; H2D; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_d2h_sync_kernel.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_d2h_sync_kernel.hcm deleted file mode 100644 index 77e483b3df..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_d2h_sync_kernel.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); D2H; streamsync;NullKernel; streamsync; endloop(1); -loop(10); D2H; streamsync;NullKernel; streamsync; endloop(1); -loop(100); D2H; streamsync;NullKernel; streamsync; endloop(1); -loop(100); D2H; streamsync;NullKernel; streamsync; endloop(1); -loop(1000); D2H; streamsync;NullKernel; streamsync; endloop(1); -loop(1000); D2H; streamsync;NullKernel; streamsync; endloop(1); -loop(10000); D2H; streamsync;NullKernel; streamsync; endloop(1); -loop(10000); D2H; streamsync;NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_h2d.hcm deleted file mode 100644 index f5642bfdf0..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d.hcm +++ /dev/null @@ -1,10 +0,0 @@ -setstream(1); -loop(10); H2D; streamsync; endloop(1); -loop(10); H2D; streamsync; endloop(1); -loop(100); H2D; streamsync; endloop(1); -loop(100); H2D; streamsync; endloop(1); -loop(1000); H2D; streamsync; endloop(1); -loop(1000); H2D; streamsync; endloop(1); -loop(1000); H2D; streamsync; endloop(1); -loop(10000); H2D; streamsync; endloop(1); -loop(10000); H2D; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_10.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_10.hcm deleted file mode 100644 index 05452b9c87..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_10.hcm +++ /dev/null @@ -1,2 +0,0 @@ -setstream(1); -loop(10); H2D; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_d2h.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_d2h.hcm deleted file mode 100644 index dad9fc7437..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_d2h.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); H2D; D2H; streamsync; endloop(1); -loop(10); H2D; D2H; streamsync; endloop(1); -loop(100); H2D; D2H; streamsync; endloop(1); -loop(100); H2D; D2H; streamsync; endloop(1); -loop(1000); H2D; D2H; streamsync; endloop(1); -loop(1000); H2D; D2H; streamsync; endloop(1); -loop(10000); H2D; D2H; streamsync; endloop(1); -loop(10000); H2D; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_kernel.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_kernel.hcm deleted file mode 100644 index 1b60640b9e..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_kernel.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); H2D; NullKernel; streamsync; endloop(1); -loop(10); H2D; NullKernel; streamsync; endloop(1); -loop(100); H2D; NullKernel; streamsync; endloop(1); -loop(100); H2D; NullKernel; streamsync; endloop(1); -loop(1000); H2D; NullKernel; streamsync; endloop(1); -loop(1000); H2D; NullKernel; streamsync; endloop(1); -loop(10000); H2D; NullKernel; streamsync; endloop(1); -loop(10000); H2D; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_kernel_d2h.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_kernel_d2h.hcm deleted file mode 100644 index 6e4e9f3544..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_kernel_d2h.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10);H2D; streamsync; NullKernel; D2H; streamsync;endloop(1); -loop(10); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1); -loop(100); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1); -loop(100); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1); -loop(1000); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1); -loop(1000); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1); -loop(10000); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1); -loop(10000); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_kernel_d2h_wosync.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_kernel_d2h_wosync.hcm deleted file mode 100644 index 4e94a26ebf..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_kernel_d2h_wosync.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10);H2D; NullKernel; D2H; streamsync;endloop(1); -loop(10); H2D; NullKernel; D2H; streamsync; endloop(1); -loop(100); H2D; NullKernel; D2H; streamsync; endloop(1); -loop(100); H2D; NullKernel; D2H; streamsync; endloop(1); -loop(1000); H2D; NullKernel; D2H; streamsync; endloop(1); -loop(1000); H2D; NullKernel; D2H; streamsync; endloop(1); -loop(10000); H2D; NullKernel; D2H; streamsync; endloop(1); -loop(10000); H2D; NullKernel; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_kernel_wosync.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_kernel_wosync.hcm deleted file mode 100644 index b3b40d3190..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_kernel_wosync.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); H2D; NullKernel; streamsync; endloop(1); -loop(10); H2D; NullKernel; streamsync; endloop(1); -loop(100); H2D; NullKernel; streamsync; endloop(1); -loop(100); H2D; NullKernel; streamsync; endloop(1); -loop(1000); H2D; NullKernel; streamsync; endloop(1); -loop(1000); H2D; NullKernel; streamsync; endloop(1); -loop(10000); H2D; NullKernel; streamsync; endloop(1); -loop(10000); H2D ; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_sync_d2h.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_sync_d2h.hcm deleted file mode 100644 index 030213d1b3..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_sync_d2h.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); H2D; streamsync; D2H; streamsync; endloop(1); -loop(10); H2D; streamsync; D2H; streamsync; endloop(1); -loop(100); H2D; streamsync; D2H; streamsync; endloop(1); -loop(100); H2D; streamsync; D2H; streamsync; endloop(1); -loop(1000); H2D; streamsync; D2H; streamsync; endloop(1); -loop(1000); H2D; streamsync; D2H; streamsync; endloop(1); -loop(10000); H2D; streamsync; D2H; streamsync; endloop(1); -loop(10000); H2D; streamsync; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_sync_kernel_sync.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_sync_kernel_sync.hcm deleted file mode 100644 index 146c74bcae..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_sync_kernel_sync.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); H2D; streamsync; NullKernel; streamsync; endloop(1); -loop(10); H2D; streamsync; NullKernel; streamsync; endloop(1); -loop(100); H2D; streamsync; NullKernel; streamsync; endloop(1); -loop(100); H2D; streamsync; NullKernel; streamsync; endloop(1); -loop(1000); H2D; streamsync; NullKernel; streamsync; endloop(1); -loop(1000); H2D; streamsync; NullKernel; streamsync; endloop(1); -loop(10000); H2D; streamsync; NullKernel; streamsync; endloop(1); -loop(10000); H2D; streamsync; NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_sync_kernel_sync_d2h.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_sync_kernel_sync_d2h.hcm deleted file mode 100644 index 366d04f469..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_h2d_sync_kernel_sync_d2h.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10);H2D; streamsync; NullKernel;streamsync; D2H; streamsync;endloop(1); -loop(10); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(100); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(100); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(1000); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(1000); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(10000); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(10000); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_kernel.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_kernel.hcm deleted file mode 100644 index 027d89aad0..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_kernel.hcm +++ /dev/null @@ -1,10 +0,0 @@ -setstream(1); -loop(10); NullKernel; streamsync; endloop(1); -loop(10); NullKernel; streamsync; endloop(1); -loop(100); NullKernel; streamsync; endloop(1); -loop(100); NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_kernel_barrier.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_kernel_barrier.hcm deleted file mode 100644 index fb6a867e7f..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_kernel_barrier.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); NullKernel; streamsync; streamsync; endloop(1); -loop(10); NullKernel; streamsync; streamsync; endloop(1); -loop(100); NullKernel; streamsync; streamsync; endloop(1); -loop(100); NullKernel; streamsync; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_kernel_d2h.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_kernel_d2h.hcm deleted file mode 100644 index 2e64472dbd..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_kernel_d2h.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); NullKernel; D2H; streamsync; endloop(1); -loop(10); NullKernel; D2H; streamsync; endloop(1); -loop(100); NullKernel; D2H; streamsync; endloop(1); -loop(100); NullKernel; D2H; streamsync; endloop(1); -loop(1000); NullKernel; D2H; streamsync; endloop(1); -loop(1000); NullKernel; D2H; streamsync; endloop(1); -loop(10000); NullKernel; D2H; streamsync; endloop(1); -loop(10000); NullKernel; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_kernel_h2d.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_kernel_h2d.hcm deleted file mode 100644 index b220a69c68..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_kernel_h2d.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); NullKernel; H2D; streamsync; endloop(1); -loop(10); NullKernel; H2D; streamsync; endloop(1); -loop(100); NullKernel; H2D; streamsync; endloop(1); -loop(100); NullKernel; H2D; streamsync; endloop(1); -loop(1000); NullKernel; H2D; streamsync; endloop(1); -loop(1000); NullKernel; H2D; streamsync; endloop(1); -loop(10000); NullKernel; H2D; streamsync; endloop(1); -loop(10000); NullKernel; H2D; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_kernel_sync_d2h.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_kernel_sync_d2h.hcm deleted file mode 100644 index 48b332b1c3..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_kernel_sync_d2h.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(10); NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(100); NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(100); NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; D2H; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; D2H; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_kernel_sync_h2d.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_kernel_sync_h2d.hcm deleted file mode 100644 index 5a45d55376..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_kernel_sync_h2d.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(10); NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(100); NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(100); NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(1000); NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; H2D; streamsync; endloop(1); -loop(10000); NullKernel; streamsync; H2D; streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_streamcreate.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_streamcreate.hcm deleted file mode 100644 index 1e6aef5dc1..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_streamcreate.hcm +++ /dev/null @@ -1,2 +0,0 @@ -setstream(1); -loop(10);setstream(1);setstream(2);setstream(3);setstream(4);setstream(5);streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/perf/scripts/latency_sync.hcm b/samples/1_Utils/hipCommander/perf/scripts/latency_sync.hcm deleted file mode 100644 index a784013b1a..0000000000 --- a/samples/1_Utils/hipCommander/perf/scripts/latency_sync.hcm +++ /dev/null @@ -1,9 +0,0 @@ -setstream(1); -loop(10); streamsync; endloop(1); -loop(10); streamsync; endloop(1); -loop(100); streamsync; endloop(1); -loop(100); streamsync; endloop(1); -loop(1000); streamsync; endloop(1); -loop(1000); streamsync; endloop(1); -loop(10000); streamsync; endloop(1); -loop(10000); streamsync; endloop(1); diff --git a/samples/1_Utils/hipCommander/setstream.hcm b/samples/1_Utils/hipCommander/setstream.hcm deleted file mode 100644 index 22f1931ac4..0000000000 --- a/samples/1_Utils/hipCommander/setstream.hcm +++ /dev/null @@ -1,3 +0,0 @@ -setstream(1); -setstream(2); H2D; NullKernel; D2H; -streamsync diff --git a/samples/1_Utils/hipCommander/testcase.cpp b/samples/1_Utils/hipCommander/testcase.cpp deleted file mode 100644 index 9be1c0c644..0000000000 --- a/samples/1_Utils/hipCommander/testcase.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include - -static const int BLOCKSIZEX = 32; -static const int BLOCKSIZEY = 16; - -__global__ void fails(float* pErrorI) { - if (pErrorI != 0) { - pErrorI[0] = 1; - } -} - -int main() { - dim3 blocks(1, 1); - dim3 threads(BLOCKSIZEX, BLOCKSIZEY); - float error; - - hipLaunchKernelGGL(HIP_KERNEL_NAME(fails), blocks, threads, 0, 0, &error); -}