SWDEV-389703 - Remove hipBusBandwidth and hipCommander samples (#277)

Change-Id: Id65eab4d0cc524d7cacac4fbf1d3b2c3a640eb77

[ROCm/hip-tests commit: fc9d6d2df5]
Este commit está contenido en:
ROCm CI Service Account
2023-05-24 04:52:07 +05:30
cometido por GitHub
padre 3daebe2d75
commit 1c58ccfeee
Se han modificado 93 ficheros con 0 adiciones y 3952 borrados
-1
Ver fichero
@@ -13,5 +13,4 @@ samples/0_Intro/module_api/vcpy_isa.hsaco
samples/0_Intro/module_api/vcpy_kernel.co
samples/0_Intro/module_api/vcpy_kernel.code
samples/1_Utils/hipInfo/hipInfo
samples/1_Utils/hipBusBandwidth/hipBusBandwidth
samples/1_Utils/hipDispatchLatency/hipDispatchLatency
@@ -1,44 +0,0 @@
# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
project(hipBusBandwidth)
cmake_minimum_required(VERSION 3.10)
if (NOT DEFINED ROCM_PATH )
set ( ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory." )
endif ()
# Search for rocm in common locations
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH})
# Find hip
find_package(hip)
# Set compiler and linker
set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE})
set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE})
set(CMAKE_BUILD_TYPE Release)
# Create the excutable
add_executable(hipBusBandwidth hipBusBandwidth.cpp ResultDatabase.cpp)
# Link with HIP
target_link_libraries(hipBusBandwidth hip::host)
@@ -1,27 +0,0 @@
Copyright (c) 2011, UT-Battelle, LLC
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Oak Ridge National Laboratory, nor UT-Battelle, LLC, nor
the names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -1,43 +0,0 @@
# Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
ifeq ($(OS),Windows_NT)
$(error Makefile is not supported on windows platform. Please use cmake instead to build sample.)
endif
ROCM_PATH?= $(wildcard /opt/rocm/)
HIP_PATH?= $(wildcard $(ROCM_PATH)/hip)
ifeq (,$(HIP_PATH))
HIP_PATH=../../..
endif
HIPCC=$(HIP_PATH)/bin/hipcc
EXE=hipBusBandwidth
CXXFLAGS = -O3
all: install
$(EXE): hipBusBandwidth.cpp ResultDatabase.cpp
$(HIPCC) $(CXXFLAGS) $^ -o $@
install: $(EXE)
cp $(EXE) $(HIP_PATH)/bin
clean:
rm -f *.o $(EXE)
@@ -1,462 +0,0 @@
#include "ResultDatabase.h"
#include <cfloat>
#include <algorithm>
#include <cmath>
#include <iomanip>
using namespace std;
#define SORT_RETAIN_ATTS_ORDER 1
bool ResultDatabase::Result::operator<(const Result& rhs) const {
if (test < rhs.test) return true;
if (test > rhs.test) return false;
#if (SORT_RETAIN_ATTS_ORDER == 0)
// For ties, sort by the value of the attribute:
if (atts < rhs.atts) return true;
if (atts > rhs.atts) return false;
#endif
return false; // less-operator returns false on equal
}
double ResultDatabase::Result::GetMin() const {
double r = FLT_MAX;
for (int i = 0; i < value.size(); i++) {
r = min(r, value[i]);
}
return r;
}
double ResultDatabase::Result::GetMax() const {
double r = -FLT_MAX;
for (int i = 0; i < value.size(); i++) {
r = max(r, value[i]);
}
return r;
}
double ResultDatabase::Result::GetMedian() const { return GetPercentile(50); }
double ResultDatabase::Result::GetPercentile(double q) const {
int n = value.size();
if (n == 0) return FLT_MAX;
if (n == 1) return value[0];
if (q <= 0) return value[0];
if (q >= 100) return value[n - 1];
double index = ((n + 1.) * q / 100.) - 1;
vector<double> sorted = value;
sort(sorted.begin(), sorted.end());
if (n == 2) return (sorted[0] * (1 - q / 100.) + sorted[1] * (q / 100.));
int index_lo = int(index);
double frac = index - index_lo;
if (frac == 0) return sorted[index_lo];
double lo = sorted[index_lo];
double hi = sorted[index_lo + 1];
return lo + (hi - lo) * frac;
}
double ResultDatabase::Result::GetMean() const {
double r = 0;
for (int i = 0; i < value.size(); i++) {
r += value[i];
}
return r / double(value.size());
}
double ResultDatabase::Result::GetStdDev() const {
double r = 0;
double u = GetMean();
if (u == FLT_MAX) return FLT_MAX;
for (int i = 0; i < value.size(); i++) {
r += (value[i] - u) * (value[i] - u);
}
r = sqrt(r / value.size());
return r;
}
void ResultDatabase::AddResults(const string& test, const string& atts, const string& unit,
const vector<double>& values) {
for (int i = 0; i < values.size(); i++) {
AddResult(test, atts, unit, values[i]);
}
}
static string RemoveAllButLeadingSpaces(const string& a) {
string b;
int n = a.length();
int i = 0;
while (i < n && a[i] == ' ') {
b += a[i];
++i;
}
for (; i < n; i++) {
if (a[i] != ' ' && a[i] != '\t') b += a[i];
}
return b;
}
void ResultDatabase::AddResult(const string& test_orig, const string& atts_orig,
const string& unit_orig, double value) {
string test = RemoveAllButLeadingSpaces(test_orig);
string atts = RemoveAllButLeadingSpaces(atts_orig);
string unit = RemoveAllButLeadingSpaces(unit_orig);
int index;
for (index = 0; index < results.size(); index++) {
if (results[index].test == test && results[index].atts == atts) {
if (results[index].unit != unit) throw "Internal error: mixed units";
break;
}
}
if (index >= results.size()) {
Result r;
r.test = test;
r.atts = atts;
r.unit = unit;
results.push_back(r);
}
results[index].value.push_back(value);
}
// ****************************************************************************
// Method: ResultDatabase::DumpDetailed
//
// Purpose:
// Writes the full results, including all trials.
//
// Arguments:
// out where to print
//
// Programmer: Jeremy Meredith
// Creation: August 14, 2009
//
// Modifications:
// Jeremy Meredith, Wed Nov 10 14:25:17 EST 2010
// Renamed to DumpDetailed to make room for a DumpSummary.
//
// Jeremy Meredith, Thu Nov 11 11:39:57 EST 2010
// Added note about (*) missing value tag.
//
// Jeremy Meredith, Tue Nov 23 13:57:02 EST 2010
// Changed note about missing values to be worded a little better.
//
// ****************************************************************************
void ResultDatabase::DumpDetailed(ostream& out) {
vector<Result> sorted(results);
stable_sort(sorted.begin(), sorted.end());
const int testNameW = 24;
const int attW = 12;
const int fieldW = 11;
out << std::fixed << right << std::setprecision(4);
int maxtrials = 1;
for (int i = 0; i < sorted.size(); i++) {
if (sorted[i].value.size() > maxtrials) maxtrials = sorted[i].value.size();
}
// TODO: in big parallel runs, the "trials" are the procs
// and we really don't want to print them all out....
out << setw(testNameW) << "test\t" << setw(attW) << "atts\t" << setw(fieldW) << "median\t"
<< "mean\t"
<< "stddev\t"
<< "min\t"
<< "max\t";
for (int i = 0; i < maxtrials; i++) out << "trial" << i << "\t";
out << endl;
for (int i = 0; i < sorted.size(); i++) {
Result& r = sorted[i];
out << setw(testNameW) << r.test + "\t";
out << setw(attW) << r.atts + "\t";
out << setw(fieldW) << r.unit + "\t";
if (r.GetMedian() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMedian() << "\t";
if (r.GetMean() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMean() << "\t";
if (r.GetStdDev() == FLT_MAX)
out << "N/A\t";
else
out << r.GetStdDev() << "\t";
if (r.GetMin() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMin() << "\t";
if (r.GetMax() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMax() << "\t";
for (int j = 0; j < r.value.size(); j++) {
if (r.value[j] == FLT_MAX)
out << "N/A\t";
else
out << r.value[j] << "\t";
}
out << endl;
}
out << endl
<< "Note: Any results marked with (*) had missing values." << endl
<< " This can occur on systems with a mixture of" << endl
<< " device types or architectural capabilities." << endl;
}
// ****************************************************************************
// Method: ResultDatabase::DumpDetailed
//
// Purpose:
// Writes the summary results (min/max/stddev/med/mean), but not
// every individual trial.
//
// Arguments:
// out where to print
//
// Programmer: Jeremy Meredith
// Creation: November 10, 2010
//
// Modifications:
// Jeremy Meredith, Thu Nov 11 11:39:57 EST 2010
// Added note about (*) missing value tag.
//
// ****************************************************************************
void ResultDatabase::DumpSummary(ostream& out) {
vector<Result> sorted(results);
stable_sort(sorted.begin(), sorted.end());
const int testNameW = 24;
const int attW = 12;
const int fieldW = 9;
out << std::fixed << right << std::setprecision(4);
// TODO: in big parallel runs, the "trials" are the procs
// and we really don't want to print them all out....
out << setw(testNameW) << "test\t" << setw(attW) << "atts\t" << setw(fieldW) << "units\t"
<< "median\t"
<< "mean\t"
<< "stddev\t"
<< "min\t"
<< "max\t";
out << endl;
for (int i = 0; i < sorted.size(); i++) {
Result& r = sorted[i];
out << setw(testNameW) << r.test + "\t";
out << setw(attW) << r.atts + "\t";
out << setw(fieldW) << r.unit + "\t";
if (r.GetMedian() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMedian() << "\t";
if (r.GetMean() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMean() << "\t";
if (r.GetStdDev() == FLT_MAX)
out << "N/A\t";
else
out << r.GetStdDev() << "\t";
if (r.GetMin() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMin() << "\t";
if (r.GetMax() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMax() << "\t";
out << endl;
}
out << endl
<< "Note: results marked with (*) had missing values such as" << endl
<< "might occur with a mixture of architectural capabilities." << endl;
}
// ****************************************************************************
// Method: ResultDatabase::ClearAllResults
//
// Purpose:
// Clears all existing results from the ResultDatabase; used for multiple passes
// of the same test or multiple tests.
//
// Arguments:
//
// Programmer: Jeffrey Young
// Creation: September 10th, 2014
//
// Modifications:
//
//
// ****************************************************************************
void ResultDatabase::ClearAllResults() { results.clear(); }
// ****************************************************************************
// Method: ResultDatabase::DumpCsv
//
// Purpose:
// Writes either detailed or summary results (min/max/stddev/med/mean), but not
// every individual trial.
//
// Arguments:
// out file to print CSV results
//
// Programmer: Jeffrey Young
// Creation: August 28th, 2014
//
// Modifications:
//
// ****************************************************************************
void ResultDatabase::DumpCsv(string fileName) {
bool emptyFile;
vector<Result> sorted(results);
stable_sort(sorted.begin(), sorted.end());
// Check to see if the file is empty - if so, add the headers
emptyFile = this->IsFileEmpty(fileName);
// Open file and append by default
ofstream out;
out.open(fileName.c_str(), std::ofstream::out | std::ofstream::app);
// Add headers only for empty files
if (emptyFile) {
// TODO: in big parallel runs, the "trials" are the procs
// and we really don't want to print them all out....
out << "test, "
<< "atts, "
<< "units, "
<< "median, "
<< "mean, "
<< "stddev, "
<< "min, "
<< "max, ";
out << endl;
}
for (int i = 0; i < sorted.size(); i++) {
Result& r = sorted[i];
out << r.test << ", ";
out << r.atts << ", ";
out << r.unit << ", ";
if (r.GetMedian() == FLT_MAX)
out << "N/A, ";
else
out << r.GetMedian() << ", ";
if (r.GetMean() == FLT_MAX)
out << "N/A, ";
else
out << r.GetMean() << ", ";
if (r.GetStdDev() == FLT_MAX)
out << "N/A, ";
else
out << r.GetStdDev() << ", ";
if (r.GetMin() == FLT_MAX)
out << "N/A, ";
else
out << r.GetMin() << ", ";
if (r.GetMax() == FLT_MAX)
out << "N/A, ";
else
out << r.GetMax() << ", ";
out << endl;
}
out << endl;
out.close();
}
// ****************************************************************************
// Method: ResultDatabase::IsFileEmpty
//
// Purpose:
// Returns whether a file is empty - used as a helper for CSV printing
//
// Arguments:
// file The input file to check for emptiness
//
// Programmer: Jeffrey Young
// Creation: August 28th, 2014
//
// Modifications:
//
// ****************************************************************************
bool ResultDatabase::IsFileEmpty(string fileName) {
ifstream file(fileName.c_str());
// If the file doesn't exist it is by definition empty
if (!file.good()) {
return true;
} else {
bool fileEmpty;
fileEmpty = (bool)(file.peek() == ifstream::traits_type::eof());
file.close();
return fileEmpty;
}
// Otherwise, return false
return false;
}
// ****************************************************************************
// Method: ResultDatabase::GetResultsForTest
//
// Purpose:
// Returns a vector of results for just one test name.
//
// Arguments:
// test the name of the test results to search for
//
// Programmer: Jeremy Meredith
// Creation: December 3, 2010
//
// Modifications:
//
// ****************************************************************************
vector<ResultDatabase::Result> ResultDatabase::GetResultsForTest(const string& test) {
// get only the given test results
vector<Result> retval;
for (int i = 0; i < results.size(); i++) {
Result& r = results[i];
if (r.test == test) retval.push_back(r);
}
return retval;
}
// ****************************************************************************
// Method: ResultDatabase::GetResults
//
// Purpose:
// Returns all the results.
//
// Arguments:
//
// Programmer: Jeremy Meredith
// Creation: December 3, 2010
//
// Modifications:
//
// ****************************************************************************
const vector<ResultDatabase::Result>& ResultDatabase::GetResults() const { return results; }
@@ -1,89 +0,0 @@
#ifndef RESULT_DATABASE_H
#define RESULT_DATABASE_H
#include <string>
#include <vector>
#include <iostream>
#include <fstream>
#include <cfloat>
using std::ifstream;
using std::ofstream;
using std::ostream;
using std::string;
using std::vector;
// ****************************************************************************
// Class: ResultDatabase
//
// Purpose:
// Track numerical results as they are generated.
// Print statistics of raw results.
//
// Programmer: Jeremy Meredith
// Creation: June 12, 2009
//
// Modifications:
// Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010
// Split timing reports into detailed and summary. E.g. for serial code,
// we might report all trial values, but skip them in parallel.
//
// Jeremy Meredith, Thu Nov 11 11:40:18 EST 2010
// Added check for missing value tag.
//
// Jeremy Meredith, Mon Nov 22 13:37:10 EST 2010
// Added percentile statistic.
//
// Jeremy Meredith, Fri Dec 3 16:30:31 EST 2010
// Added a method to extract a subset of results based on test name. Also,
// the Result class is now public, so that clients can use them directly.
// Added a GetResults method as well, and made several functions const.
//
// ****************************************************************************
class ResultDatabase {
public:
//
// A performance result for a single SHOC benchmark run.
//
struct Result {
string test; // e.g. "readback"
string atts; // e.g. "pagelocked 4k^2"
string unit; // e.g. "MB/sec"
vector<double> value; // e.g. "837.14"
double GetMin() const;
double GetMax() const;
double GetMedian() const;
double GetPercentile(double q) const;
double GetMean() const;
double GetStdDev() const;
bool operator<(const Result& rhs) const;
bool HadAnyFLTMAXValues() const {
for (int i = 0; i < value.size(); ++i) {
if (value[i] >= FLT_MAX) return true;
}
return false;
}
};
protected:
vector<Result> results;
public:
void AddResult(const string& test, const string& atts, const string& unit, double value);
void AddResults(const string& test, const string& atts, const string& unit,
const vector<double>& values);
vector<Result> GetResultsForTest(const string& test);
const vector<Result>& GetResults() const;
void ClearAllResults();
void DumpDetailed(ostream&);
void DumpSummary(ostream&);
void DumpCsv(string fileName);
private:
bool IsFileEmpty(string fileName);
};
#endif
La diferencia del archivo ha sido suprimido porque es demasiado grande Cargar Diff
@@ -1,55 +0,0 @@
# Copyright (c) 2020 - 2021 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
project(hipCommander)
cmake_minimum_required(VERSION 3.10)
if (NOT DEFINED ROCM_PATH )
set ( ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory." )
endif ()
# Search for rocm in common locations
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH})
# Find hip
find_package(hip)
# Set compiler and linker
set(CMAKE_CXX_COMPILER ${HIP_HIPCC_EXECUTABLE})
set(CMAKE_CXX_LINKER ${HIP_HIPCC_EXECUTABLE})
set(CMAKE_BUILD_TYPE Release)
# Create the excutable
add_executable(hipCommander hipCommander.cpp)
# Generate code object
add_custom_target(
codeobj
ALL
COMMAND ${HIP_HIPCC_EXECUTABLE} --genco ../nullkernel.hip.cpp -o nullkernel.hsaco
COMMENT "codeobj generated"
)
add_dependencies(hipCommander codeobj)
# Link with HIP
target_link_libraries(hipCommander hip::host)
set_property(TARGET hipCommander PROPERTY CXX_STANDARD 11)
@@ -1,27 +0,0 @@
Copyright (c) 2011, UT-Battelle, LLC
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Oak Ridge National Laboratory, nor UT-Battelle, LLC, nor
the names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -1,53 +0,0 @@
# Copyright (c) 2016 - 2021 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
ifeq ($(OS),Windows_NT)
$(error Makefile is not supported on windows platform. Please use cmake instead to build sample.)
endif
ROCM_PATH?= $(wildcard /opt/rocm/)
HIP_PATH?= $(wildcard $(ROCM_PATH)/hip)
ifeq (,$(HIP_PATH))
HIP_PATH=../../..
endif
HIPCC=$(HIP_PATH)/bin/hipcc
EXE=hipCommander
OPT=-O3
#CXXFLAGS = -O3 -g
CXXFLAGS = $(OPT) --std=c++11
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --platform)
CODE_OBJECTS=nullkernel.hsaco
all: ${EXE} ${CODE_OBJECTS}
$(EXE): hipCommander.cpp
$(HIPCC) $(CXXFLAGS) $^ -o $@
nullkernel.hsaco : nullkernel.hip.cpp
$(HIPCC) --genco nullkernel.hip.cpp -o nullkernel.hsaco
install: $(EXE)
cp $(EXE) $(HIP_PATH)/bin
clean:
rm -f *.o *.co $(EXE)
@@ -1,454 +0,0 @@
#include "ResultDatabase.h"
#include <cfloat>
#include <algorithm>
#include <cmath>
#include <iomanip>
using namespace std;
bool ResultDatabase::Result::operator<(const Result& rhs) const {
if (test < rhs.test) return true;
if (test > rhs.test) return false;
if (atts < rhs.atts) return true;
if (atts > rhs.atts) return false;
return false; // less-operator returns false on equal
}
double ResultDatabase::Result::GetMin() const {
double r = FLT_MAX;
for (int i = 0; i < value.size(); i++) {
r = min(r, value[i]);
}
return r;
}
double ResultDatabase::Result::GetMax() const {
double r = -FLT_MAX;
for (int i = 0; i < value.size(); i++) {
r = max(r, value[i]);
}
return r;
}
double ResultDatabase::Result::GetMedian() const { return GetPercentile(50); }
double ResultDatabase::Result::GetPercentile(double q) const {
int n = value.size();
if (n == 0) return FLT_MAX;
if (n == 1) return value[0];
if (q <= 0) return value[0];
if (q >= 100) return value[n - 1];
double index = ((n + 1.) * q / 100.) - 1;
vector<double> sorted = value;
sort(sorted.begin(), sorted.end());
if (n == 2) return (sorted[0] * (1 - q / 100.) + sorted[1] * (q / 100.));
int index_lo = int(index);
double frac = index - index_lo;
if (frac == 0) return sorted[index_lo];
double lo = sorted[index_lo];
double hi = sorted[index_lo + 1];
return lo + (hi - lo) * frac;
}
double ResultDatabase::Result::GetMean() const {
double r = 0;
for (int i = 0; i < value.size(); i++) {
r += value[i];
}
return r / double(value.size());
}
double ResultDatabase::Result::GetStdDev() const {
double r = 0;
double u = GetMean();
if (u == FLT_MAX) return FLT_MAX;
for (int i = 0; i < value.size(); i++) {
r += (value[i] - u) * (value[i] - u);
}
r = sqrt(r / value.size());
return r;
}
void ResultDatabase::AddResults(const string& test, const string& atts, const string& unit,
const vector<double>& values) {
for (int i = 0; i < values.size(); i++) {
AddResult(test, atts, unit, values[i]);
}
}
static string RemoveAllButLeadingSpaces(const string& a) {
string b;
int n = a.length();
int i = 0;
while (i < n && a[i] == ' ') {
b += a[i];
++i;
}
for (; i < n; i++) {
if (a[i] != ' ' && a[i] != '\t') b += a[i];
}
return b;
}
void ResultDatabase::AddResult(const string& test_orig, const string& atts_orig,
const string& unit_orig, double value) {
string test = RemoveAllButLeadingSpaces(test_orig);
string atts = RemoveAllButLeadingSpaces(atts_orig);
string unit = RemoveAllButLeadingSpaces(unit_orig);
int index;
for (index = 0; index < results.size(); index++) {
if (results[index].test == test && results[index].atts == atts) {
if (results[index].unit != unit) throw "Internal error: mixed units";
break;
}
}
if (index >= results.size()) {
Result r;
r.test = test;
r.atts = atts;
r.unit = unit;
results.push_back(r);
}
results[index].value.push_back(value);
}
// ****************************************************************************
// Method: ResultDatabase::DumpDetailed
//
// Purpose:
// Writes the full results, including all trials.
//
// Arguments:
// out where to print
//
// Programmer: Jeremy Meredith
// Creation: August 14, 2009
//
// Modifications:
// Jeremy Meredith, Wed Nov 10 14:25:17 EST 2010
// Renamed to DumpDetailed to make room for a DumpSummary.
//
// Jeremy Meredith, Thu Nov 11 11:39:57 EST 2010
// Added note about (*) missing value tag.
//
// Jeremy Meredith, Tue Nov 23 13:57:02 EST 2010
// Changed note about missing values to be worded a little better.
//
// ****************************************************************************
void ResultDatabase::DumpDetailed(ostream& out) {
vector<Result> sorted(results);
sort(sorted.begin(), sorted.end());
const int testNameW = 24;
const int attW = 12;
const int fieldW = 11;
out << std::fixed << right << std::setprecision(4);
int maxtrials = 1;
for (int i = 0; i < sorted.size(); i++) {
if (sorted[i].value.size() > maxtrials) maxtrials = sorted[i].value.size();
}
// TODO: in big parallel runs, the "trials" are the procs
// and we really don't want to print them all out....
out << setw(testNameW) << "test\t" << setw(attW) << "atts\t" << setw(fieldW) << "median\t"
<< "mean\t"
<< "stddev\t"
<< "min\t"
<< "max\t";
for (int i = 0; i < maxtrials; i++) out << "trial" << i << "\t";
out << endl;
for (int i = 0; i < sorted.size(); i++) {
Result& r = sorted[i];
out << setw(testNameW) << r.test + "\t";
out << setw(attW) << r.atts + "\t";
out << setw(fieldW) << r.unit + "\t";
if (r.GetMedian() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMedian() << "\t";
if (r.GetMean() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMean() << "\t";
if (r.GetStdDev() == FLT_MAX)
out << "N/A\t";
else
out << r.GetStdDev() << "\t";
if (r.GetMin() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMin() << "\t";
if (r.GetMax() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMax() << "\t";
for (int j = 0; j < r.value.size(); j++) {
if (r.value[j] == FLT_MAX)
out << "N/A\t";
else
out << r.value[j] << "\t";
}
out << endl;
}
out << endl
<< "Note: Any results marked with (*) had missing values." << endl
<< " This can occur on systems with a mixture of" << endl
<< " device types or architectural capabilities." << endl;
}
// ****************************************************************************
// Method: ResultDatabase::DumpDetailed
//
// Purpose:
// Writes the summary results (min/max/stddev/med/mean), but not
// every individual trial.
//
// Arguments:
// out where to print
//
// Programmer: Jeremy Meredith
// Creation: November 10, 2010
//
// Modifications:
// Jeremy Meredith, Thu Nov 11 11:39:57 EST 2010
// Added note about (*) missing value tag.
//
// ****************************************************************************
void ResultDatabase::DumpSummary(ostream& out) {
vector<Result> sorted(results);
sort(sorted.begin(), sorted.end());
const int testNameW = 24;
const int attW = 12;
const int fieldW = 9;
out << std::fixed << right << std::setprecision(4);
// TODO: in big parallel runs, the "trials" are the procs
// and we really don't want to print them all out....
out << setw(testNameW) << "test\t" << setw(attW) << "atts\t" << setw(fieldW) << "units\t"
<< "median\t"
<< "mean\t"
<< "stddev\t"
<< "min\t"
<< "max\t";
out << endl;
for (int i = 0; i < sorted.size(); i++) {
Result& r = sorted[i];
out << setw(testNameW) << r.test + "\t";
out << setw(attW) << r.atts + "\t";
out << setw(fieldW) << r.unit + "\t";
if (r.GetMedian() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMedian() << "\t";
if (r.GetMean() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMean() << "\t";
if (r.GetStdDev() == FLT_MAX)
out << "N/A\t";
else
out << r.GetStdDev() << "\t";
if (r.GetMin() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMin() << "\t";
if (r.GetMax() == FLT_MAX)
out << "N/A\t";
else
out << r.GetMax() << "\t";
out << endl;
}
out << endl
<< "Note: results marked with (*) had missing values such as" << endl
<< "might occur with a mixture of architectural capabilities." << endl;
}
// ****************************************************************************
// Method: ResultDatabase::ClearAllResults
//
// Purpose:
// Clears all existing results from the ResultDatabase; used for multiple passes
// of the same test or multiple tests.
//
// Arguments:
//
// Programmer: Jeffrey Young
// Creation: September 10th, 2014
//
// Modifications:
//
//
// ****************************************************************************
void ResultDatabase::ClearAllResults() { results.clear(); }
// ****************************************************************************
// Method: ResultDatabase::DumpCsv
//
// Purpose:
// Writes either detailed or summary results (min/max/stddev/med/mean), but not
// every individual trial.
//
// Arguments:
// out file to print CSV results
//
// Programmer: Jeffrey Young
// Creation: August 28th, 2014
//
// Modifications:
//
// ****************************************************************************
void ResultDatabase::DumpCsv(string fileName) {
bool emptyFile;
vector<Result> sorted(results);
sort(sorted.begin(), sorted.end());
// Check to see if the file is empty - if so, add the headers
emptyFile = this->IsFileEmpty(fileName);
// Open file and append by default
ofstream out;
out.open(fileName.c_str(), std::ofstream::out | std::ofstream::app);
// Add headers only for empty files
if (emptyFile) {
// TODO: in big parallel runs, the "trials" are the procs
// and we really don't want to print them all out....
out << "test, "
<< "atts, "
<< "units, "
<< "median, "
<< "mean, "
<< "stddev, "
<< "min, "
<< "max, ";
out << endl;
}
for (int i = 0; i < sorted.size(); i++) {
Result& r = sorted[i];
out << r.test << ", ";
out << r.atts << ", ";
out << r.unit << ", ";
if (r.GetMedian() == FLT_MAX)
out << "N/A, ";
else
out << r.GetMedian() << ", ";
if (r.GetMean() == FLT_MAX)
out << "N/A, ";
else
out << r.GetMean() << ", ";
if (r.GetStdDev() == FLT_MAX)
out << "N/A, ";
else
out << r.GetStdDev() << ", ";
if (r.GetMin() == FLT_MAX)
out << "N/A, ";
else
out << r.GetMin() << ", ";
if (r.GetMax() == FLT_MAX)
out << "N/A, ";
else
out << r.GetMax() << ", ";
out << endl;
}
out << endl;
out.close();
}
// ****************************************************************************
// Method: ResultDatabase::IsFileEmpty
//
// Purpose:
// Returns whether a file is empty - used as a helper for CSV printing
//
// Arguments:
// file The input file to check for emptiness
//
// Programmer: Jeffrey Young
// Creation: August 28th, 2014
//
// Modifications:
//
// ****************************************************************************
bool ResultDatabase::IsFileEmpty(string fileName) {
ifstream file(fileName.c_str());
// If the file doesn't exist it is by definition empty
if (!file.good()) {
return true;
} else {
bool fileEmpty;
fileEmpty = (bool)(file.peek() == ifstream::traits_type::eof());
file.close();
return fileEmpty;
}
// Otherwise, return false
return false;
}
// ****************************************************************************
// Method: ResultDatabase::GetResultsForTest
//
// Purpose:
// Returns a vector of results for just one test name.
//
// Arguments:
// test the name of the test results to search for
//
// Programmer: Jeremy Meredith
// Creation: December 3, 2010
//
// Modifications:
//
// ****************************************************************************
vector<ResultDatabase::Result> ResultDatabase::GetResultsForTest(const string& test) {
// get only the given test results
vector<Result> retval;
for (int i = 0; i < results.size(); i++) {
Result& r = results[i];
if (r.test == test) retval.push_back(r);
}
return retval;
}
// ****************************************************************************
// Method: ResultDatabase::GetResults
//
// Purpose:
// Returns all the results.
//
// Arguments:
//
// Programmer: Jeremy Meredith
// Creation: December 3, 2010
//
// Modifications:
//
// ****************************************************************************
const vector<ResultDatabase::Result>& ResultDatabase::GetResults() const { return results; }
@@ -1,89 +0,0 @@
#ifndef RESULT_DATABASE_H
#define RESULT_DATABASE_H
#include <string>
#include <vector>
#include <iostream>
#include <fstream>
#include <cfloat>
using std::ifstream;
using std::ofstream;
using std::ostream;
using std::string;
using std::vector;
// ****************************************************************************
// Class: ResultDatabase
//
// Purpose:
// Track numerical results as they are generated.
// Print statistics of raw results.
//
// Programmer: Jeremy Meredith
// Creation: June 12, 2009
//
// Modifications:
// Jeremy Meredith, Wed Nov 10 14:20:47 EST 2010
// Split timing reports into detailed and summary. E.g. for serial code,
// we might report all trial values, but skip them in parallel.
//
// Jeremy Meredith, Thu Nov 11 11:40:18 EST 2010
// Added check for missing value tag.
//
// Jeremy Meredith, Mon Nov 22 13:37:10 EST 2010
// Added percentile statistic.
//
// Jeremy Meredith, Fri Dec 3 16:30:31 EST 2010
// Added a method to extract a subset of results based on test name. Also,
// the Result class is now public, so that clients can use them directly.
// Added a GetResults method as well, and made several functions const.
//
// ****************************************************************************
class ResultDatabase {
public:
//
// A performance result for a single SHOC benchmark run.
//
struct Result {
string test; // e.g. "readback"
string atts; // e.g. "pagelocked 4k^2"
string unit; // e.g. "MB/sec"
vector<double> value; // e.g. "837.14"
double GetMin() const;
double GetMax() const;
double GetMedian() const;
double GetPercentile(double q) const;
double GetMean() const;
double GetStdDev() const;
bool operator<(const Result& rhs) const;
bool HadAnyFLTMAXValues() const {
for (int i = 0; i < value.size(); ++i) {
if (value[i] >= FLT_MAX) return true;
}
return false;
}
};
protected:
vector<Result> results;
public:
void AddResult(const string& test, const string& atts, const string& unit, double value);
void AddResults(const string& test, const string& atts, const string& unit,
const vector<double>& values);
vector<Result> GetResultsForTest(const string& test);
const vector<Result>& GetResults() const;
void ClearAllResults();
void DumpDetailed(ostream&);
void DumpSummary(ostream&);
void DumpCsv(string fileName);
private:
bool IsFileEmpty(string fileName);
};
#endif
@@ -1,3 +0,0 @@
loop(1000); H2D; NullKernel; D2H; endloop;
streamsync;
printTiming(1000)
@@ -1 +0,0 @@
H2D; NullKernel; D2H; streamsync
@@ -1,865 +0,0 @@
#include <stdio.h>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <algorithm>
#include <string>
#include <typeinfo>
#include <hip/hip_runtime.h>
#ifndef _WIN32
#include <sys/time.h>
#endif
#include "ResultDatabase.h"
#include "nullkernel.hip.cpp"
bool g_printedTiming = false;
// Cmdline parms:
int p_device = 0;
const char* p_command = "setstream(1); H2D; NullKernel; D2H;";
const char* p_file = nullptr;
unsigned p_verbose = 0x0;
unsigned p_db = 0x0;
unsigned p_blockingSync = 0x0;
//---
int p_iterations = 1;
#define KNRM "\x1B[0m"
#define KRED "\x1B[31m"
#define KGRN "\x1B[32m"
#define failed(...) \
printf("error: "); \
printf(__VA_ARGS__); \
printf("\n"); \
abort();
#define HIPCHECK(error) \
{ \
hipError_t localError = error; \
if (localError != hipSuccess) { \
printf("%serror: '%s'(%d) from %s at %s:%d%s\n", KRED, hipGetErrorString(localError), \
localError, #error, __FILE__, __LINE__, KNRM); \
failed("API returned error code."); \
} \
}
#define HIPASSERT(condition, msg) \
if (!(condition)) { \
failed("%sassertion %s at %s:%d: %s%s\n", KRED, #condition, __FILE__, __LINE__, msg, \
KNRM); \
}
int parseInt(const char* str, int* output) {
char* next;
*output = strtol(str, &next, 0);
return !strlen(next);
}
void printConfig() {
hipDeviceProp_t props;
HIPCHECK(hipGetDeviceProperties(&props, p_device));
printf("Device:%s Mem=%.1fGB #CUs=%d Freq=%.0fMhz\n", props.name,
props.totalGlobalMem / 1024.0 / 1024.0 / 1024.0, props.multiProcessorCount,
props.clockRate / 1000.0);
}
void help() {
printf("Usage: hipBusBandwidth [OPTIONS]\n");
printf(" --file, -f : Read string of commands from file\n");
printf(" --command, -c : String specifying commands to run.\n");
printf(" --iterations, -i : Number of copy iterations to run.\n");
printf(" --device, -d : Device ID to use (0..numDevices).\n");
printf(
" --verbose, -v : Verbose printing of status. Fore more info, combine with "
"HIP_TRACE_API on ROCm\n");
};
int parseStandardArguments(int argc, char* argv[]) {
for (int i = 1; i < argc; i++) {
const char* arg = argv[i];
if (!strcmp(arg, " ")) {
// skip NULL args.
} else if (!strcmp(arg, "--iterations") || (!strcmp(arg, "-i"))) {
if (++i >= argc || !parseInt(argv[i], &p_iterations)) {
failed("Bad --iterations argument");
}
} else if (!strcmp(arg, "--device") || (!strcmp(arg, "-d"))) {
if (++i >= argc || !parseInt(argv[i], &p_device)) {
failed("Bad --device argument");
}
} else if (!strcmp(arg, "--file") || (!strcmp(arg, "-f"))) {
if (++i >= argc) {
failed("Bad --file argument");
} else {
p_file = argv[i];
}
} else if (!strcmp(arg, "--commands") || (!strcmp(arg, "-c"))) {
if (++i >= argc) {
failed("Bad --commands argument");
} else {
p_command = argv[i];
}
} else if (!strcmp(arg, "--verbose") || (!strcmp(arg, "-v"))) {
p_verbose = 1;
} else if (!strcmp(arg, "--blockingSync") || (!strcmp(arg, "-B"))) {
p_blockingSync = 1;
} else if (!strcmp(arg, "--help") || (!strcmp(arg, "-h"))) {
help();
exit(EXIT_SUCCESS);
} else {
failed("Bad argument '%s'", arg);
}
}
return 0;
};
// Returns the current system time in microseconds
inline long long get_time() {
#ifdef _WIN32
struct timespec ts;
timespec_get(&ts, TIME_UTC);
return (ts.tv_sec * 1000000) + (ts.tv_nsec/1000);
#else
struct timeval tv;
gettimeofday(&tv, 0);
return (tv.tv_sec * 1000000) + tv.tv_usec;
#endif
}
class Command;
//=================================================================================================
// A stream of commands , specified as a string.
class CommandStream {
public:
// State that is inherited by sub-blocks:
struct CommandStreamState {
hipStream_t _currentStream;
std::vector<hipStream_t> _streams;
vector<CommandStream*> _subBlocks;
};
public:
CommandStream(std::string commandStreamString, int iterations);
~CommandStream();
hipStream_t currentStream() const { return _state._currentStream; };
void print(const std::string& indent = "") const;
void printBrief(std::ostream& s = std::cout) const;
void run();
void recordTime();
void printTiming(int iterations = 0);
CommandStream* currentCommandStream() {
return _parseInSubBlock ? _state._subBlocks.back() : this;
};
void enterSubBlock(CommandStream* commandStream) {
_parseInSubBlock = true;
_state._subBlocks.push_back(commandStream);
};
void exitSubBlock() { _parseInSubBlock = false; };
void setParent(CommandStream* parentCmdStream) {
_parentCommandStream = parentCmdStream;
_state = parentCmdStream->_state;
};
CommandStream* getParent() { return _parentCommandStream; };
void setStream(int streamIndex);
CommandStreamState& getState() { return _state; };
private:
static void tokenize(const std::string& s, char delim, std::vector<std::string>& tokens);
void parse(const std::string fullCmd);
protected:
CommandStreamState _state;
private:
// List of commands to run in this stream:
std::vector<Command*> _commands;
// Number of iterations to run the command loop
int _iterations;
// Us to run the the command-stream. Only valid after run is called.
long long _startTime;
double _elapsedUs;
// Track nested loop of command streams:
CommandStream* _parentCommandStream;
// Track if we are parsing commands in the subblock.
bool _parseInSubBlock;
};
//=================================================================================================
class Command {
public:
// @p minArgs : Minimum arguments for command. -1 = don't check.
// @p maxArgs : Minimum arguments for command. 0 means min=max, ie exact #arguments expected.
// -1 = don't check max.
Command(CommandStream* cmdStream, const std::vector<std::string>& args, int minArgs = 0,
int maxArgs = 0)
: _commandStream(cmdStream), _args(args) {
int numArgs = args.size() - 1;
if ((minArgs != -1) && (numArgs < minArgs)) {
// TODO - print full command here.
failed("Not enough arguments for command %s. (Expected %d, got %d)", args[0].c_str(),
minArgs, numArgs);
}
// Check for an exact number of arguments:
if (maxArgs == 0) {
maxArgs = minArgs;
}
if ((maxArgs != -1) && (numArgs > maxArgs)) {
failed("Too many arguments for command %s. (Expected %d, got %d)", args[0].c_str(),
maxArgs, numArgs);
}
};
void printBrief(std::ostream& s = std::cout) const { s << _args[0]; }
virtual ~Command(){};
virtual void print(const std::string& indent = "") const {
std::cout << indent << "[";
std::for_each(_args.begin(), _args.end(), [](const std::string& s) { std::cout << s; });
std::cout << "]";
};
virtual void run() = 0;
protected:
int readIntArg(int argIndex, const std::string& argName) {
// TODO - catch references to non-existant arguments here.
int argVal;
try {
argVal = std::stoi(_args[argIndex]);
} catch (std::invalid_argument) {
failed("Command %s has bad %s argument ('%s')", _args[0].c_str(), argName.c_str(),
_args[argIndex].c_str());
}
return argVal;
}
protected:
CommandStream* _commandStream;
std::vector<std::string> _args;
};
#define FILENAME "nullkernel.hsaco"
#define KERNEL_NAME "NullKernel"
//=================================================================================================
// HCC optimizes away fully NULL kernel calls, so run one that is nearly null:
class ModuleKernelCommand : public Command {
public:
ModuleKernelCommand(CommandStream* cmdStream, const std::vector<std::string>& args)
: Command(cmdStream, args), _stream(cmdStream->currentStream()) {
hipModule_t module;
HIPCHECK(hipModuleLoad(&module, FILENAME));
HIPCHECK(hipModuleGetFunction(&_function, module, KERNEL_NAME));
};
~ModuleKernelCommand(){};
void run() override {
#define LEN 64
float *X = NULL;
HIPCHECK(hipMalloc((void**)&X, sizeof(float)));
struct {
float *Ad;
}args;
args.Ad = X;
size_t argSize = sizeof(args);
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
HIP_LAUNCH_PARAM_BUFFER_SIZE, &argSize, HIP_LAUNCH_PARAM_END};
hipModuleLaunchKernel(_function, 1, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config);
};
public:
hipFunction_t _function;
hipStream_t _stream;
};
class KernelCommand : public Command {
public:
enum Type { Null, VectorAdd };
KernelCommand(CommandStream* cmdStream, const std::vector<std::string>& args, Type kind)
: Command(cmdStream, args), _kind(kind), _stream(cmdStream->currentStream()){};
~KernelCommand(){};
void run() override {
static const int gridX = 64;
static const int groupX = 64;
switch (_kind) {
case Null:
hipLaunchKernelGGL(NullKernel, dim3(gridX / groupX), dim3(gridX), 0, _stream, nullptr);
break;
case VectorAdd:
assert(0); // TODO
break;
};
}
private:
Type _kind;
hipStream_t _stream;
};
//=================================================================================================
class CopyCommand : public Command {
enum MemType { PinnedHost, UnpinnedHost, Device };
public:
CopyCommand(CommandStream* cmdStream, const std::vector<std::string>& args, hipMemcpyKind kind,
bool isAsync, bool isPinnedHost);
~CopyCommand() {
if (_dst) {
dealloc(_dst, _dstType);
_dst = NULL;
};
if (_src) {
dealloc(_src, _srcType);
_src = NULL;
}
}
void run() override {
if (_isAsync) {
HIPCHECK(hipMemcpyAsync(_dst, _src, _sizeBytes, _kind, _stream));
} else {
HIPCHECK(hipMemcpy(_dst, _src, _sizeBytes, _kind));
}
};
private:
void* alloc(size_t size, MemType memType) {
void* p;
if (memType == Device) {
HIPCHECK(hipMalloc(&p, size));
} else if (memType == PinnedHost) {
HIPCHECK(hipHostMalloc(&p, size));
} else if (memType == UnpinnedHost) {
p = (char*)malloc(size);
HIPASSERT(p, "malloc failed");
} else {
HIPASSERT(0, "unsupported memType");
}
return p;
};
static void dealloc(void* p, MemType memType) {
if (memType == Device) {
HIPCHECK(hipFree(p));
} else if (memType == PinnedHost) {
HIPCHECK(hipHostFree(p));
} else if (memType == UnpinnedHost) {
free(p);
} else {
HIPASSERT(0, "unsupported memType");
}
}
private:
bool _isAsync;
hipStream_t _stream;
hipMemcpyKind _kind;
size_t _sizeBytes;
void* _dst;
MemType _dstType;
void* _src;
MemType _srcType;
};
//=================================================================================================
class DeviceSyncCommand : public Command {
public:
DeviceSyncCommand(CommandStream* cmdStream, const std::vector<std::string>& args)
: Command(cmdStream, args){};
void run() override { HIPCHECK(hipDeviceSynchronize()); };
};
//=================================================================================================
class StreamSyncCommand : public Command {
public:
StreamSyncCommand(CommandStream* cmdStream, const std::vector<std::string>& args)
: Command(cmdStream, args), _stream(cmdStream->currentStream()){};
static const char* help() { return "synchronizes the current stream"; };
void run() override { HIPCHECK(hipStreamSynchronize(_stream)); };
private:
hipStream_t _stream;
};
//=================================================================================================
//=================================================================================================
class LoopCommand : public Command {
public:
LoopCommand(CommandStream* parentCmdStream, const std::vector<std::string>& args)
: Command(parentCmdStream, args, 1) {
int loopCnt;
try {
loopCnt = std::stoi(args[1]);
} catch (std::invalid_argument) {
failed("bad LOOP_CNT=%s", args[1].c_str());
}
_commandStream = new CommandStream("", loopCnt);
_commandStream->setParent(parentCmdStream);
parentCmdStream->enterSubBlock(_commandStream);
};
void print(const std::string& indent = "") const override {
Command::print();
_commandStream->print(indent + " ");
};
void run() override { _commandStream->run(); };
};
//=================================================================================================
class EndBlockCommand : public Command {
public:
EndBlockCommand(CommandStream* blockCmdStream, CommandStream* parentCmdStream,
const std::vector<std::string>& args)
: Command(parentCmdStream, args, 0, 1), _blockCmdStream(blockCmdStream), _printTiming(0) {
int argCnt = args.size() - 1;
if (argCnt >= 1) {
_printTiming = readIntArg(1, "PRINT_TIMING");
}
if (parentCmdStream == nullptr) {
failed("%s without corresponding command to start block", args[0].c_str());
}
parentCmdStream->exitSubBlock();
};
void run() override {
if (_printTiming) {
_blockCmdStream->printTiming();
}
};
private:
CommandStream* _blockCmdStream;
// print the stream when loop exits.
int _printTiming;
};
//=================================================================================================
class SetStreamCommand : public Command {
public:
SetStreamCommand(CommandStream* cmdStream, const std::vector<std::string>& args)
: Command(cmdStream, args, 1) {
int streamIndex = readIntArg(1, "STREAM_INDEX");
cmdStream->setStream(streamIndex);
};
void run() override{};
};
//=================================================================================================
class PrintTimingCommand : public Command {
public:
PrintTimingCommand(CommandStream* cmdStream, const std::vector<std::string>& args)
: Command(cmdStream, args, 1) {
_iterations = readIntArg(1, "ITERATIONS");
};
void run() override { _commandStream->printTiming(_iterations); };
private:
int _iterations;
};
//=================================================================================================
CopyCommand::CopyCommand(CommandStream* cmdStream, const std::vector<std::string>& args,
hipMemcpyKind kind, bool isAsync, bool isPinnedHost)
: Command(cmdStream, args),
_isAsync(isAsync),
_stream(cmdStream->currentStream()),
_kind(kind) {
switch (kind) {
case hipMemcpyDeviceToHost:
_srcType = Device;
_dstType = isPinnedHost ? PinnedHost : UnpinnedHost;
break;
case hipMemcpyHostToDevice:
_srcType = isPinnedHost ? PinnedHost : UnpinnedHost;
_dstType = Device;
break;
default:
HIPASSERT(0, "Unknown hipMemcpyKind");
};
_sizeBytes = 64; // TODO, support reading from arg.
_dst = alloc(_sizeBytes, _dstType);
_src = alloc(_sizeBytes, _srcType);
};
//=================================================================================================
//=================================================================================================
// Implementations:
//=================================================================================================
//=================================================================================================
CommandStream::CommandStream(std::string commandStreamString, int iterations)
: _iterations(iterations),
_startTime(0),
_elapsedUs(0.0),
_parentCommandStream(nullptr),
_parseInSubBlock(false) {
std::vector<std::string> tokens;
tokenize(commandStreamString, ';', tokens);
setStream(0);
std::for_each(tokens.begin(), tokens.end(), [&](const std::string s) { this->parse(s); });
}
CommandStream::~CommandStream() {
std::for_each(_state._streams.begin(), _state._streams.end(), [&](hipStream_t s) {
if (s) {
HIPCHECK(hipStreamDestroy(s));
}
});
std::for_each(_commands.begin(), _commands.end(), [&](Command* c) { delete c; });
}
void CommandStream::setStream(int streamIndex) {
if (streamIndex >= _state._streams.size()) {
_state._streams.resize(streamIndex + 1);
}
if (streamIndex && (_state._streams[streamIndex] == nullptr)) {
// Create new stream:
hipStream_t stream;
HIPCHECK(hipStreamCreate(&stream));
_state._streams[streamIndex] = stream;
_state._currentStream = stream;
} else {
// Use existing stream:
_state._currentStream = _state._streams[streamIndex];
}
}
void CommandStream::tokenize(const std::string& s, char delim, std::vector<std::string>& tokens) {
std::stringstream ss;
ss.str(s);
std::string item;
while (getline(ss, item, delim)) {
item.erase(std::remove(item.begin(), item.end(), ' '), item.end()); // remove whitespace.
tokens.push_back(item);
}
}
void trim(std::string* s) {
// trim whitespace from begin and end:
const char* t = "\t\n\r\f\v";
s->erase(0, s->find_first_not_of(t));
s->erase(s->find_last_not_of(t) + 1);
}
void ltrim(std::string* s) {
// trim whitespace from begin and end:
const char* t = "\t\n\r\f\v";
s->erase(0, s->find_first_not_of(t));
}
void CommandStream::parse(std::string fullCmd) {
// convert to lower-case:
std::transform(fullCmd.begin(), fullCmd.end(), fullCmd.begin(), ::tolower);
trim(&fullCmd);
if (p_db) {
printf("parse: <%s>\n", fullCmd.c_str());
}
std::string c;
std::vector<std::string> args;
size_t leftParenZ = fullCmd.find_first_of('(');
if (leftParenZ == string::npos) {
c = fullCmd;
args.push_back(c);
} else {
c = fullCmd.substr(0, leftParenZ);
args.push_back(c);
size_t rightParenZ = fullCmd.find_first_of(')', leftParenZ);
std::string argStr = fullCmd.substr(leftParenZ + 1, rightParenZ - leftParenZ - 1);
// printf ("c=%s argstr='%s' leftParenZ=%zu rightParenZ=%zu\n", c.c_str(), argStr.c_str(),
// leftParenZ, rightParenZ);
tokenize(argStr, ',', args);
}
if ((args.size() == 0) || (fullCmd.c_str()[0] == '#')) {
if (p_db) {
printf(" skip comment\n");
}
return;
}
Command* cmd = NULL;
CommandStream* cmdStream = currentCommandStream();
if (c == "h2d") {
cmd = new CopyCommand(cmdStream, args, hipMemcpyHostToDevice, true /*isAsync*/,
true /*isPinned*/);
//= h2d
//= Performs an async host-to-device copy of array A_h to A_d.
//= The size of these arrays may be set with the datasize command.
} else if (c == "d2h") {
cmd = new CopyCommand(cmdStream, args, hipMemcpyDeviceToHost, true /*isAsync*/,
true /*isPinned*/);
//= d2h
//= Performs an async device-to-host copy of array A_d to A_h.
//= The size of these arrays may be set with the datasize command.
} else if (c == "modulekernel") {
cmd = new ModuleKernelCommand(cmdStream, args);
} else if (c == "nullkernel") {
cmd = new KernelCommand(cmdStream, args, KernelCommand::Null);
//= nullkernel
//= Dispatches a null kernel to the device.
} else if (c == "vectoraddkernel") {
cmd = new KernelCommand(cmdStream, args, KernelCommand::VectorAdd);
} else if (c == "devicesync") {
cmd = new DeviceSyncCommand(cmdStream, args);
} else if (c == "streamsync") {
//= streamsync
//= Execute hipStreamSynchronize.
//= This will cause the host thread to wait until the current stream
//= completes all pending operations.
cmd = new StreamSyncCommand(cmdStream, args);
} else if (c == "setstream") {
//= setstream(STREAM_INDEX);
//= Set current stream used by subsequent commands.
//= STREAM_INDEX is index starting from 0...N.
//= This function will create new stream on first call to setstream or re-use previous
//= stream if setstream has already been called with STREAM_INDEX.
//= STREAM_INDEX=0 will use the default "null" stream associated with the device, and will
//not create a new stream. = The default stream has special, conservative synchronization
//properties.
cmd = new SetStreamCommand(cmdStream, args);
} else if (c == "printtiming") {
cmd = new PrintTimingCommand(cmdStream, args);
} else if (c == "loop") {
//= loop(LOOP_CNT)
//= Loop over next set of commands (until 'endloop' command) for LOOP_CNT iterations.
//= Loops can be nested.
cmd = new LoopCommand(cmdStream, args);
} else if (c == "endloop") {
//= endloop
//= End a looped sequence. Must be paired with a preceding loop command.
//= Command between the `loop` and `endloop` must be executed
CommandStream* parentCmdStream = cmdStream->getParent();
cmd = new EndBlockCommand(cmdStream, parentCmdStream, args);
cmdStream = parentCmdStream;
} else {
std::cerr << "error: Bad command '" << fullCmd << "\n";
HIPASSERT(0, "bad command in command-stream");
}
if (cmd) {
cmdStream->_commands.push_back(cmd);
}
}
void CommandStream::print(const std::string& indent) const {
for (auto cmdI = _commands.begin(); cmdI != _commands.end(); cmdI++) {
(*cmdI)->print(indent);
};
}
void CommandStream::printBrief(std::ostream& s) const {
for (auto cmdI = _commands.begin(); cmdI != _commands.end(); cmdI++) {
(*cmdI)->printBrief(s);
s << ";";
};
}
void CommandStream::run() {
_startTime = get_time();
for (int i = 0; i < _iterations; i++) {
for (auto cmdI = _commands.begin(); cmdI != _commands.end(); cmdI++) {
if (p_verbose) {
(*cmdI)->print();
}
(*cmdI)->run();
}
}
// Record time, if not already stored. (an earlier printTime command will also store the time)
recordTime();
};
void CommandStream::recordTime() {
if (_elapsedUs == 0.0) {
auto stopTime = get_time();
_elapsedUs = stopTime - _startTime;
}
}
void CommandStream::printTiming(int iterations) {
if ((_state._subBlocks.size() == 1) && (_commands.size() == 1)) {
// printf ("print just the loop\n");
_state._subBlocks.front()->printTiming(iterations);
} else {
g_printedTiming = true;
recordTime();
if (iterations == 0) {
iterations = _iterations;
}
std::cout << "command<";
printBrief(std::cout);
std::cout << ">,";
printf(" iterations,%d, total_time,%6.3f, time/iteration,%6.3f\n", iterations,
_elapsedUs, _elapsedUs / iterations);
}
};
//=================================================================================================
int main(int argc, char* argv[]) {
parseStandardArguments(argc, argv);
printConfig();
CommandStream* cs;
if (p_blockingSync) {
#ifdef __HIP_PLATFORM_AMD__
printf("setting BlockingSync for AMD\n");
#ifdef _WIN32
_putenv_s("HIP_BLOCKING_SYNC", "1");
#else
setenv("HIP_BLOCKING_SYNC", "1", 1);
#endif
#endif
#ifdef __HIP_PLATFORM_NVIDIA__
printf("setting cudaDeviceBlockingSync\n");
HIPCHECK(hipSetDeviceFlags(cudaDeviceBlockingSync));
#endif
};
if (p_file) {
// TODO - catch exception on file IO here:
std::ifstream file(p_file);
std::string str;
std::string file_contents;
while (std::getline(file, str)) {
file_contents += str;
}
cs = new CommandStream(file_contents, p_iterations);
} else {
cs = new CommandStream(p_command, p_iterations);
}
cs->print();
printf("------\n");
cs->run();
if (!g_printedTiming) {
cs->printTiming();
}
delete cs;
}
// TODO - add error checking for arguments.
@@ -1,3 +0,0 @@
setstream(1);
NullKernel; streamsync;
loop(10000); H2D; NullKernel; streamsync; endloop(1);
@@ -1,3 +0,0 @@
loop(1000); H2D; NullKernel; D2H; endloop;
streamsync;
printTiming(1000)
@@ -1,2 +0,0 @@
setstream(1);
loop(1000); NullKernel; streamsync; endloop(1);
@@ -1,7 +0,0 @@
#include "hip/hip_runtime.h"
extern "C" __global__ void NullKernel(float* Ad) {
if (Ad) {
Ad[0] = 42;
}
}
@@ -1,10 +0,0 @@
setstream(1);
NullKernel; streamsync;
loop(30000); NullKernel; streamsync; endloop(1);
loop(30000); H2D; H2D; NullKernel; streamsync; endloop(1);
loop(30000); H2D; H2D; H2D; NullKernel; streamsync; endloop(1);
loop(30000); H2D; NullKernel; D2H; streamsync; endloop(1);
loop(30000); NullKernel; D2H; streamsync; endloop(1);
loop(30000); NullKernel; D2H; D2H; streamsync; endloop(1);
loop(30000); NullKernel; D2H; D2H; D2H; streamsync; endloop(1);
@@ -1,8 +0,0 @@
setstream(1);
NullKernel; streamsync;
loop(100000); NullKernel; streamsync; endloop(1);
loop(100000); H2D; streamsync; NullKernel; streamsync; endloop(1);
loop(100000); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1);
@@ -1,5 +0,0 @@
setstream(1);
NullKernel; streamsync;
loop(100000); NullKernel; streamsync; endloop(1);
loop(100000); H2D; NullKernel; streamsync; endloop(1);
loop(100000); H2D; NullKernel; D2H; streamsync; endloop(1);
@@ -1,7 +0,0 @@
setstream(0);
NullKernel; streamsync;
loop(100000); NullKernel; streamsync; endloop(1);
loop(100000); H2D; NullKernel; streamsync; endloop(1);
loop(100000); H2D; NullKernel; D2H; streamsync; endloop(1);
@@ -1,5 +0,0 @@
setstream(1);
NullKernel; streamsync;
loop(100); ModuleKernel; streamsync; endloop(1);
loop(100); streamsync; endloop(1);
loop(3000); NullKernel; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1);
loop(10); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1);
loop(100); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1);
loop(100); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1);
loop(1000); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1);
loop(1000); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1);
loop(10000); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1);
loop(10000); D2H; H2D; streamsync;D2H; H2D; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1);
loop(10); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1);
loop(100); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1);
loop(100); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1);
loop(1000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1);
loop(1000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1);
loop(10000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1);
loop(10000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1);
loop(10); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1);
loop(100); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1);
loop(100); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1);
loop(1000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1);
loop(1000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1);
loop(10000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1);
loop(10000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1);
loop(10); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1);
loop(100); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1);
loop(100); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1);
loop(1000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1);
loop(1000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1);
loop(10000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1);
loop(10000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1);
loop(10); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1);
loop(100); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1);
loop(100); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1);
loop(1000); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1);
loop(1000); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1);
loop(10000); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1);
loop(10000); H2D; D2H; streamsync;H2D; D2H; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1);
loop(10); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync;endloop(1);
loop(100); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync;endloop(1);
loop(100); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync; endloop(1);
loop(1000); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync; endloop(1);
loop(1000); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync; endloop(1);
loop(10000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1);
loop(10000); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1);
loop(10); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1);
loop(100); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1);
loop(100); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1);
loop(1000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1);
loop(1000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1);
loop(10000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1);
loop(10000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
loop(10); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
loop(100); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
loop(100); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
loop(1000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
loop(1000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
loop(10000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
loop(10000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1);
loop(10); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1);
loop(100); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1);
loop(100); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1);
loop(1000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1);
loop(1000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1);
loop(10000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1);
loop(10000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1);
loop(10); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1);
loop(100); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1);
loop(100); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1);
loop(1000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1);
loop(1000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1);
loop(10000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1);
loop(10000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(10); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(100); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(100); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(10); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(100); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(100); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); streamsync; streamsync; endloop(1);
loop(10); streamsync; streamsync; endloop(1);
loop(100); streamsync; streamsync; endloop(1);
loop(100); streamsync; streamsync; endloop(1);
loop(1000); streamsync; streamsync; endloop(1);
loop(1000); streamsync; streamsync; endloop(1);
loop(10000); streamsync; streamsync; endloop(1);
loop(10000); streamsync; streamsync; endloop(1);
@@ -1,10 +0,0 @@
setstream(1);
loop(10); D2H; streamsync; D2H; streamsync; endloop(1);
loop(10); D2H; streamsync; D2H; streamsync; endloop(1);
loop(100); D2H; streamsync; D2H; streamsync; endloop(1);
loop(100); D2H; streamsync; D2H; streamsync; endloop(1);
loop(1000); D2H;streamsync; D2H; streamsync; endloop(1);
loop(1000); D2H; streamsync; D2H; streamsync; endloop(1);
loop(1000); D2H; streamsync; D2H; streamsync; endloop(1);
loop(10000); D2H; streamsync; D2H; streamsync; endloop(1);
loop(10000); D2H; streamsync; D2H; streamsync; endloop(1);
@@ -1,10 +0,0 @@
setstream(1);
loop(10); D2H; D2H; streamsync; endloop(1);
loop(10); D2H; D2H; streamsync; endloop(1);
loop(100); D2H; D2H; streamsync; endloop(1);
loop(100); D2H; D2H; streamsync; endloop(1);
loop(1000); D2H; D2H; streamsync; endloop(1);
loop(1000); D2H; D2H; streamsync; endloop(1);
loop(1000); D2H; D2H; streamsync; endloop(1);
loop(10000); D2H; D2H; streamsync; endloop(1);
loop(10000); D2H; D2H; streamsync; endloop(1);
@@ -1,10 +0,0 @@
setstream(1);
loop(10); H2D; streamsync; H2D; streamsync; endloop(1);
loop(10); H2D; streamsync; H2D; streamsync; endloop(1);
loop(100); H2D; streamsync; H2D; streamsync; endloop(1);
loop(100); H2D; streamsync; H2D; streamsync; endloop(1);
loop(1000); H2D;streamsync; H2D; streamsync; endloop(1);
loop(1000); H2D; streamsync; H2D; streamsync; endloop(1);
loop(1000); H2D; streamsync; H2D; streamsync; endloop(1);
loop(10000); H2D; streamsync; H2D; streamsync; endloop(1);
loop(10000); H2D; streamsync; H2D; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1);
loop(10); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
loop(100); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1);
loop(100); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1);
loop(1000); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1);
loop(1000); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1);
loop(10000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; endloop(1);
loop(10000); H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel; streamsync;endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1);
loop(10); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync;endloop(1);
loop(100); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync;endloop(1);
loop(100); H2D; NullKernel; streamsync; H2D; NullKernel; streamsync;endloop(1);
loop(1000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1);
loop(1000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1);
loop(10000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1);
loop(10000); H2D ; NullKernel; streamsync;H2D; NullKernel; streamsync; endloop(1);
@@ -1,10 +0,0 @@
setstream(1);
loop(10); H2D; H2D; streamsync; endloop(1);
loop(10); H2D; H2D; streamsync; endloop(1);
loop(100); H2D; H2D; streamsync; endloop(1);
loop(100); H2D; H2D; streamsync; endloop(1);
loop(1000); H2D; H2D; streamsync; endloop(1);
loop(1000); H2D; H2D; streamsync; endloop(1);
loop(1000); H2D; H2D; streamsync; endloop(1);
loop(10000); H2D; H2D; streamsync; endloop(1);
loop(10000); H2D; H2D; streamsync; endloop(1);
@@ -1,10 +0,0 @@
setstream(1);
loop(10); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
loop(10); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
loop(100); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
loop(100); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; NullKernel; streamsync; endloop(1);
@@ -1,10 +0,0 @@
setstream(1);
loop(10); NullKernel; NullKernel; streamsync; endloop(1);
loop(10); NullKernel; NullKernel; streamsync; endloop(1);
loop(100); NullKernel; NullKernel; streamsync; endloop(1);
loop(100); NullKernel; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; NullKernel; streamsync; endloop(1);
loop(10000); NullKernel; NullKernel; streamsync; endloop(1);
loop(10000); NullKernel; NullKernel; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1);
loop(10); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1);
loop(100); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1);
loop(100); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1);
loop(1000); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1);
loop(1000); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1);
loop(10000); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1);
loop(10000); D2H; H2D; streamsync;D2H; H2D; streamsync; D2H; H2D; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1);
loop(10); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1);
loop(100); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1);
loop(100); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1);
loop(1000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1);
loop(1000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1);
loop(10000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1);
loop(10000); D2H; NullKernel; streamsync;D2H; NullKernel; streamsync;streamsync; D2H; NullKernel; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1);
loop(10); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1);
loop(100); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1);
loop(100); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1);
loop(1000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1);
loop(1000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1);
loop(10000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1);
loop(10000); D2H; streamsync; H2D; streamsync;D2H; streamsync; H2D; streamsync; D2H; streamsync; H2D;streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1);
loop(10); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1);
loop(100); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1);
loop(100); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1);
loop(1000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1);
loop(1000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1);
loop(10000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1);
loop(10000); D2H; streamsync; NullKernel; streamsync;D2H; streamsync; NullKernel; streamsync; D2H; streamsync; NullKernel;streamsync; endloop(1);
@@ -1,10 +0,0 @@
setstream(1);
loop(10); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1);
loop(10); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1);
loop(100); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1);
loop(100); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1);
loop(1000); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1);
loop(1000); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1);
loop(10000); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1);
loop(10000); H2D; D2H; streamsync;H2D; D2H; streamsync; H2D; D2H; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1);
loop(10); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1);
loop(100); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1);
loop(100); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1);
loop(1000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1);
loop(1000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1);
loop(10000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1);
loop(10000); H2D; NullKernel; streamsync;H2D; NullKernel; streamsync;streamsync; H2D; NullKernel; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1);
loop(10); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1);
loop(100); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1);
loop(100); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1);
loop(1000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1);
loop(1000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1);
loop(10000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1);
loop(10000); H2D; streamsync; D2H; streamsync;H2D; streamsync; D2H; streamsync; H2D; streamsync; D2H;streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1);
loop(10); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1);
loop(100); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1);
loop(100); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1);
loop(1000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1);
loop(1000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1);
loop(10000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1);
loop(10000); H2D; streamsync; NullKernel; streamsync;H2D; streamsync; NullKernel; streamsync; H2D; streamsync; NullKernel;streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1);
loop(10); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1);
loop(100); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1);
loop(100); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1);
loop(1000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1);
loop(1000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1);
loop(10000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1);
loop(10000); NullKernel; D2H; streamsync;NullKernel; D2H; streamsync; NullKernel; D2H; streamsync;endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1);
loop(10); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1);
loop(100); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1);
loop(100); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1);
loop(1000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1);
loop(1000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1);
loop(10000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1);
loop(10000); NullKernel; H2D; streamsync;NullKernel; H2D; streamsync; NullKernel; H2D; streamsync;endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(10); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(100); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(100); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync;NullKernel; streamsync; D2H; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(10); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(100); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(100); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync;NullKernel; streamsync; H2D; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10);streamsync; streamsync; streamsync; endloop(1);
loop(10);streamsync; streamsync; streamsync; endloop(1);
loop(100);streamsync; streamsync; streamsync; endloop(1);
loop(100);streamsync; streamsync; streamsync; endloop(1);
loop(1000);streamsync; streamsync; streamsync; endloop(1);
loop(1000);streamsync; streamsync; streamsync; endloop(1);
loop(10000);streamsync; streamsync; streamsync; endloop(1);
loop(10000);streamsync; streamsync; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1);
loop(10); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1);
loop(100); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1);
loop(100); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1);
loop(1000); D2H;streamsync; D2H; streamsync; D2H; streamsync; endloop(1);
loop(1000); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1);
loop(10000); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1);
loop(10000); D2H; streamsync; D2H; streamsync; D2H; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); D2H; D2H; D2H; streamsync; endloop(1);
loop(10); D2H; D2H; D2H; streamsync; endloop(1);
loop(100); D2H; D2H; D2H; streamsync; endloop(1);
loop(100); D2H; D2H; D2H; streamsync; endloop(1);
loop(1000); D2H; D2H; D2H; streamsync; endloop(1);
loop(1000); D2H; D2H; D2H; streamsync; endloop(1);
loop(10000); D2H; D2H; D2H; streamsync; endloop(1);
loop(10000); D2H; D2H; D2H;streamsync; endloop(1);
@@ -1,10 +0,0 @@
setstream(1);
loop(10); H2D; streamsync;H2D;streamsync; H2D; streamsync; endloop(1);
loop(10); H2D; streamsync;H2D;streamsync; H2D; streamsync; endloop(1);
loop(100); H2D; streamsync;H2D; streamsync;H2D; streamsync; endloop(1);
loop(100); H2D;streamsync; H2D; streamsync;H2D; streamsync; endloop(1);
loop(1000); H2D;streamsync; H2D;streamsync; H2D; streamsync; endloop(1);
loop(1000); H2D;streamsync; H2D; streamsync;H2D; streamsync; endloop(1);
loop(1000); H2D;streamsync; H2D; streamsync;H2D; streamsync; endloop(1);
loop(10000); H2D;streamsync; H2D; streamsync;H2D; streamsync; endloop(1);
loop(10000); H2D;streamsync; H2D;streamsync; H2D; streamsync; endloop(1);
@@ -1,10 +0,0 @@
setstream(1);
loop(10); H2D; H2D; H2D; streamsync; endloop(1);
loop(10); H2D; H2D; H2D; streamsync; endloop(1);
loop(100); H2D; H2D; H2D; streamsync; endloop(1);
loop(100); H2D; H2D; H2D; streamsync; endloop(1);
loop(1000); H2D; H2D; H2D; streamsync; endloop(1);
loop(1000); H2D; H2D; H2D; streamsync; endloop(1);
loop(1000); H2D; H2D; H2D; streamsync; endloop(1);
loop(10000); H2D; H2D; H2D; streamsync; endloop(1);
loop(10000); H2D; H2D; H2D; streamsync; endloop(1);
@@ -1,10 +0,0 @@
setstream(1);
loop(10); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
loop(10); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
loop(100); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
loop(100); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; NullKernel; streamsync; NullKernel; streamsync; endloop(1);
@@ -1,10 +0,0 @@
setstream(1);
loop(10); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(10); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(100); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(100); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(10000); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(10000); NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
@@ -1,10 +0,0 @@
setstream(1);
loop(10); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(10); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(100); NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
loop(100); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
@@ -1,10 +0,0 @@
setstream(1);
loop(10); NullKernel; NullKernel; NullKernel; NullKernel;NullKernel; streamsync; endloop(1);
loop(10); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(100); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
loop(100); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; streamsync; endloop(1);
@@ -1,10 +0,0 @@
setstream(1);
loop(10); NullKernel; NullKernel; NullKernel; NullKernel;NullKernel;NullKernel; streamsync; endloop(1);
loop(10); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
loop(100); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;NullKernel;streamsync; endloop(1);
loop(100); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
loop(1000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
loop(10000); NullKernel; NullKernel; NullKernel; NullKernel; NullKernel; NullKernel;streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); D2H; streamsync; endloop(1);
loop(10); D2H; streamsync; endloop(1);
loop(100); D2H; streamsync; endloop(1);
loop(100); D2H; streamsync; endloop(1);
loop(1000); D2H; streamsync; endloop(1);
loop(1000); D2H; streamsync; endloop(1);
loop(10000); D2H; streamsync; endloop(1);
loop(10000); D2H; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); D2H; H2D; streamsync; endloop(1);
loop(10); D2H; H2D; streamsync; endloop(1);
loop(100); D2H; H2D; streamsync; endloop(1);
loop(100); D2H; H2D; streamsync; endloop(1);
loop(1000); D2H; H2D; streamsync; endloop(1);
loop(1000); D2H; H2D; streamsync; endloop(1);
loop(10000); D2H; H2D; streamsync; endloop(1);
loop(10000); D2H; H2D; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); D2H; NullKernel; streamsync; endloop(1);
loop(10); D2H; NullKernel; streamsync; endloop(1);
loop(100); D2H; NullKernel; streamsync; endloop(1);
loop(100); D2H; NullKernel; streamsync; endloop(1);
loop(1000); D2H; NullKernel; streamsync; endloop(1);
loop(1000); D2H; NullKernel; streamsync; endloop(1);
loop(10000); D2H; NullKernel; streamsync; endloop(1);
loop(10000); D2H; NullKernel; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); D2H; streamsync; H2D; streamsync; endloop(1);
loop(10); D2H; streamsync; H2D; streamsync; endloop(1);
loop(100); D2H; streamsync; H2D; streamsync; endloop(1);
loop(100); D2H; streamsync; H2D; streamsync; endloop(1);
loop(1000); D2H; streamsync; H2D; streamsync; endloop(1);
loop(1000); D2H; streamsync; H2D; streamsync; endloop(1);
loop(10000); D2H; streamsync; H2D; streamsync; endloop(1);
loop(10000); D2H; streamsync; H2D; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); D2H; streamsync;NullKernel; streamsync; endloop(1);
loop(10); D2H; streamsync;NullKernel; streamsync; endloop(1);
loop(100); D2H; streamsync;NullKernel; streamsync; endloop(1);
loop(100); D2H; streamsync;NullKernel; streamsync; endloop(1);
loop(1000); D2H; streamsync;NullKernel; streamsync; endloop(1);
loop(1000); D2H; streamsync;NullKernel; streamsync; endloop(1);
loop(10000); D2H; streamsync;NullKernel; streamsync; endloop(1);
loop(10000); D2H; streamsync;NullKernel; streamsync; endloop(1);
@@ -1,10 +0,0 @@
setstream(1);
loop(10); H2D; streamsync; endloop(1);
loop(10); H2D; streamsync; endloop(1);
loop(100); H2D; streamsync; endloop(1);
loop(100); H2D; streamsync; endloop(1);
loop(1000); H2D; streamsync; endloop(1);
loop(1000); H2D; streamsync; endloop(1);
loop(1000); H2D; streamsync; endloop(1);
loop(10000); H2D; streamsync; endloop(1);
loop(10000); H2D; streamsync; endloop(1);
@@ -1,2 +0,0 @@
setstream(1);
loop(10); H2D; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); H2D; D2H; streamsync; endloop(1);
loop(10); H2D; D2H; streamsync; endloop(1);
loop(100); H2D; D2H; streamsync; endloop(1);
loop(100); H2D; D2H; streamsync; endloop(1);
loop(1000); H2D; D2H; streamsync; endloop(1);
loop(1000); H2D; D2H; streamsync; endloop(1);
loop(10000); H2D; D2H; streamsync; endloop(1);
loop(10000); H2D; D2H; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); H2D; NullKernel; streamsync; endloop(1);
loop(10); H2D; NullKernel; streamsync; endloop(1);
loop(100); H2D; NullKernel; streamsync; endloop(1);
loop(100); H2D; NullKernel; streamsync; endloop(1);
loop(1000); H2D; NullKernel; streamsync; endloop(1);
loop(1000); H2D; NullKernel; streamsync; endloop(1);
loop(10000); H2D; NullKernel; streamsync; endloop(1);
loop(10000); H2D; NullKernel; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10);H2D; streamsync; NullKernel; D2H; streamsync;endloop(1);
loop(10); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1);
loop(100); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1);
loop(100); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1);
loop(1000); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1);
loop(1000); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1);
loop(10000); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1);
loop(10000); H2D; streamsync; NullKernel; D2H; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10);H2D; NullKernel; D2H; streamsync;endloop(1);
loop(10); H2D; NullKernel; D2H; streamsync; endloop(1);
loop(100); H2D; NullKernel; D2H; streamsync; endloop(1);
loop(100); H2D; NullKernel; D2H; streamsync; endloop(1);
loop(1000); H2D; NullKernel; D2H; streamsync; endloop(1);
loop(1000); H2D; NullKernel; D2H; streamsync; endloop(1);
loop(10000); H2D; NullKernel; D2H; streamsync; endloop(1);
loop(10000); H2D; NullKernel; D2H; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); H2D; NullKernel; streamsync; endloop(1);
loop(10); H2D; NullKernel; streamsync; endloop(1);
loop(100); H2D; NullKernel; streamsync; endloop(1);
loop(100); H2D; NullKernel; streamsync; endloop(1);
loop(1000); H2D; NullKernel; streamsync; endloop(1);
loop(1000); H2D; NullKernel; streamsync; endloop(1);
loop(10000); H2D; NullKernel; streamsync; endloop(1);
loop(10000); H2D ; NullKernel; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); H2D; streamsync; D2H; streamsync; endloop(1);
loop(10); H2D; streamsync; D2H; streamsync; endloop(1);
loop(100); H2D; streamsync; D2H; streamsync; endloop(1);
loop(100); H2D; streamsync; D2H; streamsync; endloop(1);
loop(1000); H2D; streamsync; D2H; streamsync; endloop(1);
loop(1000); H2D; streamsync; D2H; streamsync; endloop(1);
loop(10000); H2D; streamsync; D2H; streamsync; endloop(1);
loop(10000); H2D; streamsync; D2H; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); H2D; streamsync; NullKernel; streamsync; endloop(1);
loop(10); H2D; streamsync; NullKernel; streamsync; endloop(1);
loop(100); H2D; streamsync; NullKernel; streamsync; endloop(1);
loop(100); H2D; streamsync; NullKernel; streamsync; endloop(1);
loop(1000); H2D; streamsync; NullKernel; streamsync; endloop(1);
loop(1000); H2D; streamsync; NullKernel; streamsync; endloop(1);
loop(10000); H2D; streamsync; NullKernel; streamsync; endloop(1);
loop(10000); H2D; streamsync; NullKernel; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10);H2D; streamsync; NullKernel;streamsync; D2H; streamsync;endloop(1);
loop(10); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(100); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(100); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(1000); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(1000); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(10000); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(10000); H2D; streamsync; NullKernel; streamsync; D2H; streamsync; endloop(1);
@@ -1,10 +0,0 @@
setstream(1);
loop(10); NullKernel; streamsync; endloop(1);
loop(10); NullKernel; streamsync; endloop(1);
loop(100); NullKernel; streamsync; endloop(1);
loop(100); NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); NullKernel; streamsync; streamsync; endloop(1);
loop(10); NullKernel; streamsync; streamsync; endloop(1);
loop(100); NullKernel; streamsync; streamsync; endloop(1);
loop(100); NullKernel; streamsync; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); NullKernel; D2H; streamsync; endloop(1);
loop(10); NullKernel; D2H; streamsync; endloop(1);
loop(100); NullKernel; D2H; streamsync; endloop(1);
loop(100); NullKernel; D2H; streamsync; endloop(1);
loop(1000); NullKernel; D2H; streamsync; endloop(1);
loop(1000); NullKernel; D2H; streamsync; endloop(1);
loop(10000); NullKernel; D2H; streamsync; endloop(1);
loop(10000); NullKernel; D2H; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); NullKernel; H2D; streamsync; endloop(1);
loop(10); NullKernel; H2D; streamsync; endloop(1);
loop(100); NullKernel; H2D; streamsync; endloop(1);
loop(100); NullKernel; H2D; streamsync; endloop(1);
loop(1000); NullKernel; H2D; streamsync; endloop(1);
loop(1000); NullKernel; H2D; streamsync; endloop(1);
loop(10000); NullKernel; H2D; streamsync; endloop(1);
loop(10000); NullKernel; H2D; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(10); NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(100); NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(100); NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; D2H; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; D2H; streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(10); NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(100); NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(100); NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(1000); NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; H2D; streamsync; endloop(1);
loop(10000); NullKernel; streamsync; H2D; streamsync; endloop(1);
@@ -1,2 +0,0 @@
setstream(1);
loop(10);setstream(1);setstream(2);setstream(3);setstream(4);setstream(5);streamsync; endloop(1);
@@ -1,9 +0,0 @@
setstream(1);
loop(10); streamsync; endloop(1);
loop(10); streamsync; endloop(1);
loop(100); streamsync; endloop(1);
loop(100); streamsync; endloop(1);
loop(1000); streamsync; endloop(1);
loop(1000); streamsync; endloop(1);
loop(10000); streamsync; endloop(1);
loop(10000); streamsync; endloop(1);
@@ -1,3 +0,0 @@
setstream(1);
setstream(2); H2D; NullKernel; D2H;
streamsync
@@ -1,18 +0,0 @@
#include <hip/hip_runtime.h>
static const int BLOCKSIZEX = 32;
static const int BLOCKSIZEY = 16;
__global__ void fails(float* pErrorI) {
if (pErrorI != 0) {
pErrorI[0] = 1;
}
}
int main() {
dim3 blocks(1, 1);
dim3 threads(BLOCKSIZEX, BLOCKSIZEY);
float error;
hipLaunchKernelGGL(HIP_KERNEL_NAME(fails), blocks, threads, 0, 0, &error);
}