Small tool, doc, sample enhancements.
- Expand message when HIP version mismatch detected. - Doc touchup. - change sorting of hipBusBandwidth so byte results shown at top. - Change-Id: Ifb4e44a5fdfb65d59c4994b11e5f13385705f7e0
This commit is contained in:
+1
-1
@@ -333,7 +333,7 @@ if ($printHipVersion) {
|
||||
}
|
||||
if ($runCmd) {
|
||||
if ($HIP_PLATFORM eq "hcc" and exists($hipConfig{'HCC_VERSION'}) and $HCC_VERSION ne $hipConfig{'HCC_VERSION'}) {
|
||||
print ("HIP was built using $hipConfig{'HCC_VERSION'}, but you are using $HCC_VERSION. Please rebuild HIP.\n") && die ();
|
||||
print ("HIP ($HIP_PATH) was built using hcc $hipConfig{'HCC_VERSION'}, but you are using hcc $HCC_VERSION. Please rebuild HIP including cmake.\n") && die ();
|
||||
}
|
||||
system ("$CMD") and die ();
|
||||
}
|
||||
|
||||
@@ -49,7 +49,7 @@
|
||||
| `cudaStreamDestroy` | `hipStreamDestroy` | Destroys and cleans up an asynchronous stream. |
|
||||
| `cudaStreamGetFlags` | `hipStreamGetFlags` | Query the flags of a stream. |
|
||||
| `cudaStreamGetPriority` | | Query the priority of a stream. |
|
||||
| `cudaStreamQuery` | | Queries an asynchronous stream for completion status. |
|
||||
| `cudaStreamQuery` | `hipStreamQuery` | Queries an asynchronous stream for completion status. |
|
||||
| `cudaStreamSynchronize` | `hipStreamSynchronize` | Waits for stream tasks to complete. |
|
||||
| `cudaStreamWaitEvent` | `hipStreamWaitEvent` | Make a compute stream wait on an event. |
|
||||
|
||||
|
||||
@@ -146,11 +146,11 @@ The tools also struggle with more complex CUDA applications, in particular those
|
||||
- For Nvidia platforms, HIP requires Unified Memory and should run on a device which runs the CUDA SDK 6.0 or newer. We have tested the Nvidia Titan and K40.
|
||||
|
||||
### Does Hipify automatically convert all source code?
|
||||
Typically, Hipify can automatically convert almost all run-time code, and the coordinate indexing device code.
|
||||
Typically, Hipify can automatically convert almost all run-time code, and the coordinate indexing device code (e.g. threadIdx.x -> hipThreadIdx_x).
|
||||
Most device code needs no additional conversion, since HIP and CUDA have similar names for math and built-in functions.
|
||||
HIP currently requires manual addition of one more argument to the kernel so that the host can communicate the execution configuration to the device.
|
||||
The clang-hipify tool will automatically modify the kernel signature as needed (automating a step that used to be done manually).
|
||||
Additional porting may be required to deal with architecture feature queries or with CUDA capabilities that HIP doesn't support.
|
||||
Developers should always expect to perform some platform-specific tuning and optimization.
|
||||
In general, developers should always expect to perform some platform-specific tuning and optimization.
|
||||
|
||||
### What is NVCC?
|
||||
NVCC is Nvidia's compiler driver for compiling "CUDA C++" code into PTX or device code for Nvidia GPUs. It's a closed-source binary product that comes with CUDA SDKs.
|
||||
|
||||
@@ -7,16 +7,22 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
#define SORT_RETAIN_ATTS_ORDER 1
|
||||
|
||||
|
||||
bool ResultDatabase::Result::operator<(const Result &rhs) const
|
||||
{
|
||||
if (test < rhs.test)
|
||||
return true;
|
||||
if (test > rhs.test)
|
||||
return false;
|
||||
#if (SORT_RETAIN_ATTS_ORDER == 0)
|
||||
// For ties, sort by the value of the attribute:
|
||||
if (atts < rhs.atts)
|
||||
return true;
|
||||
if (atts > rhs.atts)
|
||||
return false;
|
||||
#endif
|
||||
return false; // less-operator returns false on equal
|
||||
}
|
||||
|
||||
@@ -189,7 +195,8 @@ void ResultDatabase::AddResult(const string &test_orig,
|
||||
void ResultDatabase::DumpDetailed(ostream &out)
|
||||
{
|
||||
vector<Result> sorted(results);
|
||||
sort(sorted.begin(), sorted.end());
|
||||
|
||||
stable_sort(sorted.begin(), sorted.end());
|
||||
|
||||
const int testNameW = 24 ;
|
||||
const int attW = 12;
|
||||
@@ -281,7 +288,8 @@ void ResultDatabase::DumpDetailed(ostream &out)
|
||||
void ResultDatabase::DumpSummary(ostream &out)
|
||||
{
|
||||
vector<Result> sorted(results);
|
||||
sort(sorted.begin(), sorted.end());
|
||||
|
||||
stable_sort(sorted.begin(), sorted.end());
|
||||
|
||||
const int testNameW = 24 ;
|
||||
const int attW = 12;
|
||||
@@ -377,7 +385,7 @@ void ResultDatabase::DumpCsv(string fileName)
|
||||
bool emptyFile;
|
||||
vector<Result> sorted(results);
|
||||
|
||||
sort(sorted.begin(), sorted.end());
|
||||
stable_sort(sorted.begin(), sorted.end());
|
||||
|
||||
//Check to see if the file is empty - if so, add the headers
|
||||
emptyFile = this->IsFileEmpty(fileName);
|
||||
|
||||
@@ -49,8 +49,8 @@ std::string sizeToString(int size)
|
||||
using namespace std;
|
||||
stringstream ss;
|
||||
if (size < 0) {
|
||||
// char (09, horiz tab) lexically sorts before " " so will cause Byte values to be displayed before kB.
|
||||
ss << char(0x09)/*tab*/ << setfill('0') << setw(3) << -size << "B";
|
||||
// char ('+') lexically sorts before the digits that begin the kB labels, so will cause Byte values to be displayed before kB.
|
||||
ss << "+" << setfill('0') << setw(3) << -size << "By";
|
||||
} else {
|
||||
ss << size << "kB";
|
||||
}
|
||||
|
||||
@@ -51,9 +51,11 @@ ctest -R Memcpy
|
||||
```
|
||||
|
||||
|
||||
### If a test fails:
|
||||
### If a test fails - how to debug a test
|
||||
|
||||
Extract the command line from the testing log:
|
||||
|
||||
(From the test build directory, perhaps hip/tests/build)
|
||||
$ grep -A3 -m2 hipMemcpy-size Testing/Temporary/LastTest.log
|
||||
36/47 Testing: hipMemcpy-size
|
||||
36/47 Test: hipMemcpy-size
|
||||
|
||||
Verwijs in nieuw issue
Block a user