diff --git a/bin/hipcc b/bin/hipcc index 4c4a0dd714..b6d358532c 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -333,7 +333,7 @@ if ($printHipVersion) { } if ($runCmd) { if ($HIP_PLATFORM eq "hcc" and exists($hipConfig{'HCC_VERSION'}) and $HCC_VERSION ne $hipConfig{'HCC_VERSION'}) { - print ("HIP was built using $hipConfig{'HCC_VERSION'}, but you are using $HCC_VERSION. Please rebuild HIP.\n") && die (); + print ("HIP ($HIP_PATH) was built using hcc $hipConfig{'HCC_VERSION'}, but you are using hcc $HCC_VERSION. Please rebuild HIP including cmake.\n") && die (); } system ("$CMD") and die (); } diff --git a/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md b/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md index 2d589ec415..4c92abbba6 100644 --- a/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -49,7 +49,7 @@ | `cudaStreamDestroy` | `hipStreamDestroy` | Destroys and cleans up an asynchronous stream. | | `cudaStreamGetFlags` | `hipStreamGetFlags` | Query the flags of a stream. | | `cudaStreamGetPriority` | | Query the priority of a stream. | -| `cudaStreamQuery` | | Queries an asynchronous stream for completion status. | +| `cudaStreamQuery` | `hipStreamQuery` | Queries an asynchronous stream for completion status. | | `cudaStreamSynchronize` | `hipStreamSynchronize` | Waits for stream tasks to complete. | | `cudaStreamWaitEvent` | `hipStreamWaitEvent` | Make a compute stream wait on an event. | diff --git a/docs/markdown/hip_faq.md b/docs/markdown/hip_faq.md index 31a032469b..b09771ab71 100644 --- a/docs/markdown/hip_faq.md +++ b/docs/markdown/hip_faq.md @@ -146,11 +146,11 @@ The tools also struggle with more complex CUDA applications, in particular those - For Nvidia platforms, HIP requires Unified Memory and should run on a device which runs the CUDA SDK 6.0 or newer. We have tested the Nvidia Titan and K40. ### Does Hipify automatically convert all source code? 
-Typically, Hipify can automatically convert almost all run-time code, and the coordinate indexing device code. +Typically, Hipify can automatically convert almost all run-time code, and the coordinate indexing device code (e.g. threadIdx.x -> hipThreadIdx_x). Most device code needs no additional conversion, since HIP and CUDA have similar names for math and built-in functions. -HIP currently requires manual addition of one more arguments to the kernel so that the host can communicate the execution configuration to the device. +The clang-hipify tool will automatically modify the kernel signature as needed (automating a step that used to be done manually). Additional porting may be required to deal with architecture feature queries or with CUDA capabilities that HIP doesn't support. -Developers should always expect to perform some platform-specific tuning and optimization. +In general, developers should always expect to perform some platform-specific tuning and optimization. ### What is NVCC? NVCC is Nvidia's compiler driver for compiling "CUDA C++" code into PTX or device code for Nvidia GPUs. It's a closed-source binary product that comes with CUDA SDKs. 
diff --git a/samples/1_Utils/hipBusBandwidth/ResultDatabase.cpp b/samples/1_Utils/hipBusBandwidth/ResultDatabase.cpp index 2ec686f260..4be2ea258d 100644 --- a/samples/1_Utils/hipBusBandwidth/ResultDatabase.cpp +++ b/samples/1_Utils/hipBusBandwidth/ResultDatabase.cpp @@ -7,16 +7,22 @@ using namespace std; +#define SORT_RETAIN_ATTS_ORDER 1 + + bool ResultDatabase::Result::operator<(const Result &rhs) const { if (test < rhs.test) return true; if (test > rhs.test) return false; +#if (SORT_RETAIN_ATTS_ORDER == 0) + // For ties, sort by the value of the attribute: if (atts < rhs.atts) return true; if (atts > rhs.atts) return false; +#endif return false; // less-operator returns false on equal } @@ -189,7 +195,8 @@ void ResultDatabase::AddResult(const string &test_orig, void ResultDatabase::DumpDetailed(ostream &out) { vector sorted(results); - sort(sorted.begin(), sorted.end()); + + stable_sort(sorted.begin(), sorted.end()); const int testNameW = 24 ; const int attW = 12; @@ -281,7 +288,8 @@ void ResultDatabase::DumpDetailed(ostream &out) void ResultDatabase::DumpSummary(ostream &out) { vector sorted(results); - sort(sorted.begin(), sorted.end()); + + stable_sort(sorted.begin(), sorted.end()); const int testNameW = 24 ; const int attW = 12; @@ -377,7 +385,7 @@ void ResultDatabase::DumpCsv(string fileName) bool emptyFile; vector sorted(results); - sort(sorted.begin(), sorted.end()); + stable_sort(sorted.begin(), sorted.end()); //Check to see if the file is empty - if so, add the headers emptyFile = this->IsFileEmpty(fileName); diff --git a/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp b/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp index faff9ba6e9..a42a561ac7 100644 --- a/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp +++ b/samples/1_Utils/hipBusBandwidth/hipBusBandwidth.cpp @@ -49,8 +49,8 @@ std::string sizeToString(int size) using namespace std; stringstream ss; if (size < 0) { - // char (09, horiz tab) lexically sorts before " " so will cause Byte 
values to be displayed before kB. - ss << char(0x09)/*tab*/ << setfill('0') << setw(3) << -size << "B"; + // char (+) lexically sorts before the digits so will cause Byte values to be displayed before kB. + ss << "+" << setfill('0') << setw(3) << -size << "By"; } else { ss << size << "kB"; } diff --git a/tests/README.md b/tests/README.md index de73652e43..56bb4e7edd 100644 --- a/tests/README.md +++ b/tests/README.md @@ -51,9 +51,11 @@ ctest -R Memcpy ``` -### If a test fails: +### If a test fails - how to debug a test Extract the commandline from the testing log: + +(From the test build directory, perhaps hip/tests/build) $ grep -A3 -m2 hipMemcpy-size Testing/Temporary/LastTest.log 36/47 Testing: hipMemcpy-size 36/47 Test: hipMemcpy-size