From 30ce25e62796c8defaa456e5e7f9b4649537f5db Mon Sep 17 00:00:00 2001 From: Siu Chi Chan Date: Thu, 18 Oct 2018 16:53:03 -0400 Subject: [PATCH 1/6] Support more than one bundles in a single .kernel section When compiling with Early Finalization enabled in HCC, the resulting .kernel section of the host object now may contain more than one device code bundles. This is to teach the HIP runtime to correctly extract all the bundles from the .kernel section. --- include/hip/hcc_detail/code_object_bundle.hpp | 5 +++++ src/program_state.cpp | 14 ++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/hip/hcc_detail/code_object_bundle.hpp b/include/hip/hcc_detail/code_object_bundle.hpp index c36dd91813..7b97503c16 100644 --- a/include/hip/hcc_detail/code_object_bundle.hpp +++ b/include/hip/hcc_detail/code_object_bundle.hpp @@ -84,6 +84,9 @@ class Bundled_code_header { std::copy_n(f + y.header.offset, y.header.bundle_sz, std::back_inserter(y.blob)); it += y.header.triple_sz; + + x.bundled_code_size = std::max(x.bundled_code_size, + y.header.offset + y.header.bundle_sz); } return true; @@ -123,6 +126,8 @@ class Bundled_code_header { // MANIPULATORS Bundled_code_header& operator=(const Bundled_code_header&) = default; Bundled_code_header& operator=(Bundled_code_header&&) = default; + + size_t bundled_code_size = 0; }; // CREATORS diff --git a/src/program_state.cpp b/src/program_state.cpp index 8766134582..88cdeeb404 100644 --- a/src/program_state.cpp +++ b/src/program_state.cpp @@ -209,10 +209,16 @@ const unordered_map>>& code_object_blobs(bool reb nullptr); for (auto&& blob : blobs) { - Bundled_code_header tmp{blob}; - if (valid(tmp)) { - for (auto&& bundle : bundles(tmp)) { - r[triple_to_hsa_isa(bundle.triple)].push_back(bundle.blob); + for (auto sub_blob = blob.begin(); sub_blob != blob.end(); ) { + Bundled_code_header tmp(sub_blob, blob.end()); + if (valid(tmp)) { + for (auto&& bundle : bundles(tmp)) { + r[triple_to_hsa_isa(bundle.triple)].push_back(bundle.blob); + } + sub_blob+=tmp.bundled_code_size; + } + else { + break; } } } From 817b27d5306da56ddf7c8fe5438289261d595a41 Mon Sep 17 00:00:00 2001 From: Aaron Enye Shi Date: Fri, 19 Oct 2018 20:07:08 +0000 Subject: [PATCH 2/6] Remove libhip_device.a static library Move remaining function definitions from device_util.cpp to hip_runtime.h header. We can now remove the static library completely as device_util.cpp was the last file part of target hip::hip_device . --- CMakeLists.txt | 18 +++++-------- bin/hipcc | 4 +-- include/hip/hcc_detail/device_functions.h | 9 +++++++ include/hip/hcc_detail/hip_runtime.h | 3 +-- packaging/hip-targets-release.cmake | 10 ------- packaging/hip-targets.cmake | 16 +++-------- packaging/hip_hcc.txt | 1 - src/device_util.cpp | 33 ----------------------- 8 files changed, 23 insertions(+), 71 deletions(-) delete mode 100644 src/device_util.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c62ea4365..e018a5e4fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -207,9 +207,6 @@ if(HIP_PLATFORM STREQUAL "hcc") src/env.cpp src/program_state.cpp) - set(SOURCE_FILES_DEVICE - src/device_util.cpp) - execute_process(COMMAND ${HCC_HOME}/bin/hcc-config --ldflags OUTPUT_VARIABLE HCC_LD_FLAGS) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${HCC_LD_FLAGS} -Wl,-Bsymbolic") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --amdgpu-target=gfx701 --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906") @@ -222,19 +219,18 @@ if(HIP_PLATFORM STREQUAL "hcc") target_link_libraries(hip_hcc PRIVATE hc_am) target_link_libraries(hip_hcc_static PRIVATE hc_am) endif() - add_library(hip_device STATIC ${SOURCE_FILES_DEVICE}) string(REPLACE " " ";" HCC_CXX_FLAGS_LIST ${HCC_CXX_FLAGS}) - foreach(TARGET hip_hcc hip_hcc_static hip_device) + foreach(TARGET hip_hcc hip_hcc_static) target_include_directories(${TARGET} SYSTEM INTERFACE $/include>;${HSA_PATH}/include) endforeach() add_library(host INTERFACE) target_link_libraries(host INTERFACE hip_hcc) add_library(device INTERFACE) if(HIP_COMPILER STREQUAL "hcc") - target_link_libraries(device INTERFACE host hip_device hcc::hccrt hcc::hc_am) - elseif(HIP_COMPILER STREQUAL "clang") - target_link_libraries(device INTERFACE host hip_device) + target_link_libraries(device INTERFACE host hcc::hccrt hcc::hc_am) + else() + target_link_libraries(device INTERFACE host) endif() # Generate .hipInfo @@ -256,7 +252,7 @@ endif() ############################# # Install hip_hcc if platform is hcc if(HIP_PLATFORM STREQUAL "hcc") - install(TARGETS hip_hcc_static hip_hcc hip_device DESTINATION lib) + install(TARGETS hip_hcc_static hip_hcc DESTINATION lib) # Install .hipInfo install(FILES ${PROJECT_BINARY_DIR}/.hipInfo DESTINATION lib) @@ -284,7 +280,7 @@ set(BIN_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/bin) set(CONFIG_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/hip) if(HIP_PLATFORM STREQUAL "hcc") - install(TARGETS hip_hcc_static hip_hcc hip_device host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) + install(TARGETS hip_hcc_static hip_hcc host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) install(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::) include(CMakePackageConfigHelpers) @@ -353,7 +349,7 @@ add_custom_target(pkg_hip_hcc COMMAND ${CMAKE_COMMAND} . COMMAND cp *.rpm ${PROJECT_BINARY_DIR} COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR} WORKING_DIRECTORY ${BUILD_DIR} - DEPENDS hip_hcc hip_device hip_hcc_static) + DEPENDS hip_hcc hip_hcc_static) # Package: hip_nvcc set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_nvcc) diff --git a/bin/hipcc b/bin/hipcc index f22884ea1b..80234f8213 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -719,9 +719,9 @@ if ($setStdLib eq 0 and $HIP_PLATFORM eq 'hcc') if ($needHipHcc) { if ($linkType eq 0) { - substr($HIPLDFLAGS,0,0) = " $HIP_PATH/lib/libhip_hcc_static.a $HIP_PATH/lib/libhip_device.a " ; + substr($HIPLDFLAGS,0,0) = " $HIP_PATH/lib/libhip_hcc_static.a " ; } else { - substr($HIPLDFLAGS,0,0) = " -Wl,--rpath=$HIP_PATH/lib $HIP_PATH/lib/libhip_hcc.so $HIP_PATH/lib/libhip_device.a "; + substr($HIPLDFLAGS,0,0) = " -Wl,--rpath=$HIP_PATH/lib $HIP_PATH/lib/libhip_hcc.so "; } } diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h index 42927e3246..974c9b4618 100644 --- a/include/hip/hcc_detail/device_functions.h +++ b/include/hip/hcc_detail/device_functions.h @@ -774,6 +774,15 @@ static void __threadfence_system() __atomic_work_item_fence(0, __memory_order_seq_cst, __memory_scope_all_svm_devices); } +// abort +__device__ +inline +__attribute__((weak)) +void abort() { + return __builtin_trap(); +} + + #endif // __HCC_OR_HIP_CLANG__ #ifdef __HCC__ diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index 3db06bb15e..60d145c884 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -181,8 +181,7 @@ extern int HIP_TRACE_API; #define __HCC_C__ #endif -// abort -__device__ void abort(); +__host__ inline void* __get_dynamicgroupbaseptr() { return nullptr; } #if __HIP_ARCH_GFX701__ == 0 diff --git a/packaging/hip-targets-release.cmake b/packaging/hip-targets-release.cmake index 0ae7405cde..83c207810c 100644 --- a/packaging/hip-targets-release.cmake +++ b/packaging/hip-targets-release.cmake @@ -41,15 +41,5 @@ endif() list(APPEND _IMPORT_CHECK_TARGETS hip::hip_hcc ) list(APPEND _IMPORT_CHECK_FILES_FOR_hip::hip_hcc "/opt/rocm/hip/lib/libhip_hcc.so" ) -# Import target "hip::hip_device" for configuration "Release" -set_property(TARGET hip::hip_device APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) -set_target_properties(hip::hip_device PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" - IMPORTED_LOCATION_RELEASE "/opt/rocm/hip/lib/libhip_device.a" - ) - -list(APPEND _IMPORT_CHECK_TARGETS hip::hip_device ) -list(APPEND _IMPORT_CHECK_FILES_FOR_hip::hip_device "/opt/rocm/hip/lib/libhip_device.a" ) - # Commands beyond this point should not need to know the version. set(CMAKE_IMPORT_FILE_VERSION) diff --git a/packaging/hip-targets.cmake b/packaging/hip-targets.cmake index ec2fa716a6..d7a6b4d588 100644 --- a/packaging/hip-targets.cmake +++ b/packaging/hip-targets.cmake @@ -16,7 +16,7 @@ set(CMAKE_IMPORT_FILE_VERSION 1) set(_targetsDefined) set(_targetsNotDefined) set(_expectedTargets) -foreach(_expectedTarget hip::hip_hcc_static hip::hip_hcc hip::hip_device hip::host hip::device) +foreach(_expectedTarget hip::hip_hcc_static hip::hip_hcc hip::host hip::device) list(APPEND _expectedTargets ${_expectedTarget}) if(NOT TARGET ${_expectedTarget}) list(APPEND _targetsNotDefined ${_expectedTarget}) @@ -57,14 +57,6 @@ set_target_properties(hip::hip_hcc PROPERTIES INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;/opt/rocm/hsa/include" ) -# Create imported target hip::hip_device -add_library(hip::hip_device STATIC IMPORTED) - -set_target_properties(hip::hip_device PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;/opt/rocm/hsa/include" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;/opt/rocm/hsa/include" -) - # Create imported target hip::host add_library(hip::host INTERFACE IMPORTED) @@ -75,13 +67,13 @@ set_target_properties(hip::host PROPERTIES # Create imported target hip::device add_library(hip::device INTERFACE IMPORTED) -if(HIP_COMPILER STREQUAL "clang") +if(HIP_COMPILER STREQUAL "hcc") set_target_properties(hip::device PROPERTIES - INTERFACE_LINK_LIBRARIES "hip::host;hip::hip_device" + INTERFACE_LINK_LIBRARIES "hip::host;hcc::hccrt;hcc::hc_am" ) else() set_target_properties(hip::device PROPERTIES - INTERFACE_LINK_LIBRARIES "hip::host;hip::hip_device;hcc::hccrt;hcc::hc_am" + INTERFACE_LINK_LIBRARIES "hip::host" ) endif() diff --git a/packaging/hip_hcc.txt b/packaging/hip_hcc.txt index 9d4b96761d..fe866e47f9 100644 --- a/packaging/hip_hcc.txt +++ b/packaging/hip_hcc.txt @@ -3,7 +3,6 @@ project(hip_hcc) install(FILES @PROJECT_BINARY_DIR@/libhip_hcc.so DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/libhip_hcc_static.a DESTINATION lib) -install(FILES @PROJECT_BINARY_DIR@/libhip_device.a DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip) install(FILES @hip_SOURCE_DIR@/packaging/hip-targets.cmake @hip_SOURCE_DIR@/packaging/hip-targets-release.cmake DESTINATION lib/cmake/hip) diff --git a/src/device_util.cpp b/src/device_util.cpp deleted file mode 100644 index c86e52617b..0000000000 --- a/src/device_util.cpp +++ /dev/null @@ -1,33 +0,0 @@ -/* -Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include "device_util.h" -#include "hip/hcc_detail/device_functions.h" -#include "hip/hcc_detail/grid_launch.h" -#include "hip/hip_runtime.h" -#include - -// abort -__device__ void abort() { return hc::abort(); } - -__host__ void* __get_dynamicgroupbaseptr() { return nullptr; } From 21f044eac889820150d6400f540145783a78a228 Mon Sep 17 00:00:00 2001 From: Anton Gorenko Date: Mon, 29 Oct 2018 18:12:00 +0600 Subject: [PATCH 3/6] Fix allocation size of arrays with multiple and/or non-32-bit channels hipMallocArray and hipMalloc3DArray must use sum of bits of all components. --- src/hip_memory.cpp | 43 +++++-------------------------------------- 1 file changed, 5 insertions(+), 38 deletions(-) diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index bda6ad2650..93ac527826 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -574,24 +574,8 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, si size = size * height; } - size_t allocSize = 0; - switch (desc->f) { - case hipChannelFormatKindSigned: - allocSize = size * sizeof(int); - break; - case hipChannelFormatKindUnsigned: - allocSize = size * sizeof(unsigned int); - break; - case hipChannelFormatKindFloat: - allocSize = size * sizeof(float); - break; - case hipChannelFormatKindNone: - allocSize = size * sizeof(size_t); - break; - default: - hip_status = hipErrorUnknown; - break; - } + const size_t allocSize = size * ((desc->x + desc->y + desc->z + desc->w) / 8); + hc::accelerator acc = ctx->getDevice()->_acc; hsa_agent_t* agent = static_cast(acc.get_hsa_agent()); @@ -800,24 +784,7 @@ hipError_t hipMalloc3DArray(hipArray** array, const struct hipChannelFormatDesc* const unsigned am_flags = 0; const size_t size = extent.width * extent.height * extent.depth; - size_t allocSize = 0; - switch (desc->f) { - case hipChannelFormatKindSigned: - allocSize = size * sizeof(int); - break; - case hipChannelFormatKindUnsigned: - allocSize = size * sizeof(unsigned int); - break; - case hipChannelFormatKindFloat: - allocSize = size * sizeof(float); - break; - case hipChannelFormatKindNone: - allocSize = size * sizeof(size_t); - break; - default: - hip_status = hipErrorUnknown; - break; - } + const size_t allocSize = size * ((desc->x + desc->y + desc->z + desc->w) / 8); hc::accelerator acc = ctx->getDevice()->_acc; hsa_agent_t* agent = static_cast(acc.get_hsa_agent()); @@ -1688,7 +1655,7 @@ hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t sp } else { try { if(!isLocked){ - for (int i = 0; i < height; ++i) + for (int i = 0; i < height; ++i) e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch, (unsigned char*)src + i * spitch, width, kind, stream); } else{ @@ -1738,7 +1705,7 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { stream->locked_wait(); } else { e = hipErrorInvalidValue; - } + } return ihipLogStatus(e); } From bb447dd76e67012968ceb5344cc7e1c9881cef5e Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Mon, 29 Oct 2018 09:35:25 -0700 Subject: [PATCH 4/6] typedef struct hipFuncAttributes now C compatible. Fixes #591. Fixes #694. --- include/hip/hcc_detail/hip_runtime_api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 60609fd135..c9ff32d197 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -94,7 +94,7 @@ typedef struct ihipModule_t* hipModule_t; typedef struct ihipModuleSymbol_t* hipFunction_t; -struct hipFuncAttributes { +typedef struct hipFuncAttributes { int binaryVersion; int cacheModeCA; size_t constSizeBytes; @@ -105,7 +105,7 @@ struct hipFuncAttributes { int preferredShmemCarveout; int ptxVersion; size_t sharedSizeBytes; -}; +} hipFuncAttributes; typedef struct ihipEvent_t* hipEvent_t; From eff5d3fc1b7125116472d01748d5bae3eaae3589 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 1 Nov 2018 16:57:57 +0300 Subject: [PATCH 5/6] [HIPIFY][fix] Fix typo and functions' string arguments + Fix typo with missing comma in counterNames array + Change std::string argument to const std::string& argument in all functions --- hipify-clang/src/Statistics.cpp | 12 ++++++------ hipify-clang/src/Statistics.h | 12 ++++++------ hipify-clang/src/StringUtils.cpp | 2 +- hipify-clang/src/StringUtils.h | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/hipify-clang/src/Statistics.cpp b/hipify-clang/src/Statistics.cpp index 86eb7c753c..2d37c3156c 100644 --- a/hipify-clang/src/Statistics.cpp +++ b/hipify-clang/src/Statistics.cpp @@ -14,7 +14,7 @@ const char *counterNames[NUM_CONV_TYPES] = { "addressing", // CONV_ADDRESSING "stream", // CONV_STREAM "event", // CONV_EVENT - "external_resource_interop" // CONV_EXT_RES + "external_resource_interop", // CONV_EXT_RES "stream_memory", // CONV_STREAM_MEMORY "execution", // CONV_EXECUTION "graph", // CONV_GRAPH @@ -74,7 +74,7 @@ void printStat(std::ostream *csv, llvm::raw_ostream* printOut, const std::string } // Anonymous namespace -void StatCounter::incrementCounter(const hipCounter& counter, std::string name) { +void StatCounter::incrementCounter(const hipCounter& counter, const std::string& name) { counters[name]++; apiCounters[(int) counter.apiType]++; convTypeCounters[(int) counter.type]++; @@ -100,7 +100,7 @@ int StatCounter::getConvSum() { return acc; } -void StatCounter::print(std::ostream* csv, llvm::raw_ostream* printOut, std::string prefix) { +void StatCounter::print(std::ostream* csv, llvm::raw_ostream* printOut, const std::string& prefix) { conditionalPrint(csv, printOut, "\nCUDA ref type;Count\n", "[HIPIFY] info: " + prefix + " refs by type:\n"); for (int i = 0; i < NUM_CONV_TYPES; i++) { if (convTypeCounters[i] > 0) { @@ -117,7 +117,7 @@ void StatCounter::print(std::ostream* csv, llvm::raw_ostream* printOut, std::str } } -Statistics::Statistics(std::string name): fileName(name) { +Statistics::Statistics(const std::string& name): fileName(name) { // Compute the total bytes/lines in the input file. std::ifstream src_file(name, std::ios::binary | std::ios::ate); src_file.clear(); @@ -129,7 +129,7 @@ Statistics::Statistics(std::string name): fileName(name) { ///////// Counter update routines ////////// -void Statistics::incrementCounter(const hipCounter &counter, std::string name) { +void Statistics::incrementCounter(const hipCounter &counter, const std::string& name) { if (counter.unsupported) { unsupported.incrementCounter(counter, name); } else { @@ -218,7 +218,7 @@ Statistics& Statistics::current() { return *Statistics::currentStatistics; } -void Statistics::setActive(std::string name) { +void Statistics::setActive(const std::string& name) { stats.emplace(std::make_pair(name, Statistics{name})); Statistics::currentStatistics = &stats.at(name); } diff --git a/hipify-clang/src/Statistics.h b/hipify-clang/src/Statistics.h index 53d017b1a3..0ce8e0de67 100644 --- a/hipify-clang/src/Statistics.h +++ b/hipify-clang/src/Statistics.h @@ -22,7 +22,7 @@ enum ConvTypes { // Driver API : 5.5. Device Management, 5.6. Device Management [DEPRECATED] // Runtime API: 5.1. Device Management CONV_DEVICE, - // Driver API : 5.8.Context Management, 5.9. Context Management [DEPRECATED] + // Driver API : 5.7. Primary Context Management, 5.8.Context Management, 5.9. Context Management [DEPRECATED] CONV_CONTEXT, // Driver API : 5.10. Module Management CONV_MODULE, @@ -134,11 +134,11 @@ private: int convTypeCounters[NUM_CONV_TYPES] = {}; public: - void incrementCounter(const hipCounter& counter, std::string name); + void incrementCounter(const hipCounter& counter, const std::string& name); // Add the counters from `other` onto the counters of this object. void add(const StatCounter& other); int getConvSum(); - void print(std::ostream* csv, llvm::raw_ostream* printOut, std::string prefix); + void print(std::ostream* csv, llvm::raw_ostream* printOut, const std::string& prefix); }; /** @@ -156,8 +156,8 @@ class Statistics { chr::steady_clock::time_point completionTime; public: - Statistics(std::string name); - void incrementCounter(const hipCounter &counter, std::string name); + Statistics(const std::string& name); + void incrementCounter(const hipCounter &counter, const std::string& name); // Add the counters from `other` onto the counters of this object. void add(const Statistics &other); void lineTouched(int lineNumber); @@ -192,5 +192,5 @@ public: * Set the active Statistics object to the named one, creating it if necessary, and write the completion * timestamp into the currently active one. */ - static void setActive(std::string name); + static void setActive(const std::string& name); }; diff --git a/hipify-clang/src/StringUtils.cpp b/hipify-clang/src/StringUtils.cpp index 6504d39010..3aaa4d7909 100644 --- a/hipify-clang/src/StringUtils.cpp +++ b/hipify-clang/src/StringUtils.cpp @@ -7,7 +7,7 @@ llvm::StringRef unquoteStr(llvm::StringRef s) { return s; } -void removePrefixIfPresent(std::string &s, std::string prefix) { +void removePrefixIfPresent(std::string &s, const std::string& prefix) { if (s.find(prefix) != 0) { return; } diff --git a/hipify-clang/src/StringUtils.h b/hipify-clang/src/StringUtils.h index c0be9f6227..8c5bf58da8 100644 --- a/hipify-clang/src/StringUtils.h +++ b/hipify-clang/src/StringUtils.h @@ -11,4 +11,4 @@ llvm::StringRef unquoteStr(llvm::StringRef s); /** * If `s` starts with `prefix`, remove it. Otherwise, does nothing. */ -void removePrefixIfPresent(std::string &s, std::string prefix); +void removePrefixIfPresent(std::string &s, const std::string& prefix); From 9f1666e37a4431bcfef35b4947c1defd3c3570c0 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 1 Nov 2018 19:11:40 +0300 Subject: [PATCH 6/6] [HIPIFY][doc] Update README.md + Split Linux and Windows sections + Rewrite Testing section --- hipify-clang/README.md | 190 ++++++++++++++++++++++++++++++++++------- 1 file changed, 157 insertions(+), 33 deletions(-) diff --git a/hipify-clang/README.md b/hipify-clang/README.md index ec46c7cb2f..431c0a844a 100644 --- a/hipify-clang/README.md +++ b/hipify-clang/README.md @@ -11,6 +11,7 @@ - [Build and install](#build-and-install) * [Building](#building) * [Testing](#testing) + * [Linux](#linux) * [Windows](#windows) - [Running and using hipify-clang](#running-and-using-hipify-clang) - [Disclaimer](#disclaimer) @@ -80,7 +81,7 @@ Debug build type `-DCMAKE_BUILD_TYPE=Debug` is also supported and tested; `LLVM+ The binary can then be found at `./dist/bin/hipify-clang`. -### Test +### Testing `hipify-clang` has unit tests using LLVM [`lit`](https://llvm.org/docs/CommandGuide/lit.html)/[`FileCheck`](https://llvm.org/docs/CommandGuide/FileCheck.html). @@ -88,61 +89,183 @@ The binary can then be found at `./dist/bin/hipify-clang`. To run it: 1. Download [`LLVM`](http://releases.llvm.org/6.0.1/llvm-6.0.1.src.tar.xz)+[`CLANG`](http://releases.llvm.org/6.0.1/cfe-6.0.1.src.tar.xz) sources. -2. Build [`LLVM+CLANG`](http://llvm.org/docs/CMake.html). - For instance: - ```shell +2. Build [`LLVM+CLANG`](http://llvm.org/docs/CMake.html): + ```shell cd llvm mkdir build dist cd build + ``` + - **Linux**: - cmake \ - -DCMAKE_INSTALL_PREFIX=../dist \ - -DLLVM_SOURCE_DIR=../llvm \ - -DCMAKE_BUILD_TYPE=Release \ - -Thost=x64 \ - ../llvm + ```shell + cmake \ + -DCMAKE_INSTALL_PREFIX=../dist \ + -DLLVM_SOURCE_DIR=../llvm \ + -DCMAKE_BUILD_TYPE=Release \ + ../llvm + make -j install + ``` + - **Windows**: + +```shell + cmake \ + -G "Visual Studio 15 2017 Win64" \ + -DCMAKE_INSTALL_PREFIX=../dist \ + -DLLVM_SOURCE_DIR=../llvm \ + -DCMAKE_BUILD_TYPE=Release \ + -Thost=x64 \ + ../llvm +``` + +                Run `Visual Studio 15 2017`, open the generated `LLVM.sln`, build all, build project `INSTALL`. - make -j install - ``` - On Windows the following option should be specified for `cmake` at first place: `-G "Visual Studio 15 2017 Win64"`; the generated `LLVM.sln` should be built by `Visual Studio 15 2017` instead of `make`. 3. Ensure [`CUDA`](https://developer.nvidia.com/cuda-toolkit-archive) of minimum version 7.5 is installed. - * Having multiple CUDA installations, in order to choose a particular version the `DCUDA_TOOLKIT_ROOT_DIR` option should be specified: + * Having multiple CUDA installations to choose a particular version the `DCUDA_TOOLKIT_ROOT_DIR` option should be specified: - `-DCUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0"` + - Linux: `-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-8.0` - * On Windows `CUDA_SDK_ROOT_DIR` option should be specified as well: + - Windows: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0"` - `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v9.0"` + `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v9.0"` 4. Ensure [`cuDNN`](https://developer.nvidia.com/rdp/cudnn-archive) of version corresponding to CUDA's version is installed. * Path to cuDNN should be specified by the `CUDA_DNN_ROOT_DIR` option: - `-DCUDA_DNN_ROOT_DIR=f:/CUDNN/cudnn-9.0-windows10-x64-v7.1` + - Linux: `-DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-8.0-v7.1` + + - Windows: `-DCUDA_DNN_ROOT_DIR=f:/CUDNN/cudnn-9.0-windows10-x64-v7.1` 5. Ensure [`python`](https://www.python.org/downloads) of minimum required version 2.7 is installed. 6. Ensure `lit` and `FileCheck` are installed - these are distributed with LLVM. - * installing `lit` into `python` might be required: + * Install `lit` into `python`: - `python f:/LLVM/6.0.1/llvm/utils/lit/setup.py install`, + - Linux: `python /srv/git/LLVM/6.0.1/llvm/utils/lit/setup.py install` - where `f:/LLVM/6.0.1/llvm` is LLVM sources root directory. + - Windows: `python f:/LLVM/6.0.1/llvm/utils/lit/setup.py install` - * Starting with LLVM 6.0.1 path to llvm-lit.py script should be specified by the `LLVM_EXTERNAL_LIT` option: + * Starting with LLVM 6.0.1 path to `llvm-lit` python script should be specified by the `LLVM_EXTERNAL_LIT` option: - `-DLLVM_EXTERNAL_LIT=f:/LLVM/6.0.1/build/Release/bin/llvm-lit.py`, + - Linux: `-DLLVM_EXTERNAL_LIT=/srv/git/LLVM/6.0.1/build/bin/llvm-lit` - where `f:/LLVM/6.0.1/build/Release` is LLVM build directory. + - Windows: `-DLLVM_EXTERNAL_LIT=f:/LLVM/6.0.1/build/Release/bin/llvm-lit.py` -7. Build with the `HIPIFY_CLANG_TESTS` option turned on: -DHIPIFY_CLANG_TESTS=1. +7. Set `HIPIFY_CLANG_TESTS` option turned on: `-DHIPIFY_CLANG_TESTS=1`. -8. `make test-hipify` +8. Run `cmake`: + * [Linux](#linux) + * [Windows](#windows) - On Windows after `cmake` the project `test-hipify` in the generated `hipify-clang.sln` should be built by `Visual Studio 15 2017` instead of `make test-hipify`. +9. Run tests: + + - Linux: `make test-hipify`. + + - Windows: run `Visual Studio 15 2017`, open the generated `hipify-clang.sln`, build project `test-hipify`. + +### Linux + +On Linux (Ubuntu 14-18) the following configurations are tested: + +LLVM 5.0.0 - 6.0.1, CUDA 8.0, cudnn-8.0 + +Build system for the above configurations: + +Python 2.7 (min), cmake 3.5.2 (min), GNU C/C++ 5.4.0 (min). + +Here is an example of building `hipify-clang` with testing support on `Ubuntu 16.04`: + +```shell +cmake + -DHIPIFY_CLANG_TESTS=1 \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=../dist \ + -DCMAKE_PREFIX_PATH=/srv/git/LLVM/6.0.1/dist \ + -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-8.0 \ + -DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-8.0-v7.1 \ + -DLLVM_EXTERNAL_LIT=/srv/git/LLVM/6.0.1/build/bin/llvm-lit \ + .. +``` +*A corresponding successful output:* +```shell +-- The C compiler identification is GNU 5.4.0 +-- The CXX compiler identification is GNU 5.4.0 +-- Check for working C compiler: /usr/bin/cc +-- Check for working C compiler: /usr/bin/cc -- works +-- Detecting C compiler ABI info +-- Detecting C compiler ABI info - done +-- Detecting C compile features +-- Detecting C compile features - done +-- Check for working CXX compiler: /usr/bin/c++ +-- Check for working CXX compiler: /usr/bin/c++ -- works +-- Detecting CXX compiler ABI info +-- Detecting CXX compiler ABI info - done +-- Detecting CXX compile features +-- Detecting CXX compile features - done +-- Found LLVM 6.0.1: +-- - CMake module path: /srv/git/LLVM/6.0.1/dist/lib/cmake/llvm +-- - Include path : /srv/git/LLVM/6.0.1/dist/include +-- - Binary path : /srv/git/LLVM/6.0.1/dist/bin +-- Linker detection: GNU ld +-- Found PythonInterp: /usr/bin/python2.7 (found suitable version "2.7.12", minimum required is "2.7") +-- Found lit: /usr/local/bin/lit +-- Found FileCheck: /srv/git/LLVM/6.0.1/dist/bin/FileCheck +-- Looking for pthread.h +-- Looking for pthread.h - found +-- Looking for pthread_create +-- Looking for pthread_create - not found +-- Looking for pthread_create in pthreads +-- Looking for pthread_create in pthreads - not found +-- Looking for pthread_create in pthread +-- Looking for pthread_create in pthread - found +-- Found Threads: TRUE +-- Found CUDA: /usr/local/cuda-8.0 (found version "8.0") +-- Configuring done +-- Generating done +-- Build files have been written to: /srv/git/HIP/hipify-clang/build +``` +```shell +make test-hipify +``` +*A corresponding successful output:* +```shell +[100%] Running HIPify regression tests +-- Testing: 28 tests, 12 threads -- +PASS: hipify :: allocators.cu (1 of 28) +PASS: hipify :: coalescing.cu (2 of 28) +PASS: hipify :: cuDNN/cudnn_softmax.cu (3 of 28) +PASS: hipify :: cuFFT/simple_cufft.cu (4 of 28) +PASS: hipify :: cuComplex/cuComplex_Julia.cu (5 of 28) +PASS: hipify :: cuBLAS/cublas_sgemm_matrix_multiplication.cu (6 of 28) +PASS: hipify :: cuBLAS/cublas_1_based_indexing.cu (7 of 28) +PASS: hipify :: cuBLAS/cublas_0_based_indexing.cu (8 of 28) +PASS: hipify :: axpy.cu (9 of 28) +PASS: hipify :: dynamic_shared_memory.cu (10 of 28) +PASS: hipify :: headers_test_01.cu (11 of 28) +PASS: hipify :: headers_test_02.cu (12 of 28) +PASS: hipify :: headers_test_03.cu (13 of 28) +PASS: hipify :: headers_test_05.cu (14 of 28) +PASS: hipify :: cuDNN/cudnn_convolution_forward.cu (15 of 28) +PASS: hipify :: cuRAND/poisson_api_example.cu (16 of 28) +PASS: hipify :: cudaRegister.cu (17 of 28) +PASS: hipify :: headers_test_06.cu (18 of 28) +PASS: hipify :: headers_test_04.cu (19 of 28) +PASS: hipify :: intro.cu (20 of 28) +PASS: hipify :: headers_test_07.cu (21 of 28) +PASS: hipify :: square.cu (22 of 28) +PASS: hipify :: static_shared_memory.cu (23 of 28) +PASS: hipify :: vec_add.cu (24 of 28) +PASS: hipify :: headers_test_08.cu (25 of 28) +PASS: hipify :: cuRAND/benchmark_curand_generate.cpp (26 of 28) +PASS: hipify :: cuRAND/benchmark_curand_kernel.cpp (27 of 28) +PASS: hipify :: headers_test_09.cu (28 of 28) +Testing Time: 1.71s + Expected Passes : 28 +[100%] Built target test-hipify +``` ### Windows @@ -172,7 +295,7 @@ cmake -Thost=x64 .. ``` -A corresponding successful output: +*A corresponding successful output:* ```shell -- Found LLVM 6.0.1: -- - CMake module path: F:/LLVM/6.0.1/dist/lib/cmake/llvm @@ -194,12 +317,13 @@ To process a file, `hipify-clang` needs access to the same headers that would be For example: ```shell -hipify-clang square.cu -- \ +./hipify-clang \ + square.cu \ + -- \ -x cuda \ - --cuda-path=/opt/cuda \ - --cuda-gpu-arch=sm_30 \ - -isystem /opt/cuda/samples/common/inc - -I /opt/cuda/cuDNN + --cuda-path=/usr/local/cuda-8.0 \ + --cuda-gpu-arch=sm_50 \ + -isystem /usr/local/cuda-8.0/samples/common/inc ``` `hipify-clang` arguments are given first, followed by a separator, and then the arguments you'd pass to `clang` if you