diff --git a/projects/clr/hipamd/CMakeLists.txt b/projects/clr/hipamd/CMakeLists.txt index 9b62c20e59..7df6bed4db 100644 --- a/projects/clr/hipamd/CMakeLists.txt +++ b/projects/clr/hipamd/CMakeLists.txt @@ -8,6 +8,8 @@ set(HIP_VERSION_MAJOR "0") set(HIP_VERSION_MINOR "84") set(HIP_VERSION_PATCH "0") +add_subdirectory(clang-hipify) + ############################# # Configure variables ############################# @@ -16,10 +18,9 @@ if(NOT DEFINED HIP_PLATFORM) if(NOT DEFINED ENV{HIP_PLATFORM}) execute_process(COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bin/hipconfig --platform OUTPUT_VARIABLE HIP_PLATFORM - OUTPUT_STRIP_TRAILING_WHITESPACE) else() set(HIP_PLATFORM $ENV{HIP_PLATFORM} CACHE STRING "HIP Platform") - endif() + OUTPUT_STRIP_TRAILING_WHITESPACE) endif() message(STATUS "HIP Platform: " ${HIP_PLATFORM}) diff --git a/projects/clr/hipamd/README.md b/projects/clr/hipamd/README.md index d597a9e7d8..2244c656cd 100644 --- a/projects/clr/hipamd/README.md +++ b/projects/clr/hipamd/README.md @@ -15,6 +15,7 @@ New projects can be developed directly in the portable HIP C++ language and can cd HIP-privatestaging mkdir build cd build +<<<<<<< HEAD cmake .. make make install @@ -26,6 +27,13 @@ make install *By default cmake installs HIP to /opt/rocm/hip (can be overridden by setting ```-DCMAKE_INSTALL_PREFIX=/where/to/install/hip``` in the cmake step).* *Make sure HIP_PATH is pointed to `/where/to/install/hip` and PATH includes `$HIP_PATH/bin`. This requirement is optional, but required to run any HIP test infrastructure.* +======= +cmake -DHSA_PATH=/path/to/hsa -DHCC_HOME=/path/to/hcc -DCMAKE_INSTALL_PREFIX=/where/to/install/hip -DLLVM_DIR=/path/to/clang-llvm-3.8 -DCMAKE_BUILD_TYPE=Release .. +make +make install +``` +Make sure HIP_PATH is pointed to `/where/to/install/hip` and PATH includes `$HIP_PATH/bin`. This requirement is optional, but required to run any HIP test infrastructure. The path `/path/to/clang-llvm-3.8` should be specified for [clang-hipify](README.md#clang-hipify) utility build. +>>>>>>> clang-hipify ## More Info: - [HIP FAQ](docs/markdown/hip_faq.md) @@ -52,6 +60,22 @@ HIP code can be developed either on AMD ROCm platform using hcc compiler, or a C * Optionally, consider adding /opt/rocm/bin to your path to make it easier to use the tools. + +#####clang-hipify +To build and run clang based hipify utiliy a set of CUDA headers and clang+llvm 3.8 binary package are required: +- download and install CUDA minimal prerequisites: + 1. Download "deb(network)" variant of target installer from https://developer.nvidia.com/cuda-downloads. E.g. at the moment the link is http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-repo-ubuntu1404_7.5-18_amd64.deb + 2. install clang prerequisites with the following commands: +``` +wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-repo-ubuntu1404_7.5-18_amd64.deb +sudo dpkg -i cuda-repo-ubuntu1404_7.5-18_amd64.deb +sudo apt-get update && sudo apt-get install cuda-minimal-build-7-5 cuda-curand-dev-7-5 +``` +- download and unpack clang+llvm 3.8 binary package: +``` +wget http://llvm.org/releases/3.8.0/clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz +tar xvfJ clang+llvm-3.8.0-x86_64-linux-gnu-ubuntu-14.04.tar.xz -C /path/to/clang-llvm-3.8 +``` #### NVIDIA (nvcc) * Install CUDA SDK from manufacturer website * By default HIP looks for CUDA SDK in /usr/local/cuda (can be overriden by setting CUDA_PATH env variable) diff --git a/projects/clr/hipamd/clang-hipify/CMakeLists.txt b/projects/clr/hipamd/clang-hipify/CMakeLists.txt new file mode 100644 index 0000000000..574677aa7e --- /dev/null +++ b/projects/clr/hipamd/clang-hipify/CMakeLists.txt @@ -0,0 +1,74 @@ +cmake_minimum_required(VERSION 2.8.8) + +project(hipify-clang) + +find_package(LLVM 3.8 REQUIRED PATHS ${LLVM_DIR} NO_DEFAULT_PATH) + +list(APPEND CMAKE_MODULE_PATH ${LLVM_CMAKE_DIR}) +include(AddLLVM) + +message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") + +include_directories(${LLVM_INCLUDE_DIRS}) +link_directories(${LLVM_LIBRARY_DIRS}) +add_definitions(${LLVM_DEFINITIONS}) +add_llvm_executable(hipify-clang src/Cuda2Hip.cpp ) +find_program(LIT_COMMAND lit) + +# Link against LLVM and CLANG tools libraries +target_link_libraries(hipify-clang + clangASTMatchers + clangFrontend + clangTooling + clangParse + clangSerialization + clangSema + clangEdit + clangLex + clangAnalysis + clangDriver + clangAST + clangToolingCore + clangRewrite + clangBasic + LLVMProfileData + LLVMSupport + LLVMMCParser + LLVMMC + LLVMBitReader + LLVMOption + LLVMCore) + +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_CFLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CFLAGS}") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pthread -fno-rtti -fvisibility-inlines-hidden") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHIPIFY_CLANG_RES=\\\"${LLVM_LIBRARY_DIRS}/clang/${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}\\\"") + +install(TARGETS hipify-clang + DESTINATION bin) + +# tests +set(Python_ADDITIONAL_VERSIONS 2.7) +include(FindPythonInterp) +if( NOT PYTHONINTERP_FOUND ) + message(FATAL_ERROR + "Unable to find Python interpreter, required for builds and testing\n\n" + "Please install Python or specify the PYTHON_EXECUTABLE CMake variable.") +endif() + +set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} ) + +configure_file( + ${CMAKE_SOURCE_DIR}/tests/clang-hipify/lit.site.cfg.in + ${CMAKE_CURRENT_BINARY_DIR}/tests/clang-hipify/lit.site.cfg + @ONLY) + +add_lit_testsuite(test-hipify "Running HIPify regression tests" + ${CMAKE_SOURCE_DIR}/tests/clang-hipify + PARAMS site_config=${CMAKE_CURRENT_BINARY_DIR}/tests/clang-hipify/lit.site.cfg + DEPENDS hipify-clang lit + ) + +add_custom_target(test-clang-hipify) +add_dependencies(test-clang-hipify test-hipify) +set_target_properties(test-clang-hipify PROPERTIES FOLDER "Tests") diff --git a/projects/clr/hipamd/clang-hipify/src/Cuda2Hip.cpp b/projects/clr/hipamd/clang-hipify/src/Cuda2Hip.cpp new file mode 100644 index 0000000000..1e29c65010 --- /dev/null +++ b/projects/clr/hipamd/clang-hipify/src/Cuda2Hip.cpp @@ -0,0 +1,937 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +/** + * @file Cuda2Hip.cpp + * + * This file is compiled and linked into clang based hipify tool. + */ +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/FrontendActions.h" +#include "clang/Frontend/TextDiagnosticPrinter.h" +#include "clang/Lex/Lexer.h" +#include "clang/Lex/MacroArgs.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Rewrite/Core/Rewriter.h" +#include "clang/Tooling/CommonOptionsParser.h" +#include "clang/Tooling/Refactoring.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Signals.h" + +#include +#include + +using namespace clang; +using namespace clang::ast_matchers; +using namespace clang::tooling; +using namespace llvm; + +#define DEBUG_TYPE "cuda2hip" + +enum ConvTypes { + CONV_DEV = 0, + CONV_MEM, + CONV_KERN, + CONV_COORD_FUNC, + CONV_MATH_FUNC, + CONV_SPECIAL_FUNC, + CONV_STREAM, + CONV_EVENT, + CONV_ERR, + CONV_DEF, + CONV_TEX, + CONV_OTHER, + CONV_INCLUDE, + CONV_LITERAL, + CONV_LAST +}; + +const char *counterNames[ConvTypes::CONV_LAST] = { + "dev", "mem", "kern", "coord_func", "math_func", + "special_func", "stream", "event", "err", "def", + "tex", "other", "include", "literal"}; + +namespace { + +struct cuda2hipMap { + cuda2hipMap() { + // defines + cuda2hipRename["__CUDACC__"] = {"__HIPCC__", CONV_DEF}; + + // includes + cuda2hipRename["cuda_runtime.h"] = {"hip_runtime.h", CONV_INCLUDE}; + cuda2hipRename["cuda_runtime_api.h"] = {"hip_runtime_api.h", CONV_INCLUDE}; + + // Error codes and return types: + cuda2hipRename["cudaError_t"] = {"hipError_t", CONV_ERR}; + cuda2hipRename["cudaError"] = {"hipError", CONV_ERR}; + cuda2hipRename["cudaSuccess"] = {"hipSuccess", CONV_ERR}; + + cuda2hipRename["cudaErrorUnknown"] = {"hipErrorUnknown", CONV_ERR}; + cuda2hipRename["cudaErrorMemoryAllocation"] = {"hipErrorMemoryAllocation", + CONV_ERR}; + cuda2hipRename["cudaErrorMemoryFree"] = {"hipErrorMemoryFree", CONV_ERR}; + cuda2hipRename["cudaErrorUnknownSymbol"] = {"hipErrorUnknownSymbol", + CONV_ERR}; + cuda2hipRename["cudaErrorOutOfResources"] = {"hipErrorOutOfResources", + CONV_ERR}; + cuda2hipRename["cudaErrorInvalidValue"] = {"hipErrorInvalidValue", + CONV_ERR}; + cuda2hipRename["cudaErrorInvalidResourceHandle"] = { + "hipErrorInvalidResourceHandle", CONV_ERR}; + cuda2hipRename["cudaErrorInvalidDevice"] = {"hipErrorInvalidDevice", + CONV_ERR}; + cuda2hipRename["cudaErrorNoDevice"] = {"hipErrorNoDevice", CONV_ERR}; + cuda2hipRename["cudaErrorNotReady"] = {"hipErrorNotReady", CONV_ERR}; + cuda2hipRename["cudaErrorUnknown"] = {"hipErrorUnknown", CONV_ERR}; + + // error APIs: + cuda2hipRename["cudaGetLastError"] = {"hipGetLastError", CONV_ERR}; + cuda2hipRename["cudaPeekAtLastError"] = {"hipPeekAtLastError", CONV_ERR}; + cuda2hipRename["cudaGetErrorName"] = {"hipGetErrorName", CONV_ERR}; + cuda2hipRename["cudaGetErrorString"] = {"hipGetErrorString", CONV_ERR}; + + // Memcpy + cuda2hipRename["cudaMemcpy"] = {"hipMemcpy", CONV_MEM}; + cuda2hipRename["cudaMemcpyHostToHost"] = {"hipMemcpyHostToHost", CONV_MEM}; + cuda2hipRename["cudaMemcpyHostToDevice"] = {"hipMemcpyHostToDevice", + CONV_MEM}; + cuda2hipRename["cudaMemcpyDeviceToHost"] = {"hipMemcpyDeviceToHost", + CONV_MEM}; + cuda2hipRename["cudaMemcpyDeviceToDevice"] = {"hipMemcpyDeviceToDevice", + CONV_MEM}; + cuda2hipRename["cudaMemcpyDefault"] = {"hipMemcpyDefault", CONV_MEM}; + cuda2hipRename["cudaMemcpyToSymbol"] = {"hipMemcpyToSymbol", CONV_MEM}; + cuda2hipRename["cudaMemset"] = {"hipMemset", CONV_MEM}; + cuda2hipRename["cudaMemsetAsync"] = {"hipMemsetAsync", CONV_MEM}; + cuda2hipRename["cudaMemcpyAsync"] = {"hipMemcpyAsync", CONV_MEM}; + cuda2hipRename["cudaMemGetInfo"] = {"hipMemGetInfo", CONV_MEM}; + cuda2hipRename["cudaMemcpyKind"] = {"hipMemcpyKind", CONV_MEM}; + + // Memory management : + cuda2hipRename["cudaMalloc"] = {"hipMalloc", CONV_MEM}; + cuda2hipRename["cudaMallocHost"] = {"hipHostAlloc", CONV_MEM}; + cuda2hipRename["cudaFree"] = {"hipFree", CONV_MEM}; + cuda2hipRename["cudaFreeHost"] = {"hipHostFree", CONV_MEM}; + + // Coordinate Indexing and Dimensions: + cuda2hipRename["threadIdx.x"] = {"hipThreadIdx_x", CONV_COORD_FUNC}; + cuda2hipRename["threadIdx.y"] = {"hipThreadIdx_y", CONV_COORD_FUNC}; + cuda2hipRename["threadIdx.z"] = {"hipThreadIdx_z", CONV_COORD_FUNC}; + + cuda2hipRename["blockIdx.x"] = {"hipBlockIdx_x", CONV_COORD_FUNC}; + cuda2hipRename["blockIdx.y"] = {"hipBlockIdx_y", CONV_COORD_FUNC}; + cuda2hipRename["blockIdx.z"] = {"hipBlockIdx_z", CONV_COORD_FUNC}; + + cuda2hipRename["blockDim.x"] = {"hipBlockDim_x", CONV_COORD_FUNC}; + cuda2hipRename["blockDim.y"] = {"hipBlockDim_y", CONV_COORD_FUNC}; + cuda2hipRename["blockDim.z"] = {"hipBlockDim_z", CONV_COORD_FUNC}; + + cuda2hipRename["gridDim.x"] = {"hipGridDim_x", CONV_COORD_FUNC}; + cuda2hipRename["gridDim.y"] = {"hipGridDim_y", CONV_COORD_FUNC}; + cuda2hipRename["gridDim.z"] = {"hipGridDim_z", CONV_COORD_FUNC}; + + cuda2hipRename["blockIdx.x"] = {"hipBlockIdx_x", CONV_COORD_FUNC}; + cuda2hipRename["blockIdx.y"] = {"hipBlockIdx_y", CONV_COORD_FUNC}; + cuda2hipRename["blockIdx.z"] = {"hipBlockIdx_z", CONV_COORD_FUNC}; + + cuda2hipRename["blockDim.x"] = {"hipBlockDim_x", CONV_COORD_FUNC}; + cuda2hipRename["blockDim.y"] = {"hipBlockDim_y", CONV_COORD_FUNC}; + cuda2hipRename["blockDim.z"] = {"hipBlockDim_z", CONV_COORD_FUNC}; + + cuda2hipRename["gridDim.x"] = {"hipGridDim_x", CONV_COORD_FUNC}; + cuda2hipRename["gridDim.y"] = {"hipGridDim_y", CONV_COORD_FUNC}; + cuda2hipRename["gridDim.z"] = {"hipGridDim_z", CONV_COORD_FUNC}; + + cuda2hipRename["warpSize"] = {"hipWarpSize", CONV_SPECIAL_FUNC}; + + // Events + cuda2hipRename["cudaEvent_t"] = {"hipEvent_t", CONV_EVENT}; + cuda2hipRename["cudaEventCreate"] = {"hipEventCreate", CONV_EVENT}; + cuda2hipRename["cudaEventCreateWithFlags"] = {"hipEventCreateWithFlags", + CONV_EVENT}; + cuda2hipRename["cudaEventDestroy"] = {"hipEventDestroy", CONV_EVENT}; + cuda2hipRename["cudaEventRecord"] = {"hipEventRecord", CONV_EVENT}; + cuda2hipRename["cudaEventElapsedTime"] = {"hipEventElapsedTime", + CONV_EVENT}; + cuda2hipRename["cudaEventSynchronize"] = {"hipEventSynchronize", + CONV_EVENT}; + + // Streams + cuda2hipRename["cudaStream_t"] = {"hipStream_t", CONV_STREAM}; + cuda2hipRename["cudaStreamCreate"] = {"hipStreamCreate", CONV_STREAM}; + cuda2hipRename["cudaStreamCreateWithFlags"] = {"hipStreamCreateWithFlags", + CONV_STREAM}; + cuda2hipRename["cudaStreamDestroy"] = {"hipStreamDestroy", CONV_STREAM}; + cuda2hipRename["cudaStreamWaitEvent"] = {"hipStreamWaitEven", CONV_STREAM}; + cuda2hipRename["cudaStreamSynchronize"] = {"hipStreamSynchronize", + CONV_STREAM}; + cuda2hipRename["cudaStreamDefault"] = {"hipStreamDefault", CONV_STREAM}; + cuda2hipRename["cudaStreamNonBlocking"] = {"hipStreamNonBlocking", + CONV_STREAM}; + + // Other synchronization + cuda2hipRename["cudaDeviceSynchronize"] = {"hipDeviceSynchronize", + CONV_DEV}; + cuda2hipRename["cudaThreadSynchronize"] = { + "hipDeviceSynchronize", + CONV_DEV}; // translate deprecated cudaThreadSynchronize + cuda2hipRename["cudaDeviceReset"] = {"hipDeviceReset", CONV_DEV}; + cuda2hipRename["cudaThreadExit"] = { + "hipDeviceReset", CONV_DEV}; // translate deprecated cudaThreadExit + cuda2hipRename["cudaSetDevice"] = {"hipSetDevice", CONV_DEV}; + cuda2hipRename["cudaGetDevice"] = {"hipGetDevice", CONV_DEV}; + + // Attribute + cuda2hipRename["bcudaDeviceAttr"] = {"hipDeviceAttribute_t", CONV_DEV}; + cuda2hipRename["bcudaDeviceGetAttribute"] = {"hipDeviceGetAttribute", + CONV_DEV}; + + // Device + cuda2hipRename["cudaDeviceProp"] = {"hipDeviceProp_t", CONV_DEV}; + cuda2hipRename["cudaGetDeviceProperties"] = {"hipGetDeviceProperties", + CONV_DEV}; + + // Cache config + cuda2hipRename["cudaDeviceSetCacheConfig"] = {"hipDeviceSetCacheConfig", + CONV_DEV}; + cuda2hipRename["cudaThreadSetCacheConfig"] = { + "hipDeviceSetCacheConfig", CONV_DEV}; // translate deprecated + cuda2hipRename["cudaDeviceGetCacheConfig"] = {"hipDeviceGetCacheConfig", + CONV_DEV}; + cuda2hipRename["cudaThreadGetCacheConfig"] = { + "hipDeviceGetCacheConfig", CONV_DEV}; // translate deprecated + cuda2hipRename["cudaFuncCache"] = {"hipFuncCache", CONV_DEV}; + cuda2hipRename["cudaFuncCachePreferNone"] = {"hipFuncCachePreferNone", + CONV_DEV}; + cuda2hipRename["cudaFuncCachePreferShared"] = {"hipFuncCachePreferShared", + CONV_DEV}; + cuda2hipRename["cudaFuncCachePreferL1"] = {"hipFuncCachePreferL1", + CONV_DEV}; + cuda2hipRename["cudaFuncCachePreferEqual"] = {"hipFuncCachePreferEqual", + CONV_DEV}; + // function + cuda2hipRename["cudaFuncSetCacheConfig"] = {"hipFuncSetCacheConfig", + CONV_DEV}; + + cuda2hipRename["cudaDriverGetVersion"] = {"hipDriverGetVersion", CONV_DEV}; + // cuda2hipRename["cudaRuntimeGetVersion"] = {"hipRuntimeGetVersion", + // CONV_DEV}; + + // Peer2Peer + cuda2hipRename["cudaDeviceCanAccessPeer"] = {"hipDeviceCanAccessPeer", + CONV_DEV}; + cuda2hipRename["cudaDeviceDisablePeerAccess"] = { + "hipDeviceDisablePeerAccess", CONV_DEV}; + cuda2hipRename["cudaDeviceEnablePeerAccess"] = {"hipDeviceEnablePeerAccess", + CONV_DEV}; + cuda2hipRename["cudaMemcpyPeerAsync"] = {"hipMemcpyPeerAsync", CONV_MEM}; + cuda2hipRename["cudaMemcpyPeer"] = {"hipMemcpyPeer", CONV_MEM}; + + // Shared mem: + cuda2hipRename["cudaDeviceSetSharedMemConfig"] = { + "hipDeviceSetSharedMemConfig", CONV_DEV}; + cuda2hipRename["cudaThreadSetSharedMemConfig"] = { + "hipDeviceSetSharedMemConfig", CONV_DEV}; // translate deprecated + cuda2hipRename["cudaDeviceGetSharedMemConfig"] = { + "hipDeviceGetSharedMemConfig", CONV_DEV}; + cuda2hipRename["cudaThreadGetSharedMemConfig"] = { + "hipDeviceGetSharedMemConfig", CONV_DEV}; // translate deprecated + cuda2hipRename["cudaSharedMemConfig"] = {"hipSharedMemConfig", CONV_DEV}; + cuda2hipRename["cudaSharedMemBankSizeDefault"] = { + "hipSharedMemBankSizeDefault", CONV_DEV}; + cuda2hipRename["cudaSharedMemBankSizeFourByte"] = { + "hipSharedMemBankSizeFourByte", CONV_DEV}; + cuda2hipRename["cudaSharedMemBankSizeEightByte"] = { + "hipSharedMemBankSizeEightByte", CONV_DEV}; + + cuda2hipRename["cudaGetDeviceCount"] = {"hipGetDeviceCount", CONV_DEV}; + + // Profiler + // cuda2hipRename["cudaProfilerInitialize"] = "hipProfilerInitialize"; // + // see if these are called anywhere. + cuda2hipRename["cudaProfilerStart"] = {"hipProfilerStart", CONV_OTHER}; + cuda2hipRename["cudaProfilerStop"] = {"hipProfilerStop", CONV_OTHER}; + + cuda2hipRename["cudaChannelFormatDesc"] = {"hipChannelFormatDesc", + CONV_TEX}; + cuda2hipRename["cudaFilterModePoint"] = {"hipFilterModePoint", CONV_TEX}; + cuda2hipRename["cudaReadModeElementType"] = {"hipReadModeElementType", + CONV_TEX}; + + cuda2hipRename["cudaCreateChannelDesc"] = {"hipCreateChannelDesc", + CONV_TEX}; + cuda2hipRename["cudaBindTexture"] = {"hipBindTexture", CONV_TEX}; + cuda2hipRename["cudaUnbindTexture"] = {"hipUnbindTexture", CONV_TEX}; + } + + struct HipNames { + StringRef hipName; + ConvTypes countType; + }; + + SmallDenseMap cuda2hipRename; +}; + +StringRef unquoteStr(StringRef s) { + if (s.size() > 1 && s.front() == '"' && s.back() == '"') + return s.substr(1, s.size() - 2); + return s; +} + +static void processString(StringRef s, const cuda2hipMap &map, + Replacements *Replace, SourceManager &SM, + SourceLocation start, + int64_t countReps[ConvTypes::CONV_LAST]) { + size_t begin = 0; + while ((begin = s.find("cuda", begin)) != StringRef::npos) { + const size_t end = s.find_first_of(" ", begin + 4); + StringRef name = s.slice(begin, end); + const auto found = map.cuda2hipRename.find(name); + if (found != map.cuda2hipRename.end()) { + countReps[CONV_LITERAL]++; + StringRef repName = found->second.hipName; + SourceLocation sl = start.getLocWithOffset(begin + 1); + Replacement Rep(SM, sl, name.size(), repName); + Replace->insert(Rep); + } + if (end == StringRef::npos) + break; + begin = end + 1; + } +} + +struct HipifyPPCallbacks : public PPCallbacks, public SourceFileCallbacks { + HipifyPPCallbacks(Replacements *R) + : SeenEnd(false), _sm(nullptr), _pp(nullptr), Replace(R) {} + + virtual bool handleBeginSource(CompilerInstance &CI, + StringRef Filename) override { + Preprocessor &PP = CI.getPreprocessor(); + SourceManager &SM = CI.getSourceManager(); + setSourceManager(&SM); + PP.addPPCallbacks(std::unique_ptr(this)); + PP.Retain(); + setPreprocessor(&PP); + return true; + } + + virtual void InclusionDirective(SourceLocation hash_loc, + const Token &include_token, + StringRef file_name, bool is_angled, + CharSourceRange filename_range, + const FileEntry *file, StringRef search_path, + StringRef relative_path, + const clang::Module *imported) override { + if (_sm->isWrittenInMainFile(hash_loc)) { + if (is_angled) { + const auto found = N.cuda2hipRename.find(file_name); + if (found != N.cuda2hipRename.end()) { + countReps[found->second.countType]++; + StringRef repName = found->second.hipName; + DEBUG(dbgs() << "Include file found: " << file_name << "\n" + << "SourceLocation:" + << filename_range.getBegin().printToString(*_sm) << "\n" + << "Will be replaced with " << repName << "\n"); + SourceLocation sl = filename_range.getBegin(); + SourceLocation sle = filename_range.getEnd(); + const char *B = _sm->getCharacterData(sl); + const char *E = _sm->getCharacterData(sle); + SmallString<128> tmpData; + Replacement Rep(*_sm, sl, E - B, + Twine("<" + repName + ">").toStringRef(tmpData)); + Replace->insert(Rep); + } + } + } + } + + virtual void MacroDefined(const Token &MacroNameTok, + const MacroDirective *MD) override { + if (_sm->isWrittenInMainFile(MD->getLocation()) && + MD->getKind() == MacroDirective::MD_Define) { + for (auto T : MD->getMacroInfo()->tokens()) { + if (T.isAnyIdentifier()) { + StringRef name = T.getIdentifierInfo()->getName(); + const auto found = N.cuda2hipRename.find(name); + if (found != N.cuda2hipRename.end()) { + countReps[found->second.countType]++; + StringRef repName = found->second.hipName; + SourceLocation sl = T.getLocation(); + DEBUG(dbgs() << "Identifier " << name + << " found in definition of macro " + << MacroNameTok.getIdentifierInfo()->getName() << "\n" + << "will be replaced with: " << repName << "\n" + << "SourceLocation: " << sl.printToString(*_sm) + << "\n"); + Replacement Rep(*_sm, sl, name.size(), repName); + Replace->insert(Rep); + } + } + } + } + } + + virtual void MacroExpands(const Token &MacroNameTok, + const MacroDefinition &MD, SourceRange Range, + const MacroArgs *Args) override { + if (_sm->isWrittenInMainFile(MacroNameTok.getLocation())) { + for (unsigned int i = 0; Args && i < MD.getMacroInfo()->getNumArgs(); + i++) { + StringRef macroName = MacroNameTok.getIdentifierInfo()->getName(); + std::vector toks; + // Code below is a kind of stolen from 'MacroArgs::getPreExpArgument' + // to workaround the 'const' MacroArgs passed into this hook. + const Token *start = Args->getUnexpArgument(i); + size_t len = Args->getArgLength(start) + 1; +#if (LLVM_VERSION_MAJOR >= 3) && (LLVM_VERSION_MINOR >= 9) + _pp->EnterTokenStream(ArrayRef(start, len), false); +#else + _pp->EnterTokenStream(start, len, false, false); +#endif + do { + toks.push_back(Token()); + Token &tk = toks.back(); + _pp->Lex(tk); + } while (toks.back().isNot(tok::eof)); + _pp->RemoveTopOfLexerStack(); + // end of stolen code + for (auto tok : toks) { + if (tok.isAnyIdentifier()) { + StringRef name = tok.getIdentifierInfo()->getName(); + const auto found = N.cuda2hipRename.find(name); + if (found != N.cuda2hipRename.end()) { + countReps[found->second.countType]++; + StringRef repName = found->second.hipName; + DEBUG(dbgs() + << "Identifier " << name + << " found as an actual argument in expansion of macro " + << macroName << "\n" + << "will be replaced with: " << repName << "\n"); + SourceLocation sl = tok.getLocation(); + Replacement Rep(*_sm, sl, name.size(), repName); + Replace->insert(Rep); + } + } + if (tok.is(tok::string_literal)) { + StringRef s(tok.getLiteralData(), tok.getLength()); + processString(unquoteStr(s), N, Replace, *_sm, tok.getLocation(), + countReps); + } + } + } + } + } + + void EndOfMainFile() override {} + + bool SeenEnd; + void setSourceManager(SourceManager *sm) { _sm = sm; } + void setPreprocessor(Preprocessor *pp) { _pp = pp; } + + int64_t countReps[ConvTypes::CONV_LAST] = {0}; + +private: + SourceManager *_sm; + Preprocessor *_pp; + + Replacements *Replace; + struct cuda2hipMap N; +}; + +class Cuda2HipCallback : public MatchFinder::MatchCallback { +public: + Cuda2HipCallback(Replacements *Replace, ast_matchers::MatchFinder *parent) + : Replace(Replace), owner(parent) {} + + void convertKernelDecl(const FunctionDecl *kernelDecl, + const MatchFinder::MatchResult &Result) { + SourceManager *SM = Result.SourceManager; + LangOptions DefaultLangOptions; + + SmallString<40> XStr; + raw_svector_ostream OS(XStr); + StringRef initialParamList; + OS << "hipLaunchParm lp"; + size_t replacementLength = OS.str().size(); + SourceLocation sl = kernelDecl->getNameInfo().getEndLoc(); + SourceLocation kernelArgListStart = clang::Lexer::findLocationAfterToken( + sl, clang::tok::l_paren, *SM, DefaultLangOptions, true); + DEBUG(dbgs() << kernelArgListStart.printToString(*SM)); + if (kernelDecl->getNumParams() > 0) { + const ParmVarDecl *pvdFirst = kernelDecl->getParamDecl(0); + const ParmVarDecl *pvdLast = + kernelDecl->getParamDecl(kernelDecl->getNumParams() - 1); + SourceLocation kernelArgListStart(pvdFirst->getLocStart()); + SourceLocation kernelArgListEnd(pvdLast->getLocEnd()); + SourceLocation stop = clang::Lexer::getLocForEndOfToken( + kernelArgListEnd, 0, *SM, DefaultLangOptions); + replacementLength += + SM->getCharacterData(stop) - SM->getCharacterData(kernelArgListStart); + initialParamList = StringRef(SM->getCharacterData(kernelArgListStart), + replacementLength); + OS << ", " << initialParamList; + } + DEBUG(dbgs() << "initial paramlist: " << initialParamList << "\n" + << "new paramlist: " << OS.str() << "\n"); + Replacement Rep0(*(Result.SourceManager), kernelArgListStart, + replacementLength, OS.str()); + Replace->insert(Rep0); + } + + void run(const MatchFinder::MatchResult &Result) override { + SourceManager *SM = Result.SourceManager; + LangOptions DefaultLangOptions; + + if (const CallExpr *call = + Result.Nodes.getNodeAs("cudaCall")) { + const FunctionDecl *funcDcl = call->getDirectCallee(); + StringRef name = funcDcl->getDeclName().getAsString(); + const auto found = N.cuda2hipRename.find(name); + if (found != N.cuda2hipRename.end()) { + countReps[found->second.countType]++; + StringRef repName = found->second.hipName; + SourceLocation sl = call->getLocStart(); + Replacement Rep(*SM, SM->isMacroArgExpansion(sl) + ? SM->getImmediateSpellingLoc(sl) + : sl, + name.size(), repName); + Replace->insert(Rep); + } + } + + if (const CUDAKernelCallExpr *launchKernel = + Result.Nodes.getNodeAs( + "cudaLaunchKernel")) { + SmallString<40> XStr; + raw_svector_ostream OS(XStr); + StringRef calleeName; + const FunctionDecl *kernelDecl = launchKernel->getDirectCallee(); + if (kernelDecl) { + calleeName = kernelDecl->getName(); + convertKernelDecl(kernelDecl, Result); + } else { + const Expr *e = launchKernel->getCallee(); + if (const UnresolvedLookupExpr *ule = + dyn_cast(e)) { + calleeName = ule->getName().getAsIdentifierInfo()->getName(); + owner->addMatcher(functionTemplateDecl(hasName(calleeName)) + .bind("unresolvedTemplateName"), + this); + } + } + + XStr.clear(); + OS << "hipLaunchKernel(HIP_KERNEL_NAME(" << calleeName << "),"; + + const CallExpr *config = launchKernel->getConfig(); + DEBUG(dbgs() << "Kernel config arguments:" + << "\n"); + for (unsigned argno = 0; argno < config->getNumArgs(); argno++) { + const Expr *arg = config->getArg(argno); + if (!isa(arg)) { + const ParmVarDecl *pvd = + config->getDirectCallee()->getParamDecl(argno); + + SourceLocation sl(arg->getLocStart()); + SourceLocation el(arg->getLocEnd()); + SourceLocation stop = + clang::Lexer::getLocForEndOfToken(el, 0, *SM, DefaultLangOptions); + StringRef outs(SM->getCharacterData(sl), + SM->getCharacterData(stop) - SM->getCharacterData(sl)); + DEBUG(dbgs() << "args[ " << argno << "]" << outs << " <" + << pvd->getType().getAsString() << ">" + << "\n"); + if (pvd->getType().getAsString().compare("dim3") == 0) + OS << " dim3(" << outs << "),"; + else + OS << " " << outs << ","; + } else + OS << " 0,"; + } + + for (unsigned argno = 0; argno < launchKernel->getNumArgs(); argno++) { + const Expr *arg = launchKernel->getArg(argno); + SourceLocation sl(arg->getLocStart()); + SourceLocation el(arg->getLocEnd()); + SourceLocation stop = + clang::Lexer::getLocForEndOfToken(el, 0, *SM, DefaultLangOptions); + std::string outs(SM->getCharacterData(sl), + SM->getCharacterData(stop) - SM->getCharacterData(sl)); + DEBUG(dbgs() << outs << "\n"); + OS << " " << outs << ","; + } + XStr.pop_back(); + OS << ")"; + size_t length = + SM->getCharacterData(clang::Lexer::getLocForEndOfToken( + launchKernel->getLocEnd(), 0, *SM, DefaultLangOptions)) - + SM->getCharacterData(launchKernel->getLocStart()); + Replacement Rep(*SM, launchKernel->getLocStart(), length, OS.str()); + Replace->insert(Rep); + countReps[ConvTypes::CONV_KERN]++; + } + + if (const FunctionTemplateDecl *templateDecl = + Result.Nodes.getNodeAs( + "unresolvedTemplateName")) { + FunctionDecl *kernelDecl = templateDecl->getTemplatedDecl(); + convertKernelDecl(kernelDecl, Result); + } + + if (const MemberExpr *threadIdx = + Result.Nodes.getNodeAs("cudaBuiltin")) { + if (const OpaqueValueExpr *refBase = + dyn_cast(threadIdx->getBase())) { + if (const DeclRefExpr *declRef = + dyn_cast(refBase->getSourceExpr())) { + StringRef name = declRef->getDecl()->getName(); + StringRef memberName = threadIdx->getMemberDecl()->getName(); + size_t pos = memberName.find_first_not_of("__fetch_builtin_"); + memberName = memberName.slice(pos, memberName.size()); + SmallString<128> tmpData; + name = Twine(name + "." + memberName).toStringRef(tmpData); + const auto found = N.cuda2hipRename.find(name); + if (found != N.cuda2hipRename.end()) { + countReps[found->second.countType]++; + StringRef repName = found->second.hipName; + SourceLocation sl = threadIdx->getLocStart(); + Replacement Rep(*SM, sl, name.size(), repName); + Replace->insert(Rep); + } + } + } + } + + if (const DeclRefExpr *cudaEnumConstantRef = + Result.Nodes.getNodeAs("cudaEnumConstantRef")) { + StringRef name = cudaEnumConstantRef->getDecl()->getNameAsString(); + const auto found = N.cuda2hipRename.find(name); + if (found != N.cuda2hipRename.end()) { + countReps[found->second.countType]++; + StringRef repName = found->second.hipName; + SourceLocation sl = cudaEnumConstantRef->getLocStart(); + Replacement Rep(*SM, sl, name.size(), repName); + Replace->insert(Rep); + } + } + + if (const VarDecl *cudaEnumConstantDecl = + Result.Nodes.getNodeAs("cudaEnumConstantDecl")) { + StringRef name = + cudaEnumConstantDecl->getType()->getAsTagDecl()->getNameAsString(); + const auto found = N.cuda2hipRename.find(name); + if (found != N.cuda2hipRename.end()) { + countReps[found->second.countType]++; + StringRef repName = found->second.hipName; + SourceLocation sl = cudaEnumConstantDecl->getLocStart(); + Replacement Rep(*SM, sl, name.size(), repName); + Replace->insert(Rep); + } + } + + if (const VarDecl *cudaStructVar = + Result.Nodes.getNodeAs("cudaStructVar")) { + StringRef name = cudaStructVar->getType() + ->getAsStructureType() + ->getDecl() + ->getNameAsString(); + const auto found = N.cuda2hipRename.find(name); + if (found != N.cuda2hipRename.end()) { + countReps[found->second.countType]++; + StringRef repName = found->second.hipName; + TypeLoc TL = cudaStructVar->getTypeSourceInfo()->getTypeLoc(); + SourceLocation sl = TL.getUnqualifiedLoc().getLocStart(); + Replacement Rep(*SM, sl, name.size(), repName); + Replace->insert(Rep); + } + } + + if (const VarDecl *cudaStructVarPtr = + Result.Nodes.getNodeAs("cudaStructVarPtr")) { + const Type *t = cudaStructVarPtr->getType().getTypePtrOrNull(); + if (t) { + StringRef name = t->getPointeeCXXRecordDecl()->getName(); + const auto found = N.cuda2hipRename.find(name); + if (found != N.cuda2hipRename.end()) { + countReps[found->second.countType]++; + StringRef repName = found->second.hipName; + TypeLoc TL = cudaStructVarPtr->getTypeSourceInfo()->getTypeLoc(); + SourceLocation sl = TL.getUnqualifiedLoc().getLocStart(); + Replacement Rep(*SM, sl, name.size(), repName); + Replace->insert(Rep); + } + } + } + + if (const ParmVarDecl *cudaParamDecl = + Result.Nodes.getNodeAs("cudaParamDecl")) { + QualType QT = cudaParamDecl->getOriginalType().getUnqualifiedType(); + StringRef name = QT.getAsString(); + const Type *t = QT.getTypePtr(); + if (t->isStructureOrClassType()) { + name = t->getAsCXXRecordDecl()->getName(); + } + const auto found = N.cuda2hipRename.find(name); + if (found != N.cuda2hipRename.end()) { + countReps[found->second.countType]++; + StringRef repName = found->second.hipName; + TypeLoc TL = cudaParamDecl->getTypeSourceInfo()->getTypeLoc(); + SourceLocation sl = TL.getUnqualifiedLoc().getLocStart(); + Replacement Rep(*SM, sl, name.size(), repName); + Replace->insert(Rep); + } + } + + if (const ParmVarDecl *cudaParamDeclPtr = + Result.Nodes.getNodeAs("cudaParamDeclPtr")) { + const Type *pt = cudaParamDeclPtr->getType().getTypePtrOrNull(); + if (pt) { + QualType QT = pt->getPointeeType(); + const Type *t = QT.getTypePtr(); + StringRef name = t->isStructureOrClassType() + ? t->getAsCXXRecordDecl()->getName() + : StringRef(QT.getAsString()); + const auto found = N.cuda2hipRename.find(name); + if (found != N.cuda2hipRename.end()) { + countReps[found->second.countType]++; + StringRef repName = found->second.hipName; + TypeLoc TL = cudaParamDeclPtr->getTypeSourceInfo()->getTypeLoc(); + SourceLocation sl = TL.getUnqualifiedLoc().getLocStart(); + Replacement Rep(*SM, sl, name.size(), repName); + Replace->insert(Rep); + } + } + } + + if (const StringLiteral *stringLiteral = + Result.Nodes.getNodeAs("stringLiteral")) { + if (stringLiteral->getCharByteWidth() == 1) { + StringRef s = stringLiteral->getString(); + processString(s, N, Replace, *SM, stringLiteral->getLocStart(), + countReps); + } + } + + if (const UnaryExprOrTypeTraitExpr *expr = + Result.Nodes.getNodeAs( + "cudaStructSizeOf")) { + TypeSourceInfo *typeInfo = expr->getArgumentTypeInfo(); + QualType QT = typeInfo->getType().getUnqualifiedType(); + const Type *type = QT.getTypePtr(); + StringRef name = type->getAsCXXRecordDecl()->getName(); + const auto found = N.cuda2hipRename.find(name); + if (found != N.cuda2hipRename.end()) { + countReps[found->second.countType]++; + StringRef repName = found->second.hipName; + TypeLoc TL = typeInfo->getTypeLoc(); + SourceLocation sl = TL.getUnqualifiedLoc().getLocStart(); + Replacement Rep(*SM, sl, name.size(), repName); + Replace->insert(Rep); + } + } + } + + int64_t countReps[ConvTypes::CONV_LAST] = {0}; + +private: + Replacements *Replace; + ast_matchers::MatchFinder *owner; + struct cuda2hipMap N; +}; + +} // end anonymous namespace + +// Set up the command line options +static cl::OptionCategory + ToolTemplateCategory("CUDA to HIP source translator options"); +static cl::extrahelp MoreHelp(" specify the path of source file\n\n"); + +static cl::opt OutputFilename("o", cl::desc("Output filename"), + cl::value_desc("filename"), + cl::cat(ToolTemplateCategory)); + +static cl::opt + Inplace("inplace", + cl::desc("Modify input file inplace, replacing input with hipified " + "output, save backup in .prehip file. "), + cl::value_desc("inplace"), cl::cat(ToolTemplateCategory)); + +static cl::opt + NoOutput("no-output", + cl::desc("don't write any translated output to stdout"), + cl::value_desc("no-output"), cl::cat(ToolTemplateCategory)); +static cl::opt + PrintStats("print-stats", cl::desc("print the command-line, like a header"), + cl::value_desc("print-stats"), cl::cat(ToolTemplateCategory)); + +int main(int argc, const char **argv) { + + llvm::sys::PrintStackTraceOnErrorSignal(); + + int Result; + + CommonOptionsParser OptionsParser(argc, argv, ToolTemplateCategory, + llvm::cl::Required); + std::string dst = OutputFilename; + std::vector fileSources = OptionsParser.getSourcePathList(); + if (dst.empty()) { + dst = fileSources[0]; + if (!Inplace) { + size_t pos = dst.rfind(".cu"); + if (pos != std::string::npos) { + dst = dst.substr(0, pos) + ".hip.cu"; + } else { + llvm::errs() << "Input .cu file was not specified.\n"; + return 1; + } + } + } else { + if (Inplace) { + llvm::errs() << "Conflict: both -o and -inplace options are specified."; + } + dst += ".cu"; + } + + // copy source file since tooling makes changes "inplace" + std::ifstream source(fileSources[0], std::ios::binary); + std::ofstream dest(Inplace ? dst + ".prehip" : dst, std::ios::binary); + dest << source.rdbuf(); + source.close(); + dest.close(); + + RefactoringTool Tool(OptionsParser.getCompilations(), dst); + ast_matchers::MatchFinder Finder; + Cuda2HipCallback Callback(&Tool.getReplacements(), &Finder); + HipifyPPCallbacks PPCallbacks(&Tool.getReplacements()); + Finder.addMatcher(callExpr(isExpansionInMainFile(), + callee(functionDecl(matchesName("cuda.*")))) + .bind("cudaCall"), + &Callback); + Finder.addMatcher(cudaKernelCallExpr().bind("cudaLaunchKernel"), &Callback); + Finder.addMatcher(memberExpr(isExpansionInMainFile(), + hasObjectExpression(hasType(cxxRecordDecl( + matchesName("__cuda_builtin_"))))) + .bind("cudaBuiltin"), + &Callback); + Finder.addMatcher(declRefExpr(isExpansionInMainFile(), + to(enumConstantDecl(matchesName("cuda.*")))) + .bind("cudaEnumConstantRef"), + &Callback); + Finder.addMatcher( + varDecl(isExpansionInMainFile(), hasType(enumDecl(matchesName("cuda.*")))) + .bind("cudaEnumConstantDecl"), + &Callback); + Finder.addMatcher(varDecl(isExpansionInMainFile(), + hasType(cxxRecordDecl(matchesName("cuda.*")))) + .bind("cudaStructVar"), + &Callback); + Finder.addMatcher( + varDecl(isExpansionInMainFile(), + hasType(pointsTo(cxxRecordDecl(matchesName("cuda.*"))))) + .bind("cudaStructVarPtr"), + &Callback); + Finder.addMatcher(parmVarDecl(isExpansionInMainFile(), + hasType(namedDecl(matchesName("cuda.*")))) + .bind("cudaParamDecl"), + &Callback); + Finder.addMatcher( + parmVarDecl(isExpansionInMainFile(), + hasType(pointsTo(namedDecl(matchesName("cuda.*"))))) + .bind("cudaParamDeclPtr"), + &Callback); + Finder.addMatcher(expr(isExpansionInMainFile(), + sizeOfExpr(hasArgumentOfType(recordType(hasDeclaration( + cxxRecordDecl(matchesName("cuda.*"))))))) + .bind("cudaStructSizeOf"), + &Callback); + Finder.addMatcher( + stringLiteral(isExpansionInMainFile()).bind("stringLiteral"), &Callback); + + auto action = newFrontendActionFactory(&Finder, &PPCallbacks); + + std::vector compilationStages; + compilationStages.push_back("--cuda-host-only"); + //compilationStages.push_back("--cuda-device-only"); + + for (auto Stage : compilationStages) { + Tool.appendArgumentsAdjuster( + getInsertArgumentAdjuster(Stage, ArgumentInsertPosition::BEGIN)); + Tool.appendArgumentsAdjuster(getInsertArgumentAdjuster("-std=c++11")); +#if defined(HIPIFY_CLANG_RES) + Tool.appendArgumentsAdjuster( + getInsertArgumentAdjuster("-resource-dir=" HIPIFY_CLANG_RES)); +#endif + Tool.appendArgumentsAdjuster(getClangSyntaxOnlyAdjuster()); + Result = Tool.run(action.get()); + + Tool.clearArgumentsAdjusters(); + } + + LangOptions DefaultLangOptions; + IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); + TextDiagnosticPrinter DiagnosticPrinter(llvm::errs(), &*DiagOpts); + DiagnosticsEngine Diagnostics( + IntrusiveRefCntPtr(new DiagnosticIDs()), &*DiagOpts, + &DiagnosticPrinter, false); + SourceManager Sources(Diagnostics, Tool.getFiles()); + + DEBUG(dbgs() << "Replacements collected by the tool:\n"); + for (const auto &r : Tool.getReplacements()) { + DEBUG(dbgs() << r.toString() << "\n"); + } + + Rewriter Rewrite(Sources, DefaultLangOptions); + + if (!Tool.applyAllReplacements(Rewrite)) { + DEBUG(dbgs() << "Skipped some replacements.\n"); + } + + Result = Rewrite.overwriteChangedFiles(); + + if (!Inplace) { + size_t pos = dst.rfind(".cu"); + if (pos != std::string::npos) { + rename(dst.c_str(), dst.substr(0, pos).c_str()); + } + } + if (PrintStats) { + int64_t sum = 0; + for (int i = 0; i < ConvTypes::CONV_LAST; i++) { + sum += Callback.countReps[i] + PPCallbacks.countReps[i]; + } + llvm::outs() << "info: converted " << sum << " CUDA->HIP refs ( "; + for (int i = 0; i < ConvTypes::CONV_LAST; i++) { + llvm::outs() << counterNames[i] << ':' + << Callback.countReps[i] + PPCallbacks.countReps[i] << ' '; + } + llvm::outs() << ") in \'" << fileSources[0] << "\'\n"; + } + return Result; +} diff --git a/projects/clr/hipamd/tests/clang-hipify/axpy.cu b/projects/clr/hipamd/tests/clang-hipify/axpy.cu new file mode 100644 index 0000000000..8472e60209 --- /dev/null +++ b/projects/clr/hipamd/tests/clang-hipify/axpy.cu @@ -0,0 +1,45 @@ +// RUN: hipify "%s" -o=%t -- + +#include + +__global__ void axpy(float a, float* x, float* y) { + // RUN: sh -c "test `grep -c -F 'y[hipThreadIdx_x] = a * x[hipThreadIdx_x];' %t` -eq 2" + y[threadIdx.x] = a * x[threadIdx.x]; +} + +int main(int argc, char* argv[]) { + const int kDataLen = 4; + + float a = 2.0f; + float host_x[kDataLen] = {1.0f, 2.0f, 3.0f, 4.0f}; + float host_y[kDataLen]; + + // Copy input data to device. + float* device_x; + float* device_y; + // RUN: sh -c "test `grep -c -F 'hipMalloc(&device_x, kDataLen * sizeof(float));' %t` -eq 2" + cudaMalloc(&device_x, kDataLen * sizeof(float)); + // RUN: sh -c "test `grep -c -F 'hipMalloc(&device_y, kDataLen * sizeof(float));' %t` -eq 2" + cudaMalloc(&device_y, kDataLen * sizeof(float)); + // RUN: sh -c "test `grep -c -F 'hipMemcpy(device_x, host_x, kDataLen * sizeof(float), hipMemcpyHostToDevice);' %t` -eq 2" + cudaMemcpy(device_x, host_x, kDataLen * sizeof(float), cudaMemcpyHostToDevice); + + // Launch the kernel. + // RUN: sh -c "test `grep -c -F 'hipLaunchKernel(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y);' %t` -eq 2" + axpy<<<1, kDataLen>>>(a, device_x, device_y); + + // Copy output data to host. + // RUN: sh -c "test `grep -c -F 'hipDeviceSynchronize();' %t` -eq 2" + cudaDeviceSynchronize(); + // RUN: sh -c "test `grep -c -F 'hipMemcpy(host_y, device_y, kDataLen * sizeof(float), hipMemcpyDeviceToHost);' %t` -eq 2" + cudaMemcpy(host_y, device_y, kDataLen * sizeof(float), cudaMemcpyDeviceToHost); + + // Print the results. + for (int i = 0; i < kDataLen; ++i) { + std::cout << "y[" << i << "] = " << host_y[i] << "\n"; + } + + // RUN: sh -c "test `grep -c -F 'hipDeviceReset();' %t` -eq 2" + cudaDeviceReset(); + return 0; +} diff --git a/projects/clr/hipamd/tests/clang-hipify/lit.cfg b/projects/clr/hipamd/tests/clang-hipify/lit.cfg new file mode 100644 index 0000000000..c57b8ec524 --- /dev/null +++ b/projects/clr/hipamd/tests/clang-hipify/lit.cfg @@ -0,0 +1,48 @@ +# -*- Python -*- +import os +import platform +import re +import subprocess + +import lit.formats +import lit.util + +# Configuration file for the 'lit' test runner. + +# name: The name of this test suite. +config.name = 'hipify' + +# suffixes: CUDA source is only supported +config.suffixes = ['.cu'] + +# testFormat: The test format to use to interpret tests. +config.test_format = lit.formats.ShTest() + +# test_source_root: The root path where tests are located. +config.test_source_root = os.path.dirname(__file__) + +# test_exec_root: The path where tests are located (default is the test suite root). +#config.test_exec_root = config.test_source_root + +# target_triple: Used by ShTest and TclTest formats for XFAIL checks. +config.target_triple = '(unused)' + +# available_features: Used by ShTest and TclTest formats for REQUIRES checks. +config.available_features = [] + +site_cfg = lit_config.params.get('site_config', None) +lit_config.load_config(config, site_cfg) + +obj_root = getattr(config, 'obj_root', None) +if obj_root is not None: + config.test_exec_root = obj_root + +if obj_root is not None: + llvm_tools_dir = getattr(config, 'llvm_tools_dir', None) + if not llvm_tools_dir: + lit_config.fatal('No LLVM tools dir set!') + path = os.path.pathsep.join((llvm_tools_dir, config.environment['PATH'])) + config.environment['PATH'] = path + +config.substitutions.append(("hipify", obj_root+"/hipify-clang")) + diff --git a/projects/clr/hipamd/tests/clang-hipify/lit.site.cfg.in b/projects/clr/hipamd/tests/clang-hipify/lit.site.cfg.in new file mode 100644 index 0000000000..4511316ac7 --- /dev/null +++ b/projects/clr/hipamd/tests/clang-hipify/lit.site.cfg.in @@ -0,0 +1,15 @@ +import sys + +config.llvm_tools_dir = "@LLVM_TOOLS_BINARY_DIR@" +config.obj_root = "@BINARY_DIR@" + +# Support substitution of the tools and libs dirs with user parameters. This is +# used when we can't determine the tool dir at configuration time. +try: + config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params + config.obj_root = config.obj_root % lit_config.params +except KeyError: + e = sys.exc_info()[1] + key, = e.args + lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) +