From 3722d5b4b91908defd1131ed10d3ecaec7da094c Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Thu, 19 Sep 2019 19:33:42 +0300 Subject: [PATCH] [HIPIFY][#1435] Add HIP_SYMBOL wrapper to the templated Device Symbol argument of the following functions: cudaMemcpyToSymbol, cudaMemcpyToSymbolAsync, cudaGetSymbolSize, cudaGetSymbolAddress, cudaMemcpyFromSymbol, cudaMemcpyFromSymbolAsync + Add a corresponding cudaSymbolFuncCall matcher. + Add device_symbols.cu test for the above 6 functions, update existed. + Fix dim3() type cast issue, update affected tests. TODO: Do the same in hipify-perl --- hipify-clang/src/HipifyAction.cpp | 87 +++++++++- hipify-clang/src/HipifyAction.h | 1 + .../unit_tests/device/device_symbols.cu | 152 ++++++++++++++++++ .../cuRAND/benchmark_curand_kernel.cpp | 4 +- .../libraries/cuRAND/poisson_api_example.cu | 2 +- 5 files changed, 241 insertions(+), 5 deletions(-) create mode 100644 tests/hipify-clang/unit_tests/device/device_symbols.cu diff --git a/hipify-clang/src/HipifyAction.cpp b/hipify-clang/src/HipifyAction.cpp index e52c4cd2e9..6cb75f8911 100644 --- a/hipify-clang/src/HipifyAction.cpp +++ b/hipify-clang/src/HipifyAction.cpp @@ -20,6 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include +#include #include "HipifyAction.h" #include "clang/Basic/SourceLocation.h" #include "clang/Frontend/CompilerInstance.h" @@ -34,6 +36,25 @@ THE SOFTWARE. namespace ct = clang::tooling; namespace mat = clang::ast_matchers; +const std::string sCudaMemcpyToSymbol = "cudaMemcpyToSymbol"; +const std::string sCudaMemcpyToSymbolAsync = "cudaMemcpyToSymbolAsync"; +const std::string sCudaGetSymbolSize = "cudaGetSymbolSize"; +const std::string sCudaGetSymbolAddress = "cudaGetSymbolAddress"; +const std::string sCudaMemcpyFromSymbol = "cudaMemcpyFromSymbol"; +const std::string sCudaMemcpyFromSymbolAsync = "cudaMemcpyFromSymbolAsync"; + +const std::set DeviceSymbolFunctions0 { + {sCudaMemcpyToSymbol}, + {sCudaMemcpyToSymbolAsync} +}; + +const std::set DeviceSymbolFunctions1 { + {sCudaGetSymbolSize}, + {sCudaGetSymbolAddress}, + {sCudaMemcpyFromSymbol}, + {sCudaMemcpyFromSymbolAsync} +}; + void HipifyAction::RewriteString(StringRef s, clang::SourceLocation start) { clang::SourceManager& SM = getCompilerInstance().getSourceManager(); size_t begin = 0; @@ -316,8 +337,12 @@ bool HipifyAction::cudaLaunchKernel(const clang::ast_matchers::MatchFinder::Matc // Next up are the four kernel configuration parameters, the last two of which are optional and default to zero. // Copy the two dimensional arguments verbatim. - OS << "dim3(" << readSourceText(*SM, config->getArg(0)->getSourceRange()) << "), "; - OS << "dim3(" << readSourceText(*SM, config->getArg(1)->getSourceRange()) << "), "; + std::string sDim3 = "dim3("; + for (unsigned int i = 0; i < 2; ++i) { + const std::string sArg = readSourceText(*SM, config->getArg(i)->getSourceRange()).str(); + bool bDim3 = std::equal(sDim3.begin(), sDim3.end(), sArg.c_str()); + OS << (bDim3 ? "" : sDim3) << sArg << (bDim3 ? "" : ")") << ", "; + } // The stream/memory arguments default to zero if omitted. OS << stringifyZeroDefaultedArg(*SM, config->getArg(2)) << ", "; OS << stringifyZeroDefaultedArg(*SM, config->getArg(3)); @@ -395,11 +420,50 @@ bool HipifyAction::cudaSharedIncompleteArrayVar(const clang::ast_matchers::Match bool HipifyAction::cudaDeviceFuncCall(const clang::ast_matchers::MatchFinder::MatchResult& Result) { if (const clang::CallExpr *call = Result.Nodes.getNodeAs("cudaDeviceFuncCall")) { const clang::FunctionDecl *funcDcl = call->getDirectCallee(); + if (!funcDcl) { + return true; + } FindAndReplace(funcDcl->getDeclName().getAsString(), llcompat::getBeginLoc(call), CUDA_DEVICE_FUNC_MAP, false); } return true; } +bool HipifyAction::cudaSymbolFuncCall(const clang::ast_matchers::MatchFinder::MatchResult& Result) { + if (const clang::CallExpr * call = Result.Nodes.getNodeAs("cudaSymbolFuncCall")) { + if (!call->getNumArgs()) { + return true; + } + const clang::FunctionDecl* funcDcl = call->getDirectCallee(); + if (!funcDcl) { + return true; + } + std::string sName = funcDcl->getDeclName().getAsString(); + unsigned int argNum = 0; + if (DeviceSymbolFunctions0.find(sName) != DeviceSymbolFunctions0.end()) { + argNum = 0; + } else if (call->getNumArgs() > 1 && DeviceSymbolFunctions1.find(sName) != DeviceSymbolFunctions1.end()) { + argNum = 1; + } else { + return true; + } + clang::SmallString<40> XStr; + llvm::raw_svector_ostream OS(XStr); + clang::SourceRange sr = call->getArg(argNum)->getSourceRange(); + clang::SourceManager* SM = Result.SourceManager; + const std::string sSymbol = "HIP_SYMBOL"; + OS << sSymbol << "(" << readSourceText(*SM, sr) << ")"; + clang::SourceRange replacementRange = getWriteRange(*SM, { sr.getBegin(), sr.getEnd() }); + clang::SourceLocation s = replacementRange.getBegin(); + clang::SourceLocation e = replacementRange.getEnd(); + clang::LangOptions DefaultLangOptions; + size_t length = SM->getCharacterData(clang::Lexer::getLocForEndOfToken(e, 0, *SM, DefaultLangOptions)) - SM->getCharacterData(s); + ct::Replacement Rep(*SM, s, length, OS.str()); + clang::FullSourceLoc fullSL(s, *SM); + insertReplacement(Rep, fullSL); + } + return true; +} + void HipifyAction::insertReplacement(const ct::Replacement& rep, const clang::FullSourceLoc& fullSL) { llcompat::insertReplacement(*replacements, rep); if (PrintStats) { @@ -423,6 +487,24 @@ std::unique_ptr HipifyAction::CreateASTConsumer(clang::Compi ).bind("cudaSharedIncompleteArrayVar"), this ); + Finder->addMatcher( + mat::callExpr( + mat::isExpansionInMainFile(), + mat::callee( + mat::functionDecl( + mat::hasAnyName( + sCudaGetSymbolAddress, + sCudaGetSymbolSize, + sCudaMemcpyFromSymbol, + sCudaMemcpyFromSymbolAsync, + sCudaMemcpyToSymbol, + sCudaMemcpyToSymbolAsync + ) + ) + ) + ).bind("cudaSymbolFuncCall"), + this + ); Finder->addMatcher( mat::callExpr( mat::isExpansionInMainFile(), @@ -560,5 +642,6 @@ void HipifyAction::ExecuteAction() { void HipifyAction::run(const clang::ast_matchers::MatchFinder::MatchResult& Result) { if (cudaLaunchKernel(Result)) return; if (cudaSharedIncompleteArrayVar(Result)) return; + if (cudaSymbolFuncCall(Result)) return; if (cudaDeviceFuncCall(Result)) return; } diff --git a/hipify-clang/src/HipifyAction.h b/hipify-clang/src/HipifyAction.h index 208e6fb0b2..6c04e2c0cc 100644 --- a/hipify-clang/src/HipifyAction.h +++ b/hipify-clang/src/HipifyAction.h @@ -71,6 +71,7 @@ public: bool cudaLaunchKernel(const clang::ast_matchers::MatchFinder::MatchResult& Result); bool cudaSharedIncompleteArrayVar(const clang::ast_matchers::MatchFinder::MatchResult& Result); bool cudaDeviceFuncCall(const clang::ast_matchers::MatchFinder::MatchResult& Result); + bool cudaSymbolFuncCall(const clang::ast_matchers::MatchFinder::MatchResult& Result); // Called by the preprocessor for each include directive during the non-raw lexing pass. void InclusionDirective(clang::SourceLocation hash_loc, const clang::Token &include_token, diff --git a/tests/hipify-clang/unit_tests/device/device_symbols.cu b/tests/hipify-clang/unit_tests/device/device_symbols.cu new file mode 100644 index 0000000000..b58abeda46 --- /dev/null +++ b/tests/hipify-clang/unit_tests/device/device_symbols.cu @@ -0,0 +1,152 @@ +// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args + +/* +Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +// CHECK: #include +#include +#include +#include + +#define NUM 1024 +#define SIZE 1024 * 4 + +__device__ int globalIn[NUM]; +__device__ int globalOut[NUM]; + +__global__ void Assign(int* Out) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + Out[tid] = globalIn[tid]; + globalOut[tid] = globalIn[tid]; +} + +__device__ __constant__ int globalConst[NUM]; + +__global__ void checkAddress(int* addr, bool* out) { + *out = (globalConst == addr); +} + +int main() { + int *A, *Am, *B, *Ad, *C, *Cm; + A = new int[NUM]; + B = new int[NUM]; + C = new int[NUM]; + for (int i = 0; i < NUM; ++i) { + A[i] = -1 * i; + B[i] = 0; + C[i] = 0; + } + // CHECK: hipMalloc((void**)&Ad, SIZE); + cudaMalloc((void**)&Ad, SIZE); + // CHECK: hipHostMalloc((void**)&Am, SIZE); + cudaMallocHost((void**)&Am, SIZE); + // CHECK: hipHostMalloc((void**)&Cm, SIZE); + cudaMallocHost((void**)&Cm, SIZE); + for (int i = 0; i < NUM; ++i) { + Am[i] = -1 * i; + Cm[i] = 0; + } + // CHECK: hipStream_t stream = NULL; + cudaStream_t stream = NULL; + // CHECK: hipStreamCreate(&stream); + cudaStreamCreate(&stream); + // CHECK: hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), Am, SIZE, 0, hipMemcpyHostToDevice, stream); + cudaMemcpyToSymbolAsync(globalIn, Am, SIZE, 0, cudaMemcpyHostToDevice, stream); + // CHECK: hipStreamSynchronize(stream); + cudaStreamSynchronize(stream); + // CHECK: hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); + Assign<<>>(Ad); + // CHECK: hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost); + cudaMemcpy(B, Ad, SIZE, cudaMemcpyDeviceToHost); + // CHECK: hipMemcpyFromSymbolAsync(Cm, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream); + cudaMemcpyFromSymbolAsync(Cm, globalOut, SIZE, 0, cudaMemcpyDeviceToHost, stream); + // CHECK: hipStreamSynchronize(stream); + cudaStreamSynchronize(stream); + for (int i = 0; i < NUM; ++i) { + assert(Am[i] == B[i]); + assert(Am[i] == Cm[i]); + } + for (int i = 0; i < NUM; ++i) { + A[i] = -2 * i; + B[i] = 0; + } + // CHECK: hipMemcpyToSymbol(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice); + cudaMemcpyToSymbol(globalIn, A, SIZE, 0, cudaMemcpyHostToDevice); + // CHECK: hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); + Assign<<>>(Ad); + // CHECK: hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost); + cudaMemcpy(B, Ad, SIZE, cudaMemcpyDeviceToHost); + // CHECK: hipMemcpyFromSymbol(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost); + cudaMemcpyFromSymbol(C, globalOut, SIZE, 0, cudaMemcpyDeviceToHost); + for (int i = 0; i < NUM; ++i) { + assert(A[i] == B[i]); + assert(A[i] == C[i]); + } + for (int i = 0; i < NUM; ++i) { + A[i] = -3 * i; + B[i] = 0; + } + // CHECK: hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice, stream); + cudaMemcpyToSymbolAsync(globalIn, A, SIZE, 0, cudaMemcpyHostToDevice, stream); + // CHECK: hipStreamSynchronize(stream); + cudaStreamSynchronize(stream); + // CHECK: hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); + Assign<<>>(Ad); + // CHECK: hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost); + cudaMemcpy(B, Ad, SIZE, cudaMemcpyDeviceToHost); + // CHECK: hipMemcpyFromSymbolAsync(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream); + cudaMemcpyFromSymbolAsync(C, globalOut, SIZE, 0, cudaMemcpyDeviceToHost, stream); + // CHECK: hipStreamSynchronize(stream); + cudaStreamSynchronize(stream); + for (int i = 0; i < NUM; ++i) { + assert(A[i] == B[i]); + assert(A[i] == C[i]); + } + bool *checkOkD; + bool checkOk = false; + size_t symbolSize = 0; + int *symbolAddress; + // CHECK: hipGetSymbolSize(&symbolSize, HIP_SYMBOL(globalConst)); + cudaGetSymbolSize(&symbolSize, globalConst); + // CHECK: hipGetSymbolAddress((void**) &symbolAddress, HIP_SYMBOL(globalConst)); + cudaGetSymbolAddress((void**) &symbolAddress, globalConst); + // CHECK: hipMalloc((void**)&checkOkD, sizeof(bool)); + cudaMalloc((void**)&checkOkD, sizeof(bool)); + // CHECK: hipLaunchKernelGGL(checkAddress, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, symbolAddress, checkOkD); + checkAddress<<>>(symbolAddress, checkOkD); + // CHECK: hipMemcpy(&checkOk, checkOkD, sizeof(bool), hipMemcpyDeviceToHost); + cudaMemcpy(&checkOk, checkOkD, sizeof(bool), cudaMemcpyDeviceToHost); + // CHECK: hipFree(checkOkD); + cudaFree(checkOkD); + assert(checkOk); + assert(symbolSize == SIZE); + // CHECK: hipHostFree(Am); + cudaFreeHost(Am); + // CHECK: hipHostFree(Cm); + cudaFreeHost(Cm); + // CHECK: hipFree(Ad); + cudaFree(Ad); + delete[] A; + delete[] B; + delete[] C; + return 0; +} diff --git a/tests/hipify-clang/unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp b/tests/hipify-clang/unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp index 74bf4b5a0d..ece384f04b 100644 --- a/tests/hipify-clang/unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp +++ b/tests/hipify-clang/unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp @@ -304,7 +304,7 @@ struct runner CUDA_CALL(cudaMemcpy(directions, h_directions, size, cudaMemcpyHostToDevice)); const size_t blocks_x = next_power2((blocks + dimensions - 1) / dimensions); - // CHECK: hipLaunchKernelGGL((init_kernel), dim3(dim3(blocks_x, dimensions)), dim3(threads), 0, 0, states, directions, offset); + // CHECK: hipLaunchKernelGGL((init_kernel), dim3(blocks_x, dimensions), dim3(threads), 0, 0, states, directions, offset); init_kernel<<>>(states, directions, offset); // CHECK: CUDA_CALL(hipPeekAtLastError()); // CHECK: CUDA_CALL(hipDeviceSynchronize()); @@ -329,7 +329,7 @@ struct runner const Extra extra) { const size_t blocks_x = next_power2((blocks + dimensions - 1) / dimensions); - // CHECK: hipLaunchKernelGGL((generate_kernel), dim3(dim3(blocks_x, dimensions)), dim3(threads), 0, 0, states, data, size / dimensions, generate_func, extra); + // CHECK: hipLaunchKernelGGL((generate_kernel), dim3(blocks_x, dimensions), dim3(threads), 0, 0, states, data, size / dimensions, generate_func, extra); generate_kernel<<>>(states, data, size / dimensions, generate_func, extra); } }; diff --git a/tests/hipify-clang/unit_tests/libraries/cuRAND/poisson_api_example.cu b/tests/hipify-clang/unit_tests/libraries/cuRAND/poisson_api_example.cu index f4fd05ba48..567de05e6e 100644 --- a/tests/hipify-clang/unit_tests/libraries/cuRAND/poisson_api_example.cu +++ b/tests/hipify-clang/unit_tests/libraries/cuRAND/poisson_api_example.cu @@ -247,7 +247,7 @@ API_TYPE set_API_type() void settings() { add_cachiers(cashiers_load); - // CHECK: hipMemcpyToSymbol("cashiers_load", cashiers_load_h, + // CHECK: hipMemcpyToSymbol(HIP_SYMBOL("cashiers_load"), cashiers_load_h, // CHECK: HOURS * sizeof(int), 0, hipMemcpyHostToDevice); cudaMemcpyToSymbol("cashiers_load", cashiers_load_h, HOURS * sizeof(int), 0, cudaMemcpyHostToDevice);