diff --git a/projects/clr/hipamd/hipify-clang/src/Cuda2Hip.cpp b/projects/clr/hipamd/hipify-clang/src/Cuda2Hip.cpp index d5fd29688f..fd23344f9f 100644 --- a/projects/clr/hipamd/hipify-clang/src/Cuda2Hip.cpp +++ b/projects/clr/hipamd/hipify-clang/src/Cuda2Hip.cpp @@ -471,6 +471,49 @@ private: return true; } + SourceRange getReadRange(clang::SourceManager &SM, const SourceRange &exprRange) { + SourceLocation begin = exprRange.getBegin(); + SourceLocation end = exprRange.getEnd(); + + bool beginSafe = !SM.isMacroBodyExpansion(begin) || Lexer::isAtStartOfMacroExpansion(begin, SM, LangOptions{}); + bool endSafe = !SM.isMacroBodyExpansion(end) || Lexer::isAtEndOfMacroExpansion(end, SM, LangOptions{}); + + if (beginSafe && endSafe) { + return {SM.getFileLoc(begin), SM.getFileLoc(end)}; + } else { + return {SM.getSpellingLoc(begin), SM.getSpellingLoc(end)}; + } + } + + SourceRange getWriteRange(clang::SourceManager &SM, const SourceRange &exprRange) { + SourceLocation begin = exprRange.getBegin(); + SourceLocation end = exprRange.getEnd(); + + // If the range is contained within a macro, update the macro definition. + // Otherwise, use the file location and hope for the best. + if (!SM.isMacroBodyExpansion(begin) || !SM.isMacroBodyExpansion(end)) { + return {SM.getFileLoc(begin), SM.getFileLoc(end)}; + } + + return {SM.getSpellingLoc(begin), SM.getSpellingLoc(end)}; + } + + StringRef readSourceText(clang::SourceManager& SM, const SourceRange& exprRange) { + return Lexer::getSourceText(CharSourceRange::getTokenRange(getReadRange(SM, exprRange)), SM, LangOptions(), nullptr); + } + + /** + * Get a string representation of the expression `arg`, unless it's a defaulting function + * call argument, in which case get a 0. Used for building argument lists to kernel calls. + */ + std::string stringifyZeroDefaultedArg(SourceManager& SM, const Expr* arg) { + if (isa(arg)) { + return "0"; + } else { + return readSourceText(SM, arg->getSourceRange()); + } + } + bool cudaLaunchKernel(const MatchFinder::MatchResult &Result) { StringRef refName = "cudaLaunchKernel"; if (const CUDAKernelCallExpr *launchKernel = Result.Nodes.getNodeAs(refName)) { @@ -480,59 +523,46 @@ private: LangOptions DefaultLangOptions; SourceManager *SM = Result.SourceManager; - const Expr *e = launchKernel->getCallee(); + const Expr& calleeExpr = *(launchKernel->getCallee()); + OS << "hipLaunchKernelGGL(" << readSourceText(*SM, calleeExpr.getSourceRange()) << ", "; - // Grab the characters for the callee expression and dump them into hipLaunchKernelGGL's - // first argument. - StringRef exprSource = Lexer::getSourceText(CharSourceRange::getTokenRange(e->getSourceRange()), *SM, LangOptions(), 0); + // Next up are the four kernel configuration parameters, the last two of which are optional and default to zero. + const CallExpr& config = *(launchKernel->getConfig()); - OS << "hipLaunchKernelGGL(" << exprSource << ","; - const CallExpr *config = launchKernel->getConfig(); - DEBUG(dbgs() << "Kernel config arguments:" << "\n"); - for (unsigned argno = 0; argno < config->getNumArgs(); argno++) { - const Expr *arg = config->getArg(argno); - if (!isa(arg)) { - const ParmVarDecl *pvd = config->getDirectCallee()->getParamDecl(argno); - SourceLocation sl(arg->getLocStart()); - SourceLocation el(arg->getLocEnd()); - SourceLocation stop = Lexer::getLocForEndOfToken(el, 0, *SM, DefaultLangOptions); - StringRef outs(SM->getCharacterData(sl), SM->getCharacterData(stop) - SM->getCharacterData(sl)); - DEBUG(dbgs() << "args[ " << argno << "]" << outs << " <" << pvd->getType().getAsString() << ">\n"); - if (pvd->getType().getAsString().compare("dim3") == 0) { - OS << " dim3(" << outs << "),"; - } else { - OS << " " << outs << ","; - } - } else { - OS << " 0,"; - } + // Copy the two dimensional arguments verbatim. + OS << "dim3(" << readSourceText(*SM, config.getArg(0)->getSourceRange()) << "), "; + OS << "dim3(" << readSourceText(*SM, config.getArg(1)->getSourceRange()) << "), "; + + // The stream/memory arguments default to zero if omitted. + OS << stringifyZeroDefaultedArg(*SM, config.getArg(2)) << ", "; + OS << stringifyZeroDefaultedArg(*SM, config.getArg(3)); + + // If there are ordinary arguments to the kernel, just copy them verbatim into our new call. + int numArgs = launchKernel->getNumArgs(); + if (numArgs > 0) { + OS << ", "; + + // Start of the first argument. + SourceLocation argStart = launchKernel->getArg(0)->getLocStart(); + + // End of the last argument. + SourceLocation argEnd = launchKernel->getArg(numArgs - 1)->getLocEnd(); + + OS << readSourceText(*SM, {argStart, argEnd}); } - for (unsigned argno = 0; argno < launchKernel->getNumArgs(); argno++) { - const Expr *arg = launchKernel->getArg(argno); - SourceLocation sl(arg->getLocStart()); - if (SM->isMacroBodyExpansion(sl)) { - sl = SM->getExpansionLoc(sl); - } else if (SM->isMacroArgExpansion(sl)) { - sl = SM->getImmediateSpellingLoc(sl); - } - SourceLocation el(arg->getLocEnd()); - if (SM->isMacroBodyExpansion(el)) { - el = SM->getExpansionLoc(el); - } else if (SM->isMacroArgExpansion(el)) { - el = SM->getImmediateSpellingLoc(el); - } - SourceLocation stop = Lexer::getLocForEndOfToken(el, 0, *SM, DefaultLangOptions); - std::string outs(SM->getCharacterData(sl), SM->getCharacterData(stop) - SM->getCharacterData(sl)); - DEBUG(dbgs() << outs << "\n"); - OS << " " << outs << ","; - } - XStr.pop_back(); + OS << ")"; + + SourceRange replacementRange = getWriteRange(*SM, {launchKernel->getLocStart(), launchKernel->getLocEnd()}); + SourceLocation launchStart = replacementRange.getBegin(); + SourceLocation launchEnd = replacementRange.getEnd(); + size_t length = SM->getCharacterData(Lexer::getLocForEndOfToken( - launchKernel->getLocEnd(), 0, *SM, DefaultLangOptions)) - - SM->getCharacterData(launchKernel->getLocStart()); - Replacement Rep(*SM, launchKernel->getLocStart(), length, OS.str()); - FullSourceLoc fullSL(launchKernel->getLocStart(), *SM); + launchEnd, 0, *SM, DefaultLangOptions)) - + SM->getCharacterData(launchStart); + + Replacement Rep(*SM, launchStart, length, OS.str()); + FullSourceLoc fullSL(launchStart, *SM); insertReplacement(Rep, fullSL); hipCounter counter = {"hipLaunchKernelGGL", CONV_KERN, API_RUNTIME}; updateCounters(counter, refName.str()); diff --git a/projects/clr/hipamd/tests/hipify-clang/axpy.cu b/projects/clr/hipamd/tests/hipify-clang/axpy.cu index 8c6b0e0d8d..2fd62ac344 100644 --- a/projects/clr/hipamd/tests/hipify-clang/axpy.cu +++ b/projects/clr/hipamd/tests/hipify-clang/axpy.cu @@ -2,11 +2,23 @@ #include -__global__ void axpy(float a, float* x, float* y) { + +#define TOKEN_PASTE(X, Y) X ## Y +#define ARG_LIST_AS_MACRO a, device_x, device_y +#define KERNEL_CALL_AS_MACRO axpy<<<1, kDataLen>>> +#define KERNEL_NAME_MACRO axpy + +// CHECK: #define COMPLETE_LAUNCH hipLaunchKernelGGL(axpy, dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y) +#define COMPLETE_LAUNCH axpy<<<1, kDataLen>>>(a, device_x, device_y) + + +template +__global__ void axpy(T a, T *x, T *y) { // CHECK: y[hipThreadIdx_x] = a * x[hipThreadIdx_x]; y[threadIdx.x] = a * x[threadIdx.x]; } + int main(int argc, char* argv[]) { const int kDataLen = 4; @@ -27,10 +39,29 @@ int main(int argc, char* argv[]) { // CHECK: hipMemcpy(device_x, host_x, kDataLen * sizeof(float), hipMemcpyHostToDevice); cudaMemcpy(device_x, host_x, kDataLen * sizeof(float), cudaMemcpyHostToDevice); - // Launch the kernel. + // Launch the kernel in numerous different strange ways to exercise the prerocessor. // CHECK: hipLaunchKernelGGL(axpy, dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y); axpy<<<1, kDataLen>>>(a, device_x, device_y); + // CHECK: hipLaunchKernelGGL(axpy, dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y); + axpy<<<1, kDataLen>>>(a, device_x, device_y); + + // CHECK: hipLaunchKernelGGL(axpy, dim3(1), dim3(kDataLen), 0, 0, a, TOKEN_PASTE(device, _x), device_y); + axpy<<<1, kDataLen>>>(a, TOKEN_PASTE(device, _x), device_y); + + // CHECK: hipLaunchKernelGGL(axpy, dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); + axpy<<<1, kDataLen>>>(ARG_LIST_AS_MACRO); + + // CHECK: hipLaunchKernelGGL(KERNEL_NAME_MACRO, dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); + KERNEL_NAME_MACRO<<<1, kDataLen>>>(ARG_LIST_AS_MACRO); + + // CHECK: hipLaunchKernelGGL(axpy, dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); + KERNEL_CALL_AS_MACRO(ARG_LIST_AS_MACRO); + + // CHECK: COMPLETE_LAUNCH; + COMPLETE_LAUNCH; + + // Copy output data to host. // CHECK: hipDeviceSynchronize(); cudaDeviceSynchronize();