Merge pull request #1398 from emankov/master
[HIPIFY] Add device functions support
This commit is contained in:
@@ -65,6 +65,8 @@ extern const std::map<llvm::StringRef, hipCounter> CUDA_SPARSE_FUNCTION_MAP;
|
||||
extern const std::map<llvm::StringRef, hipCounter> CUDA_CAFFE2_TYPE_NAME_MAP;
|
||||
// Maps the names of CUDA CAFFE2 API functions to the corresponding HIP functions
|
||||
extern const std::map<llvm::StringRef, hipCounter> CUDA_CAFFE2_FUNCTION_MAP;
|
||||
// Maps the names of CUDA Device functions to the corresponding HIP functions
|
||||
extern const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_FUNC_MAP;
|
||||
|
||||
/**
|
||||
* The union of all the above maps, except includes.
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "CUDA2HIP.h"
|
||||
|
||||
// Maps CUDA header names to HIP header names
|
||||
const std::map<llvm::StringRef, hipCounter> CUDA_DEVICE_FUNC_MAP{
|
||||
{"umin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"llmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"ullmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"umax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"llmax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"ullmax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"__isinff", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"__isnanf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"__finite", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"__finitef", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"__signbit", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"__isnan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"__isinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"__signbitf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"__signbitl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"__finitel", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"__isinfl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"__isnanl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"_ldsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"_fdsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
{"_Pow_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}},
|
||||
};
|
||||
@@ -67,7 +67,6 @@ void HipifyAction::RewriteString(StringRef s, clang::SourceLocation start) {
|
||||
* Otherwise, the source file is updated with the corresponding hipification.
|
||||
*/
|
||||
void HipifyAction::RewriteToken(const clang::Token& t) {
|
||||
clang::SourceManager& SM = getCompilerInstance().getSourceManager();
|
||||
// String literals containing CUDA references need fixing.
|
||||
if (t.is(clang::tok::string_literal)) {
|
||||
StringRef s(t.getLiteralData(), t.getLength());
|
||||
@@ -78,13 +77,19 @@ void HipifyAction::RewriteToken(const clang::Token& t) {
|
||||
return;
|
||||
}
|
||||
StringRef name = t.getRawIdentifier();
|
||||
const auto found = CUDA_RENAMES_MAP().find(name);
|
||||
if (found == CUDA_RENAMES_MAP().end()) {
|
||||
clang::SourceLocation sl = t.getLocation();
|
||||
FindAndReplace(name, sl, CUDA_RENAMES_MAP());
|
||||
}
|
||||
|
||||
void HipifyAction::FindAndReplace(llvm::StringRef name,
|
||||
clang::SourceLocation sl,
|
||||
const std::map<llvm::StringRef, hipCounter>& repMap) {
|
||||
const auto found = repMap.find(name);
|
||||
if (found == repMap.end()) {
|
||||
// So it's an identifier, but not CUDA? Boring.
|
||||
return;
|
||||
}
|
||||
Statistics::current().incrementCounter(found->second, name.str());
|
||||
clang::SourceLocation sl = t.getLocation();
|
||||
clang::DiagnosticsEngine& DE = getCompilerInstance().getDiagnostics();
|
||||
// Warn the user about unsupported identifier.
|
||||
if (Statistics::isUnsupported(found->second)) {
|
||||
@@ -96,6 +101,7 @@ void HipifyAction::RewriteToken(const clang::Token& t) {
|
||||
return;
|
||||
}
|
||||
StringRef repName = Statistics::isToRoc(found->second) ? found->second.rocName : found->second.hipName;
|
||||
clang::SourceManager& SM = getCompilerInstance().getSourceManager();
|
||||
ct::Replacement Rep(SM, sl, name.size(), repName.str());
|
||||
clang::FullSourceLoc fullSL(sl, SM);
|
||||
insertReplacement(Rep, fullSL);
|
||||
@@ -372,6 +378,14 @@ bool HipifyAction::cudaSharedIncompleteArrayVar(const clang::ast_matchers::Match
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HipifyAction::cudaDeviceFuncCall(const clang::ast_matchers::MatchFinder::MatchResult& Result) {
|
||||
if (const clang::CallExpr *call = Result.Nodes.getNodeAs<clang::CallExpr>("cudaDeviceFuncCall")) {
|
||||
const clang::FunctionDecl *funcDcl = call->getDirectCallee();
|
||||
FindAndReplace(funcDcl->getDeclName().getAsString(), llcompat::getBeginLoc(call), CUDA_DEVICE_FUNC_MAP);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void HipifyAction::insertReplacement(const ct::Replacement& rep, const clang::FullSourceLoc& fullSL) {
|
||||
llcompat::insertReplacement(*replacements, rep);
|
||||
if (PrintStats) {
|
||||
@@ -395,7 +409,22 @@ std::unique_ptr<clang::ASTConsumer> HipifyAction::CreateASTConsumer(clang::Compi
|
||||
).bind("cudaSharedIncompleteArrayVar"),
|
||||
this
|
||||
);
|
||||
// Ownership is transferred to the caller...
|
||||
Finder->addMatcher(
|
||||
mat::callExpr(
|
||||
mat::isExpansionInMainFile(),
|
||||
mat::callee(
|
||||
mat::functionDecl(
|
||||
mat::anyOf(
|
||||
mat::hasAttr(clang::attr::CUDADevice),
|
||||
mat::hasAttr(clang::attr::CUDAGlobal)
|
||||
),
|
||||
mat::unless(mat::hasAttr(clang::attr::CUDAHost))
|
||||
)
|
||||
)
|
||||
).bind("cudaDeviceFuncCall"),
|
||||
this
|
||||
);
|
||||
// Ownership is transferred to the caller.
|
||||
return Finder->newASTConsumer();
|
||||
}
|
||||
|
||||
@@ -517,4 +546,5 @@ void HipifyAction::ExecuteAction() {
|
||||
void HipifyAction::run(const clang::ast_matchers::MatchFinder::MatchResult& Result) {
|
||||
if (cudaLaunchKernel(Result)) return;
|
||||
if (cudaSharedIncompleteArrayVar(Result)) return;
|
||||
if (cudaDeviceFuncCall(Result)) return;
|
||||
}
|
||||
|
||||
@@ -70,6 +70,7 @@ public:
|
||||
bool cudaBuiltin(const clang::ast_matchers::MatchFinder::MatchResult& Result);
|
||||
bool cudaLaunchKernel(const clang::ast_matchers::MatchFinder::MatchResult& Result);
|
||||
bool cudaSharedIncompleteArrayVar(const clang::ast_matchers::MatchFinder::MatchResult& Result);
|
||||
bool cudaDeviceFuncCall(const clang::ast_matchers::MatchFinder::MatchResult& Result);
|
||||
// Called by the preprocessor for each include directive during the non-raw lexing pass.
|
||||
void InclusionDirective(clang::SourceLocation hash_loc,
|
||||
const clang::Token &include_token,
|
||||
@@ -99,4 +100,5 @@ protected:
|
||||
void run(const clang::ast_matchers::MatchFinder::MatchResult& Result) override;
|
||||
std::unique_ptr<clang::ASTConsumer> CreateASTConsumer(clang::CompilerInstance &CI, llvm::StringRef InFile) override;
|
||||
bool Exclude(const hipCounter & hipToken);
|
||||
void FindAndReplace(llvm::StringRef name, clang::SourceLocation sl, const std::map<llvm::StringRef, hipCounter>& repMap);
|
||||
};
|
||||
|
||||
@@ -57,6 +57,7 @@ const char *counterNames[NUM_CONV_TYPES] = {
|
||||
"complex", // CONV_COMPLEX
|
||||
"library", // CONV_LIB_FUNC
|
||||
"device_library", // CONV_LIB_DEVICE_FUNC
|
||||
"device_function", // CONV_DEVICE_FUNC
|
||||
"include", // CONV_INCLUDE
|
||||
"include_cuda_main_header", // CONV_INCLUDE_CUDA_MAIN_H
|
||||
"type", // CONV_TYPE
|
||||
|
||||
@@ -112,6 +112,7 @@ enum ConvTypes {
|
||||
CONV_COMPLEX,
|
||||
CONV_LIB_FUNC,
|
||||
CONV_LIB_DEVICE_FUNC,
|
||||
CONV_DEVICE_FUNC,
|
||||
CONV_INCLUDE,
|
||||
CONV_INCLUDE_CUDA_MAIN_H,
|
||||
CONV_TYPE,
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
|
||||
// Test to warn only on device functions umin and umax as unsupported, but not on user defined ones.
|
||||
// ToDo: change lit testing in order to parse the output.
|
||||
|
||||
#define LEN 1024
|
||||
#define SIZE LEN * sizeof(float)
|
||||
// CHECK: #include <hip/hip_runtime.h>
|
||||
#include <algorithm>
|
||||
|
||||
namespace my {
|
||||
unsigned int umin(unsigned int arg1, unsigned int arg2) {
|
||||
return (arg1 < arg2) ? arg1 : arg2;
|
||||
}
|
||||
unsigned int umax(unsigned int arg1, unsigned int arg2) {
|
||||
return (arg1 > arg2) ? arg1 : arg2;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void uint_arithm(float* A, float* B, float* C, unsigned int u1, unsigned int u2)
|
||||
{
|
||||
unsigned int _umin = umin(u1, u2);
|
||||
unsigned int _umax = umax(u1, u2);
|
||||
int i = threadIdx.x;
|
||||
A[i] = i + _umin;
|
||||
B[i] = i + _umax;
|
||||
C[i] = A[i] + B[i];
|
||||
}
|
||||
|
||||
int main() {
|
||||
unsigned int u1 = 33;
|
||||
unsigned int u2 = 34;
|
||||
unsigned int _min = my::umin(u1, u2);
|
||||
unsigned int _max = my::umax(u1, u2);
|
||||
float *A, *B, *C;
|
||||
// CHECK: hipMalloc((void**)&A, SIZE);
|
||||
cudaMalloc((void**)&A, SIZE);
|
||||
// CHECK: hipMalloc((void**)&B, SIZE);
|
||||
cudaMalloc((void**)&B, SIZE);
|
||||
// CHECK: hipMalloc((void**)&C, SIZE);
|
||||
cudaMalloc((void**)&C, SIZE);
|
||||
dim3 dimGrid(LEN / 512, 1, 1);
|
||||
dim3 dimBlock(512, 1, 1);
|
||||
// CHECK: hipLaunchKernelGGL(uint_arithm, dim3(dimGrid), dim3(dimBlock), 0, 0, A, B, C, u1, u2);
|
||||
uint_arithm<<<dimGrid, dimBlock>>>(A, B, C, u1, u2);
|
||||
return _min < _max;
|
||||
}
|
||||
Fai riferimento in un nuovo problema
Block a user