[HIPIFY][#1435] Add HIP_SYMBOL wrapper to the templated Device Symbol argument of the following functions:
cudaMemcpyToSymbol, cudaMemcpyToSymbolAsync, cudaGetSymbolSize, cudaGetSymbolAddress, cudaMemcpyFromSymbol, cudaMemcpyFromSymbolAsync + Add a corresponding cudaSymbolFuncCall matcher. + Add device_symbols.cu test for the above 6 functions, update existed. + Fix dim3() type cast issue, update affected tests. TODO: Do the same in hipify-perl
Цей коміт міститься в:
@@ -20,6 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include "HipifyAction.h"
|
||||
#include "clang/Basic/SourceLocation.h"
|
||||
#include "clang/Frontend/CompilerInstance.h"
|
||||
@@ -34,6 +36,25 @@ THE SOFTWARE.
|
||||
namespace ct = clang::tooling;
|
||||
namespace mat = clang::ast_matchers;
|
||||
|
||||
const std::string sCudaMemcpyToSymbol = "cudaMemcpyToSymbol";
|
||||
const std::string sCudaMemcpyToSymbolAsync = "cudaMemcpyToSymbolAsync";
|
||||
const std::string sCudaGetSymbolSize = "cudaGetSymbolSize";
|
||||
const std::string sCudaGetSymbolAddress = "cudaGetSymbolAddress";
|
||||
const std::string sCudaMemcpyFromSymbol = "cudaMemcpyFromSymbol";
|
||||
const std::string sCudaMemcpyFromSymbolAsync = "cudaMemcpyFromSymbolAsync";
|
||||
|
||||
const std::set<std::string> DeviceSymbolFunctions0 {
|
||||
{sCudaMemcpyToSymbol},
|
||||
{sCudaMemcpyToSymbolAsync}
|
||||
};
|
||||
|
||||
const std::set<std::string> DeviceSymbolFunctions1 {
|
||||
{sCudaGetSymbolSize},
|
||||
{sCudaGetSymbolAddress},
|
||||
{sCudaMemcpyFromSymbol},
|
||||
{sCudaMemcpyFromSymbolAsync}
|
||||
};
|
||||
|
||||
void HipifyAction::RewriteString(StringRef s, clang::SourceLocation start) {
|
||||
clang::SourceManager& SM = getCompilerInstance().getSourceManager();
|
||||
size_t begin = 0;
|
||||
@@ -316,8 +337,12 @@ bool HipifyAction::cudaLaunchKernel(const clang::ast_matchers::MatchFinder::Matc
|
||||
|
||||
// Next up are the four kernel configuration parameters, the last two of which are optional and default to zero.
|
||||
// Copy the two dimensional arguments verbatim.
|
||||
OS << "dim3(" << readSourceText(*SM, config->getArg(0)->getSourceRange()) << "), ";
|
||||
OS << "dim3(" << readSourceText(*SM, config->getArg(1)->getSourceRange()) << "), ";
|
||||
std::string sDim3 = "dim3(";
|
||||
for (unsigned int i = 0; i < 2; ++i) {
|
||||
const std::string sArg = readSourceText(*SM, config->getArg(i)->getSourceRange()).str();
|
||||
bool bDim3 = std::equal(sDim3.begin(), sDim3.end(), sArg.c_str());
|
||||
OS << (bDim3 ? "" : sDim3) << sArg << (bDim3 ? "" : ")") << ", ";
|
||||
}
|
||||
// The stream/memory arguments default to zero if omitted.
|
||||
OS << stringifyZeroDefaultedArg(*SM, config->getArg(2)) << ", ";
|
||||
OS << stringifyZeroDefaultedArg(*SM, config->getArg(3));
|
||||
@@ -395,11 +420,50 @@ bool HipifyAction::cudaSharedIncompleteArrayVar(const clang::ast_matchers::Match
|
||||
bool HipifyAction::cudaDeviceFuncCall(const clang::ast_matchers::MatchFinder::MatchResult& Result) {
|
||||
if (const clang::CallExpr *call = Result.Nodes.getNodeAs<clang::CallExpr>("cudaDeviceFuncCall")) {
|
||||
const clang::FunctionDecl *funcDcl = call->getDirectCallee();
|
||||
if (!funcDcl) {
|
||||
return true;
|
||||
}
|
||||
FindAndReplace(funcDcl->getDeclName().getAsString(), llcompat::getBeginLoc(call), CUDA_DEVICE_FUNC_MAP, false);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HipifyAction::cudaSymbolFuncCall(const clang::ast_matchers::MatchFinder::MatchResult& Result) {
|
||||
if (const clang::CallExpr * call = Result.Nodes.getNodeAs<clang::CallExpr>("cudaSymbolFuncCall")) {
|
||||
if (!call->getNumArgs()) {
|
||||
return true;
|
||||
}
|
||||
const clang::FunctionDecl* funcDcl = call->getDirectCallee();
|
||||
if (!funcDcl) {
|
||||
return true;
|
||||
}
|
||||
std::string sName = funcDcl->getDeclName().getAsString();
|
||||
unsigned int argNum = 0;
|
||||
if (DeviceSymbolFunctions0.find(sName) != DeviceSymbolFunctions0.end()) {
|
||||
argNum = 0;
|
||||
} else if (call->getNumArgs() > 1 && DeviceSymbolFunctions1.find(sName) != DeviceSymbolFunctions1.end()) {
|
||||
argNum = 1;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
clang::SmallString<40> XStr;
|
||||
llvm::raw_svector_ostream OS(XStr);
|
||||
clang::SourceRange sr = call->getArg(argNum)->getSourceRange();
|
||||
clang::SourceManager* SM = Result.SourceManager;
|
||||
const std::string sSymbol = "HIP_SYMBOL";
|
||||
OS << sSymbol << "(" << readSourceText(*SM, sr) << ")";
|
||||
clang::SourceRange replacementRange = getWriteRange(*SM, { sr.getBegin(), sr.getEnd() });
|
||||
clang::SourceLocation s = replacementRange.getBegin();
|
||||
clang::SourceLocation e = replacementRange.getEnd();
|
||||
clang::LangOptions DefaultLangOptions;
|
||||
size_t length = SM->getCharacterData(clang::Lexer::getLocForEndOfToken(e, 0, *SM, DefaultLangOptions)) - SM->getCharacterData(s);
|
||||
ct::Replacement Rep(*SM, s, length, OS.str());
|
||||
clang::FullSourceLoc fullSL(s, *SM);
|
||||
insertReplacement(Rep, fullSL);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void HipifyAction::insertReplacement(const ct::Replacement& rep, const clang::FullSourceLoc& fullSL) {
|
||||
llcompat::insertReplacement(*replacements, rep);
|
||||
if (PrintStats) {
|
||||
@@ -423,6 +487,24 @@ std::unique_ptr<clang::ASTConsumer> HipifyAction::CreateASTConsumer(clang::Compi
|
||||
).bind("cudaSharedIncompleteArrayVar"),
|
||||
this
|
||||
);
|
||||
Finder->addMatcher(
|
||||
mat::callExpr(
|
||||
mat::isExpansionInMainFile(),
|
||||
mat::callee(
|
||||
mat::functionDecl(
|
||||
mat::hasAnyName(
|
||||
sCudaGetSymbolAddress,
|
||||
sCudaGetSymbolSize,
|
||||
sCudaMemcpyFromSymbol,
|
||||
sCudaMemcpyFromSymbolAsync,
|
||||
sCudaMemcpyToSymbol,
|
||||
sCudaMemcpyToSymbolAsync
|
||||
)
|
||||
)
|
||||
)
|
||||
).bind("cudaSymbolFuncCall"),
|
||||
this
|
||||
);
|
||||
Finder->addMatcher(
|
||||
mat::callExpr(
|
||||
mat::isExpansionInMainFile(),
|
||||
@@ -560,5 +642,6 @@ void HipifyAction::ExecuteAction() {
|
||||
void HipifyAction::run(const clang::ast_matchers::MatchFinder::MatchResult& Result) {
|
||||
if (cudaLaunchKernel(Result)) return;
|
||||
if (cudaSharedIncompleteArrayVar(Result)) return;
|
||||
if (cudaSymbolFuncCall(Result)) return;
|
||||
if (cudaDeviceFuncCall(Result)) return;
|
||||
}
|
||||
|
||||
@@ -71,6 +71,7 @@ public:
|
||||
bool cudaLaunchKernel(const clang::ast_matchers::MatchFinder::MatchResult& Result);
|
||||
bool cudaSharedIncompleteArrayVar(const clang::ast_matchers::MatchFinder::MatchResult& Result);
|
||||
bool cudaDeviceFuncCall(const clang::ast_matchers::MatchFinder::MatchResult& Result);
|
||||
bool cudaSymbolFuncCall(const clang::ast_matchers::MatchFinder::MatchResult& Result);
|
||||
// Called by the preprocessor for each include directive during the non-raw lexing pass.
|
||||
void InclusionDirective(clang::SourceLocation hash_loc,
|
||||
const clang::Token &include_token,
|
||||
|
||||
@@ -0,0 +1,152 @@
|
||||
// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args
|
||||
|
||||
/*
|
||||
Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// CHECK: #include <hip/hip_runtime.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <iostream>
|
||||
#include <assert.h>
|
||||
|
||||
#define NUM 1024
|
||||
#define SIZE 1024 * 4
|
||||
|
||||
__device__ int globalIn[NUM];
|
||||
__device__ int globalOut[NUM];
|
||||
|
||||
__global__ void Assign(int* Out) {
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
Out[tid] = globalIn[tid];
|
||||
globalOut[tid] = globalIn[tid];
|
||||
}
|
||||
|
||||
__device__ __constant__ int globalConst[NUM];
|
||||
|
||||
__global__ void checkAddress(int* addr, bool* out) {
|
||||
*out = (globalConst == addr);
|
||||
}
|
||||
|
||||
int main() {
|
||||
int *A, *Am, *B, *Ad, *C, *Cm;
|
||||
A = new int[NUM];
|
||||
B = new int[NUM];
|
||||
C = new int[NUM];
|
||||
for (int i = 0; i < NUM; ++i) {
|
||||
A[i] = -1 * i;
|
||||
B[i] = 0;
|
||||
C[i] = 0;
|
||||
}
|
||||
// CHECK: hipMalloc((void**)&Ad, SIZE);
|
||||
cudaMalloc((void**)&Ad, SIZE);
|
||||
// CHECK: hipHostMalloc((void**)&Am, SIZE);
|
||||
cudaMallocHost((void**)&Am, SIZE);
|
||||
// CHECK: hipHostMalloc((void**)&Cm, SIZE);
|
||||
cudaMallocHost((void**)&Cm, SIZE);
|
||||
for (int i = 0; i < NUM; ++i) {
|
||||
Am[i] = -1 * i;
|
||||
Cm[i] = 0;
|
||||
}
|
||||
// CHECK: hipStream_t stream = NULL;
|
||||
cudaStream_t stream = NULL;
|
||||
// CHECK: hipStreamCreate(&stream);
|
||||
cudaStreamCreate(&stream);
|
||||
// CHECK: hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), Am, SIZE, 0, hipMemcpyHostToDevice, stream);
|
||||
cudaMemcpyToSymbolAsync(globalIn, Am, SIZE, 0, cudaMemcpyHostToDevice, stream);
|
||||
// CHECK: hipStreamSynchronize(stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
// CHECK: hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad);
|
||||
Assign<<<dim3(1, 1, 1), dim3(NUM, 1, 1)>>>(Ad);
|
||||
// CHECK: hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost);
|
||||
cudaMemcpy(B, Ad, SIZE, cudaMemcpyDeviceToHost);
|
||||
// CHECK: hipMemcpyFromSymbolAsync(Cm, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream);
|
||||
cudaMemcpyFromSymbolAsync(Cm, globalOut, SIZE, 0, cudaMemcpyDeviceToHost, stream);
|
||||
// CHECK: hipStreamSynchronize(stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
for (int i = 0; i < NUM; ++i) {
|
||||
assert(Am[i] == B[i]);
|
||||
assert(Am[i] == Cm[i]);
|
||||
}
|
||||
for (int i = 0; i < NUM; ++i) {
|
||||
A[i] = -2 * i;
|
||||
B[i] = 0;
|
||||
}
|
||||
// CHECK: hipMemcpyToSymbol(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice);
|
||||
cudaMemcpyToSymbol(globalIn, A, SIZE, 0, cudaMemcpyHostToDevice);
|
||||
// CHECK: hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad);
|
||||
Assign<<<dim3(1, 1, 1), dim3(NUM, 1, 1)>>>(Ad);
|
||||
// CHECK: hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost);
|
||||
cudaMemcpy(B, Ad, SIZE, cudaMemcpyDeviceToHost);
|
||||
// CHECK: hipMemcpyFromSymbol(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost);
|
||||
cudaMemcpyFromSymbol(C, globalOut, SIZE, 0, cudaMemcpyDeviceToHost);
|
||||
for (int i = 0; i < NUM; ++i) {
|
||||
assert(A[i] == B[i]);
|
||||
assert(A[i] == C[i]);
|
||||
}
|
||||
for (int i = 0; i < NUM; ++i) {
|
||||
A[i] = -3 * i;
|
||||
B[i] = 0;
|
||||
}
|
||||
// CHECK: hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice, stream);
|
||||
cudaMemcpyToSymbolAsync(globalIn, A, SIZE, 0, cudaMemcpyHostToDevice, stream);
|
||||
// CHECK: hipStreamSynchronize(stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
// CHECK: hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad);
|
||||
Assign<<<dim3(1, 1, 1), dim3(NUM, 1, 1)>>>(Ad);
|
||||
// CHECK: hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost);
|
||||
cudaMemcpy(B, Ad, SIZE, cudaMemcpyDeviceToHost);
|
||||
// CHECK: hipMemcpyFromSymbolAsync(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream);
|
||||
cudaMemcpyFromSymbolAsync(C, globalOut, SIZE, 0, cudaMemcpyDeviceToHost, stream);
|
||||
// CHECK: hipStreamSynchronize(stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
for (int i = 0; i < NUM; ++i) {
|
||||
assert(A[i] == B[i]);
|
||||
assert(A[i] == C[i]);
|
||||
}
|
||||
bool *checkOkD;
|
||||
bool checkOk = false;
|
||||
size_t symbolSize = 0;
|
||||
int *symbolAddress;
|
||||
// CHECK: hipGetSymbolSize(&symbolSize, HIP_SYMBOL(globalConst));
|
||||
cudaGetSymbolSize(&symbolSize, globalConst);
|
||||
// CHECK: hipGetSymbolAddress((void**) &symbolAddress, HIP_SYMBOL(globalConst));
|
||||
cudaGetSymbolAddress((void**) &symbolAddress, globalConst);
|
||||
// CHECK: hipMalloc((void**)&checkOkD, sizeof(bool));
|
||||
cudaMalloc((void**)&checkOkD, sizeof(bool));
|
||||
// CHECK: hipLaunchKernelGGL(checkAddress, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, symbolAddress, checkOkD);
|
||||
checkAddress<<<dim3(1, 1, 1), dim3(1, 1, 1)>>>(symbolAddress, checkOkD);
|
||||
// CHECK: hipMemcpy(&checkOk, checkOkD, sizeof(bool), hipMemcpyDeviceToHost);
|
||||
cudaMemcpy(&checkOk, checkOkD, sizeof(bool), cudaMemcpyDeviceToHost);
|
||||
// CHECK: hipFree(checkOkD);
|
||||
cudaFree(checkOkD);
|
||||
assert(checkOk);
|
||||
assert(symbolSize == SIZE);
|
||||
// CHECK: hipHostFree(Am);
|
||||
cudaFreeHost(Am);
|
||||
// CHECK: hipHostFree(Cm);
|
||||
cudaFreeHost(Cm);
|
||||
// CHECK: hipFree(Ad);
|
||||
cudaFree(Ad);
|
||||
delete[] A;
|
||||
delete[] B;
|
||||
delete[] C;
|
||||
return 0;
|
||||
}
|
||||
@@ -304,7 +304,7 @@ struct runner<curandStateSobol32_t>
|
||||
CUDA_CALL(cudaMemcpy(directions, h_directions, size, cudaMemcpyHostToDevice));
|
||||
|
||||
const size_t blocks_x = next_power2((blocks + dimensions - 1) / dimensions);
|
||||
// CHECK: hipLaunchKernelGGL((init_kernel), dim3(dim3(blocks_x, dimensions)), dim3(threads), 0, 0, states, directions, offset);
|
||||
// CHECK: hipLaunchKernelGGL((init_kernel), dim3(blocks_x, dimensions), dim3(threads), 0, 0, states, directions, offset);
|
||||
init_kernel<<<dim3(blocks_x, dimensions), threads>>>(states, directions, offset);
|
||||
// CHECK: CUDA_CALL(hipPeekAtLastError());
|
||||
// CHECK: CUDA_CALL(hipDeviceSynchronize());
|
||||
@@ -329,7 +329,7 @@ struct runner<curandStateSobol32_t>
|
||||
const Extra extra)
|
||||
{
|
||||
const size_t blocks_x = next_power2((blocks + dimensions - 1) / dimensions);
|
||||
// CHECK: hipLaunchKernelGGL((generate_kernel), dim3(dim3(blocks_x, dimensions)), dim3(threads), 0, 0, states, data, size / dimensions, generate_func, extra);
|
||||
// CHECK: hipLaunchKernelGGL((generate_kernel), dim3(blocks_x, dimensions), dim3(threads), 0, 0, states, data, size / dimensions, generate_func, extra);
|
||||
generate_kernel<<<dim3(blocks_x, dimensions), threads>>>(states, data, size / dimensions, generate_func, extra);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -247,7 +247,7 @@ API_TYPE set_API_type()
|
||||
void settings()
|
||||
{
|
||||
add_cachiers(cashiers_load);
|
||||
// CHECK: hipMemcpyToSymbol("cashiers_load", cashiers_load_h,
|
||||
// CHECK: hipMemcpyToSymbol(HIP_SYMBOL("cashiers_load"), cashiers_load_h,
|
||||
// CHECK: HOURS * sizeof(int), 0, hipMemcpyHostToDevice);
|
||||
cudaMemcpyToSymbol("cashiers_load", cashiers_load_h,
|
||||
HOURS * sizeof(int), 0, cudaMemcpyHostToDevice);
|
||||
|
||||
Посилання в новій задачі
Заблокувати користувача