Merge branch 'master' of https://github.com/ROCm-Developer-Tools/HIP
このコミットが含まれているのは:
+4
-2
@@ -127,7 +127,9 @@ if ($HIP_PLATFORM eq "hcc") {
|
||||
|
||||
# Force -stdlib=libc++ on UB14.04
|
||||
$HOST_OSVER= `cat /etc/os-release | grep "^VERSION_ID\=" | cut -d= -f2 | tr -d '\n'`;
|
||||
if ($HOST_OSNAME eq "ubuntu" and $HOST_OSVER eq "\"14.04\"") {
|
||||
if (($HOST_OSNAME eq "ubuntu" and $HOST_OSVER eq "\"14.04\"")
|
||||
or ($HOST_OSNAME eq "\"centos\"" and $HOST_OSVER eq "\"7\"")
|
||||
or ($HOST_OSNAME eq "\"rhel\"" and $HOST_OSVER eq "\"7.4\"")) {
|
||||
$HIPCXXFLAGS .= " -stdlib=libc++";
|
||||
$setStdLib = 1;
|
||||
}
|
||||
@@ -136,7 +138,6 @@ if ($HIP_PLATFORM eq "hcc") {
|
||||
$HIPCXXFLAGS .= " -I$HSA_PATH/include";
|
||||
$HIPCXXFLAGS .= " -Wno-deprecated-register";
|
||||
|
||||
$HIPLDFLAGS .= " -lsupc++";
|
||||
$HIPLDFLAGS .= " -L$HSA_PATH/lib -L$ROCM_PATH/lib -lhsa-runtime64 -lhc_am -lhsakmt ";
|
||||
# $HIPLDFLAGS .= " -L$HCC_HOME/compiler/lib -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMMC -lLLVMCore -lLLVMSupport ";
|
||||
|
||||
@@ -438,6 +439,7 @@ if($HIP_PLATFORM eq "hcc"){
|
||||
if ($target_gfx900 eq 1) {
|
||||
$HIPLDFLAGS .= " --amdgpu-target=gfx900";
|
||||
$HIPCXXFLAGS .= " -D__HIP_ARCH_GFX900__=1 ";
|
||||
$ENV{HCC_EXTRA_LIBRARIES_GFX900}="$HIP_PATH/lib/hip_hc_gfx803.ll\n";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
#include "ArgParse.h"
|
||||
|
||||
cl::OptionCategory ToolTemplateCategory("CUDA to HIP source translator options");
|
||||
|
||||
cl::opt<std::string> OutputFilename("o",
|
||||
cl::desc("Output filename"),
|
||||
cl::value_desc("filename"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
cl::opt<bool> Inplace("inplace",
|
||||
cl::desc("Modify input file inplace, replacing input with hipified output, save backup in .prehip file"),
|
||||
cl::value_desc("inplace"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
cl::opt<bool> NoBackup("no-backup",
|
||||
cl::desc("Don't create a backup file for the hipified source"),
|
||||
cl::value_desc("no-backup"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
cl::opt<bool> NoOutput("no-output",
|
||||
cl::desc("Don't write any translated output to stdout"),
|
||||
cl::value_desc("no-output"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
cl::opt<bool> PrintStats("print-stats",
|
||||
cl::desc("Print translation statistics"),
|
||||
cl::value_desc("print-stats"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
cl::opt<std::string> OutputStatsFilename("o-stats",
|
||||
cl::desc("Output filename for statistics"),
|
||||
cl::value_desc("filename"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
cl::opt<bool> Examine("examine",
|
||||
cl::desc("Combines -no-output and -print-stats options"),
|
||||
cl::value_desc("examine"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
cl::extrahelp CommonHelp(ct::CommonOptionsParser::HelpMessage);
|
||||
@@ -0,0 +1,19 @@
|
||||
#pragma once
|
||||
|
||||
#include "clang/Tooling/CommonOptionsParser.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
||||
namespace cl = llvm::cl;
|
||||
namespace ct = clang::tooling;
|
||||
|
||||
extern cl::OptionCategory ToolTemplateCategory;
|
||||
|
||||
extern cl::opt<std::string> OutputFilename;
|
||||
extern cl::opt<bool> Inplace;
|
||||
extern cl::opt<bool> NoBackup;
|
||||
extern cl::opt<bool> NoOutput;
|
||||
extern cl::opt<bool> PrintStats;
|
||||
extern cl::opt<std::string> OutputStatsFilename;
|
||||
extern cl::opt<bool> Examine;
|
||||
|
||||
extern cl::extrahelp CommonHelp;
|
||||
@@ -312,9 +312,16 @@ const std::map<llvm::StringRef, hipCounter> CUDA_TYPE_NAME_MAP{
|
||||
/// Maps cuda header names to hip header names.
|
||||
const std::map <llvm::StringRef, hipCounter> CUDA_INCLUDE_MAP{
|
||||
// CUDA includes
|
||||
{"cuda.h", {"hip/hip_runtime.h", CONV_INCLUDE_CUDA_MAIN_H, API_DRIVER}},
|
||||
{"cuda_runtime.h", {"hip/hip_runtime.h", CONV_INCLUDE_CUDA_MAIN_H, API_RUNTIME}},
|
||||
{"cuda_runtime_api.h", {"hip/hip_runtime_api.h", CONV_INCLUDE, API_RUNTIME}},
|
||||
{"cuda.h", {"hip/hip_runtime.h", CONV_INCLUDE_CUDA_MAIN_H, API_DRIVER}},
|
||||
{"cuda_runtime.h", {"hip/hip_runtime.h", CONV_INCLUDE_CUDA_MAIN_H, API_RUNTIME}},
|
||||
{"cuda_runtime_api.h", {"hip/hip_runtime_api.h", CONV_INCLUDE, API_RUNTIME}},
|
||||
{"channel_descriptor.h", {"hip/channel_descriptor.h", CONV_INCLUDE, API_RUNTIME}},
|
||||
{"device_functions.h", {"hip/device_functions.h", CONV_INCLUDE, API_RUNTIME}},
|
||||
{"driver_types.h", {"hip/driver_types.h", CONV_INCLUDE, API_RUNTIME}},
|
||||
{"cuComplex.h", {"hip/hip_complex.h", CONV_INCLUDE, API_RUNTIME}},
|
||||
{"cuda_fp16.h", {"hip/hip_fp16.h", CONV_INCLUDE, API_RUNTIME}},
|
||||
{"cuda_texture_types.h", {"hip/hip_texture_types.h", CONV_INCLUDE, API_RUNTIME}},
|
||||
{"vector_types.h", {"hip/hip_vector_types.h", CONV_INCLUDE, API_RUNTIME}},
|
||||
|
||||
// CUBLAS includes
|
||||
{"cublas.h", {"hipblas.h", CONV_INCLUDE, API_BLAS}},
|
||||
@@ -2677,7 +2684,6 @@ const std::map<llvm::StringRef, hipCounter>& CUDA_RENAMES_MAP() {
|
||||
|
||||
// First run, so compute the union map.
|
||||
ret = CUDA_IDENTIFIER_MAP;
|
||||
ret.insert(CUDA_INCLUDE_MAP.begin(), CUDA_INCLUDE_MAP.end());
|
||||
ret.insert(CUDA_TYPE_NAME_MAP.begin(), CUDA_TYPE_NAME_MAP.end());
|
||||
|
||||
return ret;
|
||||
|
||||
@@ -1,861 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
/**
|
||||
* @file Cuda2Hip.cpp
|
||||
*
|
||||
* This file is compiled and linked into clang based hipify tool.
|
||||
*/
|
||||
#include "clang/ASTMatchers/ASTMatchFinder.h"
|
||||
#include "clang/ASTMatchers/ASTMatchers.h"
|
||||
#include "clang/Basic/SourceManager.h"
|
||||
#include "clang/Frontend/CompilerInstance.h"
|
||||
#include "clang/Frontend/FrontendActions.h"
|
||||
#include "clang/Frontend/TextDiagnosticPrinter.h"
|
||||
#include "clang/Lex/Lexer.h"
|
||||
#include "clang/Lex/MacroArgs.h"
|
||||
#include "clang/Lex/MacroInfo.h"
|
||||
#include "clang/Lex/PPCallbacks.h"
|
||||
#include "clang/Lex/Preprocessor.h"
|
||||
#include "clang/Rewrite/Core/Rewriter.h"
|
||||
#include "clang/Tooling/CommonOptionsParser.h"
|
||||
#include "clang/Tooling/Refactoring.h"
|
||||
#include "clang/Tooling/Tooling.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
#include "llvm/Support/Signals.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
#include <chrono>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
|
||||
#include "CUDA2HipMap.h"
|
||||
#include "LLVMCompat.h"
|
||||
#include "StringUtils.h"
|
||||
|
||||
using namespace clang;
|
||||
using namespace clang::ast_matchers;
|
||||
using namespace clang::tooling;
|
||||
using namespace llvm;
|
||||
|
||||
#define DEBUG_TYPE "cuda2hip"
|
||||
|
||||
|
||||
// Set up the command line options
|
||||
static cl::OptionCategory ToolTemplateCategory("CUDA to HIP source translator options");
|
||||
|
||||
static cl::opt<std::string> OutputFilename("o",
|
||||
cl::desc("Output filename"),
|
||||
cl::value_desc("filename"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
static cl::opt<bool> Inplace("inplace",
|
||||
cl::desc("Modify input file inplace, replacing input with hipified output, save backup in .prehip file"),
|
||||
cl::value_desc("inplace"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
static cl::opt<bool> NoBackup("no-backup",
|
||||
cl::desc("Don't create a backup file for the hipified source"),
|
||||
cl::value_desc("no-backup"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
static cl::opt<bool> NoOutput("no-output",
|
||||
cl::desc("Don't write any translated output to stdout"),
|
||||
cl::value_desc("no-output"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
static cl::opt<bool> PrintStats("print-stats",
|
||||
cl::desc("Print translation statistics"),
|
||||
cl::value_desc("print-stats"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
static cl::opt<std::string> OutputStatsFilename("o-stats",
|
||||
cl::desc("Output filename for statistics"),
|
||||
cl::value_desc("filename"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
static cl::opt<bool> Examine("examine",
|
||||
cl::desc("Combines -no-output and -print-stats options"),
|
||||
cl::value_desc("examine"),
|
||||
cl::cat(ToolTemplateCategory));
|
||||
|
||||
static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage);
|
||||
|
||||
class Cuda2Hip {
|
||||
public:
|
||||
Cuda2Hip(Replacements& R, const std::string &srcFileName) :
|
||||
Replace(R), mainFileName(srcFileName) {}
|
||||
|
||||
enum msgTypes {
|
||||
HIPIFY_ERROR = 0,
|
||||
HIPIFY_WARNING
|
||||
};
|
||||
|
||||
std::string getMsgType(msgTypes type) {
|
||||
switch (type) {
|
||||
case HIPIFY_ERROR: return "error";
|
||||
default:
|
||||
case HIPIFY_WARNING: return "warning";
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
Replacements& Replace;
|
||||
std::string mainFileName;
|
||||
|
||||
virtual void insertReplacement(const Replacement &rep, const FullSourceLoc &fullSL) {
|
||||
llcompat::insertReplacement(Replace, rep);
|
||||
if (PrintStats) {
|
||||
rep.getLength();
|
||||
Statistics::current().lineTouched(fullSL.getExpansionLineNumber());
|
||||
Statistics::current().bytesChanged(rep.getLength());
|
||||
}
|
||||
}
|
||||
void insertHipHeaders(Cuda2Hip *owner, const SourceManager &SM) {
|
||||
if (Replace.size() > 0) {
|
||||
std::string repName = "#include <hip/hip_runtime.h>";
|
||||
Statistics::current().incrementCounter({repName, ConvTypes::CONV_INCLUDE_CUDA_MAIN_H, ApiTypes::API_RUNTIME}, "#include <cuda>");
|
||||
SourceLocation sl = SM.getLocForStartOfFile(SM.getMainFileID());
|
||||
FullSourceLoc fullSL(sl, SM);
|
||||
Replacement Rep(SM, sl, 0, repName + "\n");
|
||||
insertReplacement(Rep, fullSL);
|
||||
}
|
||||
}
|
||||
|
||||
void printHipifyMessage(const SourceManager &SM, const SourceLocation &sl, const std::string &message, msgTypes msgType = HIPIFY_WARNING) {
|
||||
FullSourceLoc fullSL(sl, SM);
|
||||
llvm::errs() << "[HIPIFY] " << getMsgType(msgType) << ": " << mainFileName << ":" << fullSL.getExpansionLineNumber() << ":" << fullSL.getExpansionColumnNumber() << ": " << message << "\n";
|
||||
}
|
||||
|
||||
void processString(StringRef s, SourceManager &SM, SourceLocation start) {
|
||||
size_t begin = 0;
|
||||
while ((begin = s.find("cu", begin)) != StringRef::npos) {
|
||||
const size_t end = s.find_first_of(" ", begin + 4);
|
||||
StringRef name = s.slice(begin, end);
|
||||
const auto found = CUDA_RENAMES_MAP().find(name);
|
||||
if (found != CUDA_RENAMES_MAP().end()) {
|
||||
StringRef repName = found->second.hipName;
|
||||
hipCounter counter = {"[string literal]", ConvTypes::CONV_LITERAL, ApiTypes::API_RUNTIME, found->second.unsupported};
|
||||
Statistics::current().incrementCounter(counter, name.str());
|
||||
if (!counter.unsupported) {
|
||||
SourceLocation sl = start.getLocWithOffset(begin + 1);
|
||||
Replacement Rep(SM, sl, name.size(), repName);
|
||||
FullSourceLoc fullSL(sl, SM);
|
||||
insertReplacement(Rep, fullSL);
|
||||
}
|
||||
}
|
||||
|
||||
if (end == StringRef::npos) {
|
||||
break;
|
||||
}
|
||||
begin = end + 1;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class Cuda2HipCallback;
|
||||
|
||||
class HipifyPPCallbacks : public PPCallbacks, public SourceFileCallbacks, public Cuda2Hip {
|
||||
public:
|
||||
HipifyPPCallbacks(Replacements& R, const std::string &mainFileName)
|
||||
: Cuda2Hip(R, mainFileName) {}
|
||||
|
||||
virtual bool handleBeginSource(CompilerInstance &CI
|
||||
#if LLVM_VERSION_MAJOR <= 4
|
||||
, StringRef Filename
|
||||
#endif
|
||||
) override {
|
||||
Preprocessor &PP = CI.getPreprocessor();
|
||||
SourceManager &SM = CI.getSourceManager();
|
||||
setSourceManager(&SM);
|
||||
PP.addPPCallbacks(std::unique_ptr<HipifyPPCallbacks>(this));
|
||||
setPreprocessor(&PP);
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual void handleEndSource() override;
|
||||
|
||||
virtual void InclusionDirective(SourceLocation hash_loc,
|
||||
const Token &include_token,
|
||||
StringRef file_name, bool is_angled,
|
||||
CharSourceRange filename_range,
|
||||
const FileEntry *file, StringRef search_path,
|
||||
StringRef relative_path,
|
||||
const clang::Module *imported) override {
|
||||
if (!_sm->isWrittenInMainFile(hash_loc) || !is_angled) {
|
||||
return; // We're looking to rewrite angle-includes in the main file to point to hip.
|
||||
}
|
||||
|
||||
const auto found = CUDA_INCLUDE_MAP.find(file_name);
|
||||
if (found == CUDA_INCLUDE_MAP.end()) {
|
||||
// Not a CUDA include - don't touch it.
|
||||
return;
|
||||
}
|
||||
|
||||
Statistics::current().incrementCounter(found->second, file_name.str());
|
||||
if (found->second.unsupported) {
|
||||
// An unsupported CUDA header? Oh dear. Print a warning.
|
||||
printHipifyMessage(*_sm, hash_loc, "Unsupported CUDA header used: " + file_name.str());
|
||||
return;
|
||||
}
|
||||
|
||||
StringRef repName = found->second.hipName;
|
||||
DEBUG(dbgs() << "Include file found: " << file_name << "\n"
|
||||
<< "SourceLocation: "
|
||||
<< filename_range.getBegin().printToString(*_sm) << "\n"
|
||||
<< "Will be replaced with " << repName << "\n");
|
||||
SourceLocation sl = filename_range.getBegin();
|
||||
SourceLocation sle = filename_range.getEnd();
|
||||
const char *B = _sm->getCharacterData(sl);
|
||||
const char *E = _sm->getCharacterData(sle);
|
||||
SmallString<128> tmpData;
|
||||
Replacement Rep(*_sm, sl, E - B, Twine("<" + repName + ">").toStringRef(tmpData));
|
||||
FullSourceLoc fullSL(sl, *_sm);
|
||||
insertReplacement(Rep, fullSL);
|
||||
}
|
||||
|
||||
/**
|
||||
* Look at, and consider altering, a given token.
|
||||
*
|
||||
* If it's not a CUDA identifier, nothing happens.
|
||||
* If it's an unsupported CUDA identifier, a warning is emitted.
|
||||
* Otherwise, the source file is updated with the corresponding hipification.
|
||||
*/
|
||||
void RewriteToken(Token t) {
|
||||
// String literals containing CUDA references need fixing...
|
||||
if (t.is(tok::string_literal)) {
|
||||
StringRef s(t.getLiteralData(), t.getLength());
|
||||
processString(unquoteStr(s), *_sm, t.getLocation());
|
||||
return;
|
||||
} else if (!t.isAnyIdentifier()) {
|
||||
// If it's neither a string nor an identifier, we don't care.
|
||||
return;
|
||||
}
|
||||
|
||||
StringRef name = t.getIdentifierInfo()->getName();
|
||||
const auto found = CUDA_RENAMES_MAP().find(name);
|
||||
if (found == CUDA_RENAMES_MAP().end()) {
|
||||
// So it's an identifier, but not CUDA? Boring.
|
||||
return;
|
||||
}
|
||||
Statistics::current().incrementCounter(found->second, name.str());
|
||||
|
||||
SourceLocation sl = t.getLocation();
|
||||
if (found->second.unsupported) {
|
||||
// An unsupported identifier? Curses! Warn the user.
|
||||
printHipifyMessage(*_sm, sl, "Unsupported CUDA identifier used: " + name.str());
|
||||
return;
|
||||
}
|
||||
|
||||
StringRef repName = found->second.hipName;
|
||||
Replacement Rep(*_sm, sl, name.size(), repName);
|
||||
FullSourceLoc fullSL(sl, *_sm);
|
||||
insertReplacement(Rep, fullSL);
|
||||
}
|
||||
|
||||
virtual void MacroDefined(const Token &MacroNameTok,
|
||||
const MacroDirective *MD) override {
|
||||
if (!_sm->isWrittenInMainFile(MD->getLocation()) ||
|
||||
MD->getKind() != MacroDirective::MD_Define) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (auto T : MD->getMacroInfo()->tokens()) {
|
||||
RewriteToken(T);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void MacroExpands(const Token &MacroNameTok,
|
||||
const MacroDefinition &MD, SourceRange Range,
|
||||
const MacroArgs *Args) override {
|
||||
|
||||
if (!_sm->isWrittenInMainFile(MacroNameTok.getLocation())) {
|
||||
return; // Macros in headers are not our concern.
|
||||
}
|
||||
|
||||
// Is the macro itself a CUDA identifier? If so, rewrite it
|
||||
RewriteToken(MacroNameTok);
|
||||
|
||||
// If it's a macro with arguments, rewrite all the arguments as hip, too.
|
||||
for (unsigned int i = 0; Args && i < MD.getMacroInfo()->GET_NUM_ARGS(); i++) {
|
||||
std::vector<Token> toks;
|
||||
// Code below is a kind of stolen from 'MacroArgs::getPreExpArgument'
|
||||
// to workaround the 'const' MacroArgs passed into this hook.
|
||||
const Token *start = Args->getUnexpArgument(i);
|
||||
size_t len = Args->getArgLength(start) + 1;
|
||||
llcompat::EnterPreprocessorTokenStream(*_pp, start, len, false);
|
||||
|
||||
do {
|
||||
toks.push_back(Token());
|
||||
Token &tk = toks.back();
|
||||
_pp->Lex(tk);
|
||||
} while (toks.back().isNot(tok::eof));
|
||||
|
||||
_pp->RemoveTopOfLexerStack();
|
||||
// end of stolen code
|
||||
|
||||
for (auto tok : toks) {
|
||||
RewriteToken(tok);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void EndOfMainFile() override {}
|
||||
|
||||
void setSourceManager(SourceManager *sm) { _sm = sm; }
|
||||
void setPreprocessor(Preprocessor *pp) { _pp = pp; }
|
||||
void setMatch(Cuda2HipCallback *match) { Match = match; }
|
||||
|
||||
private:
|
||||
SourceManager *_sm = nullptr;
|
||||
Preprocessor *_pp = nullptr;
|
||||
Cuda2HipCallback *Match = nullptr;
|
||||
};
|
||||
|
||||
class Cuda2HipCallback : public MatchFinder::MatchCallback, public Cuda2Hip {
|
||||
private:
|
||||
bool cudaCall(const MatchFinder::MatchResult &Result) {
|
||||
const CallExpr *call = Result.Nodes.getNodeAs<CallExpr>("cudaCall");
|
||||
if (!call) {
|
||||
return false; // Another handler will do it.
|
||||
}
|
||||
|
||||
const FunctionDecl *funcDcl = call->getDirectCallee();
|
||||
std::string name = funcDcl->getDeclName().getAsString();
|
||||
SourceManager *SM = Result.SourceManager;
|
||||
SourceLocation sl = call->getLocStart();
|
||||
|
||||
// TODO: Make a lookup table just for functions to improve performance.
|
||||
const auto found = CUDA_IDENTIFIER_MAP.find(name);
|
||||
if (found == CUDA_IDENTIFIER_MAP.end()) {
|
||||
std::string msg = "the following reference is not handled: '" + name + "' [function call].";
|
||||
printHipifyMessage(*SM, sl, msg);
|
||||
return true;
|
||||
}
|
||||
|
||||
const hipCounter& hipCtr = found->second;
|
||||
Statistics::current().incrementCounter(hipCtr, name);
|
||||
|
||||
if (hipCtr.unsupported) {
|
||||
return true; // Silently fail when you find an unsupported member.
|
||||
// TODO: Print a warning with the diagnostics API?
|
||||
}
|
||||
|
||||
size_t length = name.size();
|
||||
Replacement Rep(*SM, sl, length, hipCtr.hipName);
|
||||
FullSourceLoc fullSL(sl, *SM);
|
||||
insertReplacement(Rep, fullSL);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
SourceRange getReadRange(clang::SourceManager &SM, const SourceRange &exprRange) {
|
||||
SourceLocation begin = exprRange.getBegin();
|
||||
SourceLocation end = exprRange.getEnd();
|
||||
|
||||
bool beginSafe = !SM.isMacroBodyExpansion(begin) || Lexer::isAtStartOfMacroExpansion(begin, SM, LangOptions{});
|
||||
bool endSafe = !SM.isMacroBodyExpansion(end) || Lexer::isAtEndOfMacroExpansion(end, SM, LangOptions{});
|
||||
|
||||
if (beginSafe && endSafe) {
|
||||
return {SM.getFileLoc(begin), SM.getFileLoc(end)};
|
||||
} else {
|
||||
return {SM.getSpellingLoc(begin), SM.getSpellingLoc(end)};
|
||||
}
|
||||
}
|
||||
|
||||
SourceRange getWriteRange(clang::SourceManager &SM, const SourceRange &exprRange) {
|
||||
SourceLocation begin = exprRange.getBegin();
|
||||
SourceLocation end = exprRange.getEnd();
|
||||
|
||||
// If the range is contained within a macro, update the macro definition.
|
||||
// Otherwise, use the file location and hope for the best.
|
||||
if (!SM.isMacroBodyExpansion(begin) || !SM.isMacroBodyExpansion(end)) {
|
||||
return {SM.getFileLoc(begin), SM.getFileLoc(end)};
|
||||
}
|
||||
|
||||
return {SM.getSpellingLoc(begin), SM.getSpellingLoc(end)};
|
||||
}
|
||||
|
||||
StringRef readSourceText(clang::SourceManager& SM, const SourceRange& exprRange) {
|
||||
return Lexer::getSourceText(CharSourceRange::getTokenRange(getReadRange(SM, exprRange)), SM, LangOptions(), nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a string representation of the expression `arg`, unless it's a defaulting function
|
||||
* call argument, in which case get a 0. Used for building argument lists to kernel calls.
|
||||
*/
|
||||
std::string stringifyZeroDefaultedArg(SourceManager& SM, const Expr* arg) {
|
||||
if (isa<CXXDefaultArgExpr>(arg)) {
|
||||
return "0";
|
||||
} else {
|
||||
return readSourceText(SM, arg->getSourceRange());
|
||||
}
|
||||
}
|
||||
|
||||
bool cudaLaunchKernel(const MatchFinder::MatchResult &Result) {
|
||||
StringRef refName = "cudaLaunchKernel";
|
||||
if (const CUDAKernelCallExpr *launchKernel = Result.Nodes.getNodeAs<CUDAKernelCallExpr>(refName)) {
|
||||
SmallString<40> XStr;
|
||||
raw_svector_ostream OS(XStr);
|
||||
|
||||
LangOptions DefaultLangOptions;
|
||||
SourceManager *SM = Result.SourceManager;
|
||||
|
||||
const Expr& calleeExpr = *(launchKernel->getCallee());
|
||||
OS << "hipLaunchKernelGGL(" << readSourceText(*SM, calleeExpr.getSourceRange()) << ", ";
|
||||
|
||||
// Next up are the four kernel configuration parameters, the last two of which are optional and default to zero.
|
||||
const CallExpr& config = *(launchKernel->getConfig());
|
||||
|
||||
// Copy the two dimensional arguments verbatim.
|
||||
OS << "dim3(" << readSourceText(*SM, config.getArg(0)->getSourceRange()) << "), ";
|
||||
OS << "dim3(" << readSourceText(*SM, config.getArg(1)->getSourceRange()) << "), ";
|
||||
|
||||
// The stream/memory arguments default to zero if omitted.
|
||||
OS << stringifyZeroDefaultedArg(*SM, config.getArg(2)) << ", ";
|
||||
OS << stringifyZeroDefaultedArg(*SM, config.getArg(3));
|
||||
|
||||
// If there are ordinary arguments to the kernel, just copy them verbatim into our new call.
|
||||
int numArgs = launchKernel->getNumArgs();
|
||||
if (numArgs > 0) {
|
||||
OS << ", ";
|
||||
|
||||
// Start of the first argument.
|
||||
SourceLocation argStart = launchKernel->getArg(0)->getLocStart();
|
||||
|
||||
// End of the last argument.
|
||||
SourceLocation argEnd = launchKernel->getArg(numArgs - 1)->getLocEnd();
|
||||
|
||||
OS << readSourceText(*SM, {argStart, argEnd});
|
||||
}
|
||||
|
||||
OS << ")";
|
||||
|
||||
SourceRange replacementRange = getWriteRange(*SM, {launchKernel->getLocStart(), launchKernel->getLocEnd()});
|
||||
SourceLocation launchStart = replacementRange.getBegin();
|
||||
SourceLocation launchEnd = replacementRange.getEnd();
|
||||
|
||||
size_t length = SM->getCharacterData(Lexer::getLocForEndOfToken(
|
||||
launchEnd, 0, *SM, DefaultLangOptions)) -
|
||||
SM->getCharacterData(launchStart);
|
||||
|
||||
Replacement Rep(*SM, launchStart, length, OS.str());
|
||||
FullSourceLoc fullSL(launchStart, *SM);
|
||||
insertReplacement(Rep, fullSL);
|
||||
hipCounter counter = {"hipLaunchKernelGGL", ConvTypes::CONV_KERN, ApiTypes::API_RUNTIME};
|
||||
Statistics::current().incrementCounter(counter, refName.str());
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool cudaBuiltin(const MatchFinder::MatchResult &Result) {
|
||||
if (const MemberExpr *threadIdx = Result.Nodes.getNodeAs<MemberExpr>("cudaBuiltin")) {
|
||||
if (const OpaqueValueExpr *refBase =
|
||||
dyn_cast<OpaqueValueExpr>(threadIdx->getBase())) {
|
||||
if (const DeclRefExpr *declRef =
|
||||
dyn_cast<DeclRefExpr>(refBase->getSourceExpr())) {
|
||||
SourceLocation sl = threadIdx->getLocStart();
|
||||
SourceManager *SM = Result.SourceManager;
|
||||
StringRef name = declRef->getDecl()->getName();
|
||||
StringRef memberName = threadIdx->getMemberDecl()->getName();
|
||||
size_t pos = memberName.find_first_not_of("__fetch_builtin_");
|
||||
memberName = memberName.slice(pos, memberName.size());
|
||||
SmallString<128> tmpData;
|
||||
name = Twine(name + "." + memberName).toStringRef(tmpData);
|
||||
|
||||
// TODO: Make a lookup table just for builtins to improve performance.
|
||||
const auto found = CUDA_IDENTIFIER_MAP.find(name);
|
||||
if (found != CUDA_IDENTIFIER_MAP.end()) {
|
||||
Statistics::current().incrementCounter(found->second, name.str());
|
||||
if (!found->second.unsupported) {
|
||||
StringRef repName = found->second.hipName;
|
||||
Replacement Rep(*SM, sl, name.size(), repName);
|
||||
FullSourceLoc fullSL(sl, *SM);
|
||||
insertReplacement(Rep, fullSL);
|
||||
}
|
||||
} else {
|
||||
std::string msg = "the following reference is not handled: '" + name.str() + "' [builtin].";
|
||||
printHipifyMessage(*SM, sl, msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool cudaEnumConstantRef(const MatchFinder::MatchResult &Result) {
|
||||
if (const DeclRefExpr *enumConstantRef = Result.Nodes.getNodeAs<DeclRefExpr>("cudaEnumConstantRef")) {
|
||||
StringRef name = enumConstantRef->getDecl()->getName();
|
||||
SourceLocation sl = enumConstantRef->getLocStart();
|
||||
SourceManager *SM = Result.SourceManager;
|
||||
|
||||
// TODO: Make a lookup table just for enum values to improve performance.
|
||||
const auto found = CUDA_IDENTIFIER_MAP.find(name);
|
||||
if (found != CUDA_IDENTIFIER_MAP.end()) {
|
||||
Statistics::current().incrementCounter(found->second, name.str());
|
||||
if (!found->second.unsupported) {
|
||||
StringRef repName = found->second.hipName;
|
||||
Replacement Rep(*SM, sl, name.size(), repName);
|
||||
FullSourceLoc fullSL(sl, *SM);
|
||||
insertReplacement(Rep, fullSL);
|
||||
}
|
||||
} else {
|
||||
std::string msg = "the following reference is not handled: '" + name.str() + "' [enum constant ref].";
|
||||
printHipifyMessage(*SM, sl, msg);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool cudaType(const MatchFinder::MatchResult& Result) {
|
||||
const clang::TypeLoc* ret = Result.Nodes.getNodeAs<TypeLoc>("cudaType");
|
||||
if (!ret) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Ignore qualifiers - they don't alter our decision to rename.
|
||||
clang::UnqualTypeLoc tl = ret->getUnqualifiedLoc();
|
||||
const Type& typeObject = *(tl.getTypePtr());
|
||||
|
||||
std::string typeName = tl.getType().getAsString();
|
||||
|
||||
// Irritatingly, enum/struct types are identified as `enum/struct <something>`, and unlike most compound
|
||||
// types (such as pointers or references), there isn't another type node inside. So we have
|
||||
// to make do with what we've got. There's probably a better way of doing this...
|
||||
if (typeObject.isEnumeralType()) {
|
||||
removePrefixIfPresent(typeName, "enum ");
|
||||
}
|
||||
if (typeObject.isStructureType()) {
|
||||
removePrefixIfPresent(typeName, "struct ");
|
||||
}
|
||||
|
||||
// Do we have a replacement for this type?
|
||||
const auto found = CUDA_TYPE_NAME_MAP.find(typeName);
|
||||
if (found == CUDA_TYPE_NAME_MAP.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
SourceManager &SM = *(Result.SourceManager);
|
||||
|
||||
// Start of the type expression to replace.
|
||||
SourceLocation sl = tl.getBeginLoc();
|
||||
|
||||
const hipCounter& hipCtr = found->second;
|
||||
if (hipCtr.unsupported) {
|
||||
printHipifyMessage(SM, sl, "Unsupported CUDA '" + typeName);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Apply the rename!
|
||||
Replacement Rep(SM, sl, typeName.size(), hipCtr.hipName);
|
||||
FullSourceLoc fullSL(sl, SM);
|
||||
insertReplacement(Rep, fullSL);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool cudaSharedIncompleteArrayVar(const MatchFinder::MatchResult &Result) {
|
||||
StringRef refName = "cudaSharedIncompleteArrayVar";
|
||||
if (const VarDecl *sharedVar = Result.Nodes.getNodeAs<VarDecl>(refName)) {
|
||||
// Example: extern __shared__ uint sRadix1[];
|
||||
if (sharedVar->hasExternalFormalLinkage()) {
|
||||
QualType QT = sharedVar->getType();
|
||||
std::string typeName;
|
||||
if (QT->isIncompleteArrayType()) {
|
||||
const ArrayType *AT = QT.getTypePtr()->getAsArrayTypeUnsafe();
|
||||
QT = AT->getElementType();
|
||||
if (QT.getTypePtr()->isBuiltinType()) {
|
||||
QT = QT.getCanonicalType();
|
||||
const BuiltinType *BT = dyn_cast<BuiltinType>(QT);
|
||||
if (BT) {
|
||||
LangOptions LO;
|
||||
LO.CUDA = true;
|
||||
PrintingPolicy policy(LO);
|
||||
typeName = BT->getName(policy);
|
||||
}
|
||||
} else {
|
||||
typeName = QT.getAsString();
|
||||
}
|
||||
}
|
||||
if (!typeName.empty()) {
|
||||
SourceLocation slStart = sharedVar->getLocStart();
|
||||
SourceLocation slEnd = sharedVar->getLocEnd();
|
||||
SourceManager *SM = Result.SourceManager;
|
||||
size_t repLength = SM->getCharacterData(slEnd) - SM->getCharacterData(slStart) + 1;
|
||||
std::string varName = sharedVar->getNameAsString();
|
||||
std::string repName = "HIP_DYNAMIC_SHARED(" + typeName + ", " + varName + ")";
|
||||
Replacement Rep(*SM, slStart, repLength, repName);
|
||||
FullSourceLoc fullSL(slStart, *SM);
|
||||
insertReplacement(Rep, fullSL);
|
||||
hipCounter counter = { "HIP_DYNAMIC_SHARED", ConvTypes::CONV_MEM, ApiTypes::API_RUNTIME };
|
||||
Statistics::current().incrementCounter(counter, refName.str());
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool stringLiteral(const MatchFinder::MatchResult &Result) {
|
||||
if (const clang::StringLiteral *sLiteral = Result.Nodes.getNodeAs<clang::StringLiteral>("stringLiteral")) {
|
||||
if (sLiteral->getCharByteWidth() == 1) {
|
||||
StringRef s = sLiteral->getString();
|
||||
SourceManager *SM = Result.SourceManager;
|
||||
processString(s, *SM, sLiteral->getLocStart());
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public:
|
||||
Cuda2HipCallback(Replacements& Replace, ast_matchers::MatchFinder *parent, HipifyPPCallbacks *PPCallbacks, const std::string &mainFileName)
|
||||
: Cuda2Hip(Replace, mainFileName), owner(parent), PP(PPCallbacks) {
|
||||
PP->setMatch(this);
|
||||
}
|
||||
|
||||
void run(const MatchFinder::MatchResult &Result) override {
|
||||
if (cudaType(Result)) return;
|
||||
if (cudaCall(Result)) return;
|
||||
if (cudaBuiltin(Result)) return;
|
||||
if (cudaEnumConstantRef(Result)) return;
|
||||
if (cudaLaunchKernel(Result)) return;
|
||||
if (cudaSharedIncompleteArrayVar(Result)) return;
|
||||
if (stringLiteral(Result)) return;
|
||||
}
|
||||
|
||||
private:
|
||||
ast_matchers::MatchFinder *owner;
|
||||
HipifyPPCallbacks *PP;
|
||||
};
|
||||
|
||||
void HipifyPPCallbacks::handleEndSource() {
|
||||
insertHipHeaders(Match, *_sm);
|
||||
}
|
||||
|
||||
void addAllMatchers(ast_matchers::MatchFinder &Finder, Cuda2HipCallback *Callback) {
|
||||
// Rewrite CUDA api calls to hip ones.
|
||||
Finder.addMatcher(
|
||||
callExpr(
|
||||
isExpansionInMainFile(),
|
||||
callee(
|
||||
functionDecl(
|
||||
matchesName("cu.*"),
|
||||
unless(
|
||||
// Clang generates structs with functions on them to represent things like
|
||||
// threadIdx.x. We have other logic to handle those builtins directly, so
|
||||
// we need to suppress the call-handling.
|
||||
// We can't handle those directly in the call-handler without special-casing
|
||||
// it unpleasantly, since the names of the functions are unique only per-struct.
|
||||
matchesName("__fetch_builtin.*")
|
||||
)
|
||||
)
|
||||
)
|
||||
).bind("cudaCall"),
|
||||
Callback
|
||||
);
|
||||
|
||||
// Rewrite all references to CUDA types to their corresponding hip types.
|
||||
Finder.addMatcher(
|
||||
typeLoc(
|
||||
isExpansionInMainFile()
|
||||
).bind("cudaType"),
|
||||
Callback
|
||||
);
|
||||
|
||||
// Replace references to CUDA names in string literals with the equivalent hip names.
|
||||
Finder.addMatcher(stringLiteral(isExpansionInMainFile()).bind("stringLiteral"), Callback);
|
||||
|
||||
// Replace the <<<...>>> language extension with a hip kernel launch
|
||||
Finder.addMatcher(cudaKernelCallExpr(isExpansionInMainFile()).bind("cudaLaunchKernel"), Callback);
|
||||
|
||||
// Replace cuda builtins.
|
||||
Finder.addMatcher(
|
||||
memberExpr(
|
||||
isExpansionInMainFile(),
|
||||
hasObjectExpression(
|
||||
hasType(
|
||||
cxxRecordDecl(
|
||||
matchesName("__cuda_builtin_")
|
||||
)
|
||||
)
|
||||
)
|
||||
).bind("cudaBuiltin"),
|
||||
Callback
|
||||
);
|
||||
|
||||
// Map CUDA enum _values_ to their hip equivalents.
|
||||
Finder.addMatcher(
|
||||
declRefExpr(
|
||||
isExpansionInMainFile(),
|
||||
to(
|
||||
enumConstantDecl(
|
||||
matchesName("cu.*|CU.*")
|
||||
)
|
||||
)
|
||||
).bind("cudaEnumConstantRef"),
|
||||
Callback
|
||||
);
|
||||
|
||||
Finder.addMatcher(
|
||||
varDecl(
|
||||
isExpansionInMainFile(),
|
||||
allOf(
|
||||
hasAttr(attr::CUDAShared),
|
||||
hasType(incompleteArrayType())
|
||||
)
|
||||
).bind("cudaSharedIncompleteArrayVar"),
|
||||
Callback
|
||||
);
|
||||
}
|
||||
|
||||
void copyFile(const std::string& src, const std::string& dst) {
|
||||
std::ifstream source(src, std::ios::binary);
|
||||
std::ofstream dest(dst, std::ios::binary);
|
||||
dest << source.rdbuf();
|
||||
}
|
||||
|
||||
int main(int argc, const char **argv) {
|
||||
llcompat::PrintStackTraceOnErrorSignal();
|
||||
|
||||
CommonOptionsParser OptionsParser(argc, argv, ToolTemplateCategory, llvm::cl::OneOrMore);
|
||||
std::vector<std::string> fileSources = OptionsParser.getSourcePathList();
|
||||
std::string dst = OutputFilename;
|
||||
if (!dst.empty() && fileSources.size() > 1) {
|
||||
llvm::errs() << "[HIPIFY] conflict: -o and multiple source files are specified.\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (NoOutput) {
|
||||
if (Inplace) {
|
||||
llvm::errs() << "[HIPIFY] conflict: both -no-output and -inplace options are specified.\n";
|
||||
return 1;
|
||||
}
|
||||
if (!dst.empty()) {
|
||||
llvm::errs() << "[HIPIFY] conflict: both -no-output and -o options are specified.\n";
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (Examine) {
|
||||
NoOutput = PrintStats = true;
|
||||
}
|
||||
|
||||
int Result = 0;
|
||||
|
||||
// Arguments for the Statistics print routines.
|
||||
std::unique_ptr<std::ostream> csv = nullptr;
|
||||
llvm::raw_ostream* statPrint = nullptr;
|
||||
if (!OutputStatsFilename.empty()) {
|
||||
csv = std::unique_ptr<std::ostream>(new std::ofstream(OutputStatsFilename, std::ios_base::trunc));
|
||||
}
|
||||
if (PrintStats) {
|
||||
statPrint = &llvm::errs();
|
||||
}
|
||||
|
||||
for (const auto & src : fileSources) {
|
||||
if (dst.empty()) {
|
||||
if (Inplace) {
|
||||
dst = src;
|
||||
} else {
|
||||
dst = src + ".hip";
|
||||
}
|
||||
} else if (Inplace) {
|
||||
llvm::errs() << "[HIPIFY] conflict: both -o and -inplace options are specified.\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::string tmpFile = src + ".hipify-tmp";
|
||||
|
||||
// Create a copy of the file to work on. When we're done, we'll move this onto the
|
||||
// output (which may mean overwriting the input, if we're in-place).
|
||||
// Should we fail for some reason, we'll just leak this file and not corrupt the input.
|
||||
copyFile(src, tmpFile);
|
||||
|
||||
// Initialise the statistics counters for this file.
|
||||
Statistics::setActive(src);
|
||||
|
||||
// RefactoringTool operates on the file in-place. Giving it the output path is no good,
|
||||
// because that'll break relative includes, and we don't want to overwrite the input file.
|
||||
// So what we do is operate on a copy, which we then move to the output.
|
||||
RefactoringTool Tool(OptionsParser.getCompilations(), tmpFile);
|
||||
ast_matchers::MatchFinder Finder;
|
||||
|
||||
// The Replacements to apply to the file `src`.
|
||||
Replacements& replacementsToUse = llcompat::getReplacements(Tool, tmpFile);
|
||||
HipifyPPCallbacks* PPCallbacks = new HipifyPPCallbacks(replacementsToUse, tmpFile);
|
||||
Cuda2HipCallback Callback(replacementsToUse, &Finder, PPCallbacks, tmpFile);
|
||||
|
||||
addAllMatchers(Finder, &Callback);
|
||||
|
||||
auto action = newFrontendActionFactory(&Finder, PPCallbacks);
|
||||
|
||||
Tool.appendArgumentsAdjuster(getInsertArgumentAdjuster("--cuda-host-only", ArgumentInsertPosition::BEGIN));
|
||||
|
||||
// Ensure at least c++11 is used.
|
||||
Tool.appendArgumentsAdjuster(getInsertArgumentAdjuster("-std=c++11", ArgumentInsertPosition::BEGIN));
|
||||
#if defined(HIPIFY_CLANG_RES)
|
||||
Tool.appendArgumentsAdjuster(getInsertArgumentAdjuster("-resource-dir=" HIPIFY_CLANG_RES));
|
||||
#endif
|
||||
Tool.appendArgumentsAdjuster(getClangSyntaxOnlyAdjuster());
|
||||
Result += Tool.run(action.get());
|
||||
Tool.clearArgumentsAdjusters();
|
||||
|
||||
LangOptions DefaultLangOptions;
|
||||
IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
|
||||
TextDiagnosticPrinter DiagnosticPrinter(llvm::errs(), &*DiagOpts);
|
||||
DiagnosticsEngine Diagnostics(IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts, &DiagnosticPrinter, false);
|
||||
|
||||
SourceManager SM(Diagnostics, Tool.getFiles());
|
||||
|
||||
Rewriter Rewrite(SM, DefaultLangOptions);
|
||||
if (!Tool.applyAllReplacements(Rewrite)) {
|
||||
DEBUG(dbgs() << "Skipped some replacements.\n");
|
||||
}
|
||||
|
||||
// Either move the tmpfile to the output, or remove it.
|
||||
if (!NoOutput) {
|
||||
Result += Rewrite.overwriteChangedFiles();
|
||||
rename(tmpFile.c_str(), dst.c_str());
|
||||
} else {
|
||||
remove(tmpFile.c_str());
|
||||
}
|
||||
|
||||
Statistics::current().markCompletion();
|
||||
Statistics::current().print(csv.get(), statPrint);
|
||||
|
||||
dst.clear();
|
||||
}
|
||||
|
||||
if (fileSources.size() > 1) {
|
||||
Statistics::printAggregate(csv.get(), statPrint);
|
||||
}
|
||||
|
||||
return Result;
|
||||
}
|
||||
@@ -0,0 +1,460 @@
|
||||
#include "HipifyAction.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "clang/Basic/SourceLocation.h"
|
||||
#include "clang/Frontend/CompilerInstance.h"
|
||||
#include "clang/ASTMatchers/ASTMatchFinder.h"
|
||||
#include "clang/ASTMatchers/ASTMatchers.h"
|
||||
|
||||
#include "LLVMCompat.h"
|
||||
#include "CUDA2HipMap.h"
|
||||
#include "StringUtils.h"
|
||||
#include "ArgParse.h"
|
||||
|
||||
namespace ct = clang::tooling;
|
||||
namespace mat = clang::ast_matchers;
|
||||
|
||||
void HipifyAction::RewriteString(StringRef s, clang::SourceLocation start) {
|
||||
clang::SourceManager& SM = getCompilerInstance().getSourceManager();
|
||||
|
||||
size_t begin = 0;
|
||||
while ((begin = s.find("cu", begin)) != StringRef::npos) {
|
||||
const size_t end = s.find_first_of(" ", begin + 4);
|
||||
StringRef name = s.slice(begin, end);
|
||||
const auto found = CUDA_RENAMES_MAP().find(name);
|
||||
if (found != CUDA_RENAMES_MAP().end()) {
|
||||
StringRef repName = found->second.hipName;
|
||||
hipCounter counter = {"[string literal]", ConvTypes::CONV_LITERAL, ApiTypes::API_RUNTIME, found->second.unsupported};
|
||||
Statistics::current().incrementCounter(counter, name.str());
|
||||
|
||||
if (!counter.unsupported) {
|
||||
clang::SourceLocation sl = start.getLocWithOffset(begin + 1);
|
||||
ct::Replacement Rep(SM, sl, name.size(), repName);
|
||||
clang::FullSourceLoc fullSL(sl, SM);
|
||||
insertReplacement(Rep, fullSL);
|
||||
}
|
||||
}
|
||||
|
||||
if (end == StringRef::npos) {
|
||||
break;
|
||||
}
|
||||
|
||||
begin = end + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Look at, and consider altering, a given token.
|
||||
*
|
||||
* If it's not a CUDA identifier, nothing happens.
|
||||
* If it's an unsupported CUDA identifier, a warning is emitted.
|
||||
* Otherwise, the source file is updated with the corresponding hipification.
|
||||
*/
|
||||
void HipifyAction::RewriteToken(const clang::Token& t) {
|
||||
clang::SourceManager& SM = getCompilerInstance().getSourceManager();
|
||||
|
||||
// String literals containing CUDA references need fixing...
|
||||
if (t.is(clang::tok::string_literal)) {
|
||||
StringRef s(t.getLiteralData(), t.getLength());
|
||||
RewriteString(unquoteStr(s), t.getLocation());
|
||||
return;
|
||||
} else if (!t.isAnyIdentifier()) {
|
||||
// If it's neither a string nor an identifier, we don't care.
|
||||
return;
|
||||
}
|
||||
|
||||
StringRef name = t.getRawIdentifier();
|
||||
const auto found = CUDA_RENAMES_MAP().find(name);
|
||||
if (found == CUDA_RENAMES_MAP().end()) {
|
||||
// So it's an identifier, but not CUDA? Boring.
|
||||
return;
|
||||
}
|
||||
|
||||
Statistics::current().incrementCounter(found->second, name.str());
|
||||
|
||||
clang::SourceLocation sl = t.getLocation();
|
||||
if (found->second.unsupported) {
|
||||
// An unsupported identifier? Curses! Warn the user.
|
||||
clang::DiagnosticsEngine& DE = getCompilerInstance().getDiagnostics();
|
||||
const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "CUDA identifier unsupported in hip");
|
||||
DE.Report(sl, ID);
|
||||
return;
|
||||
}
|
||||
|
||||
StringRef repName = found->second.hipName;
|
||||
ct::Replacement Rep(SM, sl, name.size(), repName);
|
||||
clang::FullSourceLoc fullSL(sl, SM);
|
||||
insertReplacement(Rep, fullSL);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
clang::SourceRange getReadRange(clang::SourceManager& SM, const clang::SourceRange& exprRange) {
|
||||
clang::SourceLocation begin = exprRange.getBegin();
|
||||
clang::SourceLocation end = exprRange.getEnd();
|
||||
|
||||
bool beginSafe = !SM.isMacroBodyExpansion(begin) || clang::Lexer::isAtStartOfMacroExpansion(begin, SM, clang::LangOptions{});
|
||||
bool endSafe = !SM.isMacroBodyExpansion(end) || clang::Lexer::isAtEndOfMacroExpansion(end, SM, clang::LangOptions{});
|
||||
|
||||
if (beginSafe && endSafe) {
|
||||
return {SM.getFileLoc(begin), SM.getFileLoc(end)};
|
||||
} else {
|
||||
return {SM.getSpellingLoc(begin), SM.getSpellingLoc(end)};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
clang::SourceRange getWriteRange(clang::SourceManager& SM, const clang::SourceRange& exprRange) {
|
||||
clang::SourceLocation begin = exprRange.getBegin();
|
||||
clang::SourceLocation end = exprRange.getEnd();
|
||||
|
||||
// If the range is contained within a macro, update the macro definition.
|
||||
// Otherwise, use the file location and hope for the best.
|
||||
if (!SM.isMacroBodyExpansion(begin) || !SM.isMacroBodyExpansion(end)) {
|
||||
return {SM.getFileLoc(begin), SM.getFileLoc(end)};
|
||||
}
|
||||
|
||||
return {SM.getSpellingLoc(begin), SM.getSpellingLoc(end)};
|
||||
}
|
||||
|
||||
|
||||
StringRef readSourceText(clang::SourceManager& SM, const clang::SourceRange& exprRange) {
|
||||
return clang::Lexer::getSourceText(clang::CharSourceRange::getTokenRange(getReadRange(SM, exprRange)), SM, clang::LangOptions(), nullptr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a string representation of the expression `arg`, unless it's a defaulting function
|
||||
* call argument, in which case get a 0. Used for building argument lists to kernel calls.
|
||||
*/
|
||||
std::string stringifyZeroDefaultedArg(clang::SourceManager& SM, const clang::Expr* arg) {
|
||||
if (clang::isa<clang::CXXDefaultArgExpr>(arg)) {
|
||||
return "0";
|
||||
} else {
|
||||
return readSourceText(SM, arg->getSourceRange());
|
||||
}
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
|
||||
void HipifyAction::InclusionDirective(clang::SourceLocation hash_loc,
|
||||
const clang::Token&,
|
||||
StringRef file_name,
|
||||
bool is_angled,
|
||||
clang::CharSourceRange filename_range,
|
||||
const clang::FileEntry*, StringRef,
|
||||
StringRef, const clang::Module*) {
|
||||
clang::SourceManager& SM = getCompilerInstance().getSourceManager();
|
||||
if (!SM.isWrittenInMainFile(hash_loc)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto found = CUDA_INCLUDE_MAP.find(file_name);
|
||||
if (found == CUDA_INCLUDE_MAP.end()) {
|
||||
// Not a CUDA include - don't touch it.
|
||||
return;
|
||||
}
|
||||
|
||||
// Special-casing to avoid duplication of the hip_runtime include.
|
||||
if (found->second.hipName == "hip/hip_runtime.h") {
|
||||
if (insertedRuntimeHeader) {
|
||||
return;
|
||||
}
|
||||
|
||||
insertedRuntimeHeader = true;
|
||||
}
|
||||
|
||||
Statistics::current().incrementCounter(found->second, file_name.str());
|
||||
|
||||
clang::SourceLocation sl = filename_range.getBegin();
|
||||
if (found->second.unsupported) {
|
||||
// An unsupported CUDA header? Oh dear. Print a warning.
|
||||
clang::DiagnosticsEngine& DE = getCompilerInstance().getDiagnostics();
|
||||
DE.Report(sl, DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Unsupported CUDA header"));
|
||||
return;
|
||||
}
|
||||
|
||||
const char *B = SM.getCharacterData(sl);
|
||||
const char *E = SM.getCharacterData(filename_range.getEnd());
|
||||
clang::SmallString<128> includeBuffer;
|
||||
clang::StringRef newInclude;
|
||||
|
||||
// Keep the same include type that the user gave.
|
||||
if (is_angled) {
|
||||
newInclude = llvm::Twine("<" + found->second.hipName + ">").toStringRef(includeBuffer);
|
||||
} else {
|
||||
newInclude = llvm::Twine("\"" + found->second.hipName + "\"").toStringRef(includeBuffer);
|
||||
}
|
||||
|
||||
ct::Replacement Rep(SM, sl, E - B, newInclude);
|
||||
insertReplacement(Rep, clang::FullSourceLoc{sl, SM});
|
||||
}
|
||||
|
||||
|
||||
bool HipifyAction::cudaLaunchKernel(const clang::ast_matchers::MatchFinder::MatchResult& Result) {
|
||||
StringRef refName = "cudaLaunchKernel";
|
||||
|
||||
const auto* launchKernel = Result.Nodes.getNodeAs<clang::CUDAKernelCallExpr>(refName);
|
||||
if (!launchKernel) {
|
||||
return false;
|
||||
}
|
||||
|
||||
clang::SmallString<40> XStr;
|
||||
llvm::raw_svector_ostream OS(XStr);
|
||||
|
||||
clang::LangOptions DefaultLangOptions;
|
||||
clang::SourceManager* SM = Result.SourceManager;
|
||||
|
||||
const clang::Expr& calleeExpr = *(launchKernel->getCallee());
|
||||
OS << "hipLaunchKernelGGL(" << readSourceText(*SM, calleeExpr.getSourceRange()) << ", ";
|
||||
|
||||
// Next up are the four kernel configuration parameters, the last two of which are optional and default to zero.
|
||||
const clang::CallExpr& config = *(launchKernel->getConfig());
|
||||
|
||||
// Copy the two dimensional arguments verbatim.
|
||||
OS << "dim3(" << readSourceText(*SM, config.getArg(0)->getSourceRange()) << "), ";
|
||||
OS << "dim3(" << readSourceText(*SM, config.getArg(1)->getSourceRange()) << "), ";
|
||||
|
||||
// The stream/memory arguments default to zero if omitted.
|
||||
OS << stringifyZeroDefaultedArg(*SM, config.getArg(2)) << ", ";
|
||||
OS << stringifyZeroDefaultedArg(*SM, config.getArg(3));
|
||||
|
||||
// If there are ordinary arguments to the kernel, just copy them verbatim into our new call.
|
||||
int numArgs = launchKernel->getNumArgs();
|
||||
if (numArgs > 0) {
|
||||
OS << ", ";
|
||||
|
||||
// Start of the first argument.
|
||||
clang::SourceLocation argStart = launchKernel->getArg(0)->getLocStart();
|
||||
|
||||
// End of the last argument.
|
||||
clang::SourceLocation argEnd = launchKernel->getArg(numArgs - 1)->getLocEnd();
|
||||
|
||||
OS << readSourceText(*SM, {argStart, argEnd});
|
||||
}
|
||||
|
||||
OS << ")";
|
||||
|
||||
clang::SourceRange replacementRange = getWriteRange(*SM, {launchKernel->getLocStart(), launchKernel->getLocEnd()});
|
||||
clang::SourceLocation launchStart = replacementRange.getBegin();
|
||||
clang::SourceLocation launchEnd = replacementRange.getEnd();
|
||||
|
||||
size_t length = SM->getCharacterData(clang::Lexer::getLocForEndOfToken(launchEnd, 0, *SM, DefaultLangOptions)) - SM->getCharacterData(launchStart);
|
||||
|
||||
ct::Replacement Rep(*SM, launchStart, length, OS.str());
|
||||
clang::FullSourceLoc fullSL(launchStart, *SM);
|
||||
insertReplacement(Rep, fullSL);
|
||||
hipCounter counter = {"hipLaunchKernelGGL", ConvTypes::CONV_KERN, ApiTypes::API_RUNTIME};
|
||||
Statistics::current().incrementCounter(counter, refName.str());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HipifyAction::cudaBuiltin(const clang::ast_matchers::MatchFinder::MatchResult& Result) {
|
||||
const clang::MemberExpr* threadIdx = Result.Nodes.getNodeAs<clang::MemberExpr>("cudaBuiltin");
|
||||
if (!threadIdx) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const clang::OpaqueValueExpr* refBase = clang::dyn_cast<clang::OpaqueValueExpr>(threadIdx->getBase());
|
||||
if (!refBase) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const clang::DeclRefExpr* declRef = clang::dyn_cast<clang::DeclRefExpr>(refBase->getSourceExpr());
|
||||
if (!declRef) {
|
||||
return false;
|
||||
}
|
||||
|
||||
clang::SourceLocation sl = threadIdx->getLocStart();
|
||||
clang::SourceManager* SM = Result.SourceManager;
|
||||
StringRef name = declRef->getDecl()->getName();
|
||||
StringRef memberName = threadIdx->getMemberDecl()->getName();
|
||||
size_t pos = memberName.find_first_not_of("__fetch_builtin_");
|
||||
memberName = memberName.slice(pos, memberName.size());
|
||||
clang::SmallString<128> tmpData;
|
||||
name = clang::Twine(name + "." + memberName).toStringRef(tmpData);
|
||||
|
||||
const auto found = CUDA_IDENTIFIER_MAP.find(name);
|
||||
if (found != CUDA_IDENTIFIER_MAP.end()) {
|
||||
Statistics::current().incrementCounter(found->second, name.str());
|
||||
if (!found->second.unsupported) {
|
||||
StringRef repName = found->second.hipName;
|
||||
ct::Replacement Rep(*SM, sl, name.size(), repName);
|
||||
clang::FullSourceLoc fullSL(sl, *SM);
|
||||
insertReplacement(Rep, fullSL);
|
||||
}
|
||||
} else {
|
||||
clang::DiagnosticsEngine& DE = getCompilerInstance().getDiagnostics();
|
||||
const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Unknown CUDA builtin");
|
||||
DE.Report(sl, ID);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HipifyAction::cudaSharedIncompleteArrayVar(const clang::ast_matchers::MatchFinder::MatchResult& Result) {
|
||||
StringRef refName = "cudaSharedIncompleteArrayVar";
|
||||
auto* sharedVar = Result.Nodes.getNodeAs<clang::VarDecl>(refName);
|
||||
if (!sharedVar) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Example: extern __shared__ uint sRadix1[];
|
||||
if (!sharedVar->hasExternalFormalLinkage()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
clang::QualType QT = sharedVar->getType();
|
||||
std::string typeName;
|
||||
if (QT->isIncompleteArrayType()) {
|
||||
const clang::ArrayType* AT = QT.getTypePtr()->getAsArrayTypeUnsafe();
|
||||
QT = AT->getElementType();
|
||||
if (QT.getTypePtr()->isBuiltinType()) {
|
||||
QT = QT.getCanonicalType();
|
||||
const auto* BT = clang::dyn_cast<clang::BuiltinType>(QT);
|
||||
if (BT) {
|
||||
clang::LangOptions LO;
|
||||
LO.CUDA = true;
|
||||
clang::PrintingPolicy policy(LO);
|
||||
typeName = BT->getName(policy);
|
||||
}
|
||||
} else {
|
||||
typeName = QT.getAsString();
|
||||
}
|
||||
}
|
||||
|
||||
if (!typeName.empty()) {
|
||||
clang::SourceLocation slStart = sharedVar->getLocStart();
|
||||
clang::SourceLocation slEnd = sharedVar->getLocEnd();
|
||||
clang::SourceManager* SM = Result.SourceManager;
|
||||
size_t repLength = SM->getCharacterData(slEnd) - SM->getCharacterData(slStart) + 1;
|
||||
std::string varName = sharedVar->getNameAsString();
|
||||
std::string repName = "HIP_DYNAMIC_SHARED(" + typeName + ", " + varName + ")";
|
||||
ct::Replacement Rep(*SM, slStart, repLength, repName);
|
||||
clang::FullSourceLoc fullSL(slStart, *SM);
|
||||
insertReplacement(Rep, fullSL);
|
||||
hipCounter counter = {"HIP_DYNAMIC_SHARED", ConvTypes::CONV_MEM, ApiTypes::API_RUNTIME};
|
||||
Statistics::current().incrementCounter(counter, refName.str());
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void HipifyAction::insertReplacement(const ct::Replacement& rep, const clang::FullSourceLoc& fullSL) {
|
||||
llcompat::insertReplacement(*replacements, rep);
|
||||
if (PrintStats) {
|
||||
rep.getLength();
|
||||
Statistics::current().lineTouched(fullSL.getExpansionLineNumber());
|
||||
Statistics::current().bytesChanged(rep.getLength());
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<clang::ASTConsumer> HipifyAction::CreateASTConsumer(clang::CompilerInstance& CI, llvm::StringRef) {
|
||||
Finder.reset(new clang::ast_matchers::MatchFinder);
|
||||
|
||||
// Replace the <<<...>>> language extension with a hip kernel launch
|
||||
Finder->addMatcher(mat::cudaKernelCallExpr(mat::isExpansionInMainFile()).bind("cudaLaunchKernel"), this);
|
||||
|
||||
// Replace cuda builtins.
|
||||
Finder->addMatcher(
|
||||
mat::memberExpr(
|
||||
mat::isExpansionInMainFile(),
|
||||
mat::hasObjectExpression(
|
||||
mat::hasType(
|
||||
mat::cxxRecordDecl(
|
||||
mat::matchesName("__cuda_builtin_")
|
||||
)
|
||||
)
|
||||
)
|
||||
).bind("cudaBuiltin"),
|
||||
this
|
||||
);
|
||||
|
||||
Finder->addMatcher(
|
||||
mat::varDecl(
|
||||
mat::isExpansionInMainFile(),
|
||||
mat::allOf(
|
||||
mat::hasAttr(clang::attr::CUDAShared),
|
||||
mat::hasType(mat::incompleteArrayType())
|
||||
)
|
||||
).bind("cudaSharedIncompleteArrayVar"),
|
||||
this
|
||||
);
|
||||
|
||||
// Ownership is transferred to the caller...
|
||||
return Finder->newASTConsumer();
|
||||
}
|
||||
|
||||
void HipifyAction::EndSourceFileAction() {
|
||||
// Insert the hip header, if we didn't already do it by accident during substitution.
|
||||
if (!insertedRuntimeHeader) {
|
||||
// It's not sufficient to just replace CUDA headers with hip ones, because numerous CUDA headers are
|
||||
// implicitly included by the compiler. Instead, we _delete_ CUDA headers, and unconditionally insert
|
||||
// one copy of the hip include into every file.
|
||||
clang::SourceManager& SM = getCompilerInstance().getSourceManager();
|
||||
|
||||
clang::SourceLocation sl = SM.getLocForStartOfFile(SM.getMainFileID());
|
||||
clang::FullSourceLoc fullSL(sl, SM);
|
||||
ct::Replacement Rep(SM, sl, 0, "#include <hip/hip_runtime.h>\n");
|
||||
insertReplacement(Rep, fullSL);
|
||||
}
|
||||
|
||||
clang::ASTFrontendAction::EndSourceFileAction();
|
||||
}
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
/**
|
||||
* A silly little class to proxy PPCallbacks back to the HipifyAction class.
|
||||
*/
|
||||
class PPCallbackProxy : public clang::PPCallbacks {
|
||||
HipifyAction& hipifyAction;
|
||||
|
||||
public:
|
||||
explicit PPCallbackProxy(HipifyAction& action): hipifyAction(action) {}
|
||||
|
||||
void InclusionDirective(clang::SourceLocation hash_loc, const clang::Token& include_token,
|
||||
StringRef file_name, bool is_angled, clang::CharSourceRange filename_range,
|
||||
const clang::FileEntry* file, StringRef search_path, StringRef relative_path,
|
||||
const clang::Module* imported) override {
|
||||
hipifyAction.InclusionDirective(hash_loc, include_token, file_name, is_angled, filename_range, file, search_path, relative_path, imported);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void HipifyAction::ExecuteAction() {
|
||||
clang::Preprocessor& PP = getCompilerInstance().getPreprocessor();
|
||||
clang::SourceManager& SM = getCompilerInstance().getSourceManager();
|
||||
|
||||
// Start lexing the specified input file.
|
||||
const llvm::MemoryBuffer* FromFile = SM.getBuffer(SM.getMainFileID());
|
||||
clang::Lexer RawLex(SM.getMainFileID(), FromFile, SM, PP.getLangOpts());
|
||||
RawLex.SetKeepWhitespaceMode(true);
|
||||
|
||||
// Perform a token-level rewrite of CUDA identifiers to hip ones. The raw-mode lexer gives us enough
|
||||
// information to tell the difference between identifiers, string literals, and "other stuff". It also
|
||||
// ignores preprocessor directives, so this transformation will operate inside preprocessor-deleted
|
||||
// code.
|
||||
clang::Token RawTok;
|
||||
RawLex.LexFromRawLexer(RawTok);
|
||||
while (RawTok.isNot(clang::tok::eof)) {
|
||||
RewriteToken(RawTok);
|
||||
RawLex.LexFromRawLexer(RawTok);
|
||||
}
|
||||
|
||||
// Register yourself as the preprocessor callback, by proxy.
|
||||
PP.addPPCallbacks(std::unique_ptr<PPCallbackProxy>(new PPCallbackProxy(*this)));
|
||||
|
||||
// Now we're done futzing with the lexer, have the subclass proceeed with Sema and AST matching.
|
||||
clang::ASTFrontendAction::ExecuteAction();
|
||||
}
|
||||
|
||||
void HipifyAction::run(const clang::ast_matchers::MatchFinder::MatchResult& Result) {
|
||||
if (cudaBuiltin(Result)) return;
|
||||
if (cudaLaunchKernel(Result)) return;
|
||||
if (cudaSharedIncompleteArrayVar(Result)) return;
|
||||
}
|
||||
@@ -0,0 +1,83 @@
|
||||
#pragma once
|
||||
|
||||
#include "clang/Lex/PPCallbacks.h"
|
||||
#include "clang/Tooling/Tooling.h"
|
||||
#include "clang/Frontend/FrontendAction.h"
|
||||
#include "clang/Tooling/Core/Replacement.h"
|
||||
#include "clang/ASTMatchers/ASTMatchFinder.h"
|
||||
#include "ReplacementsFrontendActionFactory.h"
|
||||
|
||||
namespace ct = clang::tooling;
|
||||
|
||||
/**
|
||||
* A FrontendAction that hipifies CUDA programs.
|
||||
*/
|
||||
class HipifyAction : public clang::ASTFrontendAction,
|
||||
public clang::ast_matchers::MatchFinder::MatchCallback {
|
||||
private:
|
||||
ct::Replacements* replacements;
|
||||
std::unique_ptr<clang::ast_matchers::MatchFinder> Finder;
|
||||
|
||||
/// CUDA implicitly adds its runtime header. We rewrite explicitly-provided CUDA includes with equivalent
|
||||
// ones, and track - using this flag - if the result led to us including the hip runtime header. If it did
|
||||
// not, we insert it at the top of the file when we finish processing it.
|
||||
// This approach means we do the best it's possible to do w.r.t preserving the user's include order.
|
||||
bool insertedRuntimeHeader = false;
|
||||
|
||||
/**
|
||||
* Rewrite a string literal to refer to hip, not CUDA.
|
||||
*/
|
||||
void RewriteString(StringRef s, clang::SourceLocation start);
|
||||
|
||||
/**
|
||||
* Replace a CUDA identifier with the corresponding hip identifier, if applicable.
|
||||
*/
|
||||
void RewriteToken(const clang::Token &t);
|
||||
|
||||
public:
|
||||
explicit HipifyAction(ct::Replacements *replacements):
|
||||
clang::ASTFrontendAction(),
|
||||
replacements(replacements) {}
|
||||
|
||||
// MatchCallback listeners
|
||||
bool cudaBuiltin(const clang::ast_matchers::MatchFinder::MatchResult& Result);
|
||||
bool cudaLaunchKernel(const clang::ast_matchers::MatchFinder::MatchResult& Result);
|
||||
bool cudaSharedIncompleteArrayVar(const clang::ast_matchers::MatchFinder::MatchResult& Result);
|
||||
|
||||
/**
|
||||
* Called by the preprocessor for each include directive during the non-raw lexing pass.
|
||||
*/
|
||||
void InclusionDirective(clang::SourceLocation hash_loc,
|
||||
const clang::Token &include_token,
|
||||
StringRef file_name,
|
||||
bool is_angled,
|
||||
clang::CharSourceRange filename_range,
|
||||
const clang::FileEntry *file,
|
||||
StringRef search_path,
|
||||
StringRef relative_path,
|
||||
const clang::Module *imported);
|
||||
|
||||
protected:
|
||||
/**
|
||||
* Add a Replacement for the current file. These will all be applied after executing the FrontendAction.
|
||||
*/
|
||||
void insertReplacement(const ct::Replacement& rep, const clang::FullSourceLoc& fullSL);
|
||||
|
||||
/**
|
||||
* FrontendAction entry point.
|
||||
*/
|
||||
void ExecuteAction() override;
|
||||
|
||||
/**
|
||||
* Called at the start of each new file to process.
|
||||
*/
|
||||
void EndSourceFileAction() override;
|
||||
|
||||
/**
|
||||
* MatchCallback API entry point. Called by the AST visitor while searching the AST for things we registered an
|
||||
* interest for.
|
||||
*/
|
||||
void run(const clang::ast_matchers::MatchFinder::MatchResult& Result) override;
|
||||
|
||||
std::unique_ptr<clang::ASTConsumer> CreateASTConsumer(clang::CompilerInstance &CI, llvm::StringRef InFile) override;
|
||||
};
|
||||
@@ -0,0 +1,28 @@
|
||||
#pragma once
|
||||
|
||||
#include "clang/Tooling/Tooling.h"
|
||||
#include "clang/Frontend/FrontendAction.h"
|
||||
#include "clang/Tooling/Core/Replacement.h"
|
||||
|
||||
namespace ct = clang::tooling;
|
||||
|
||||
|
||||
/**
|
||||
* A FrontendActionFactory that propagates a set of Replacements into the FrontendAction.
|
||||
* This is necessary boilerplate for using a custom FrontendAction with a RefactoringTool.
|
||||
*
|
||||
* @tparam T The FrontendAction to create.
|
||||
*/
|
||||
template <typename T>
|
||||
class ReplacementsFrontendActionFactory : public ct::FrontendActionFactory {
|
||||
ct::Replacements* replacements;
|
||||
|
||||
public:
|
||||
explicit ReplacementsFrontendActionFactory(ct::Replacements* r):
|
||||
ct::FrontendActionFactory(),
|
||||
replacements(r) {}
|
||||
|
||||
clang::FrontendAction* create() override {
|
||||
return new T(replacements);
|
||||
}
|
||||
};
|
||||
@@ -0,0 +1,155 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
/**
|
||||
* @file Cuda2Hip.cpp
|
||||
*
|
||||
* This file is compiled and linked into clang based hipify tool.
|
||||
*/
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
#include <chrono>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
|
||||
#include "CUDA2HipMap.h"
|
||||
#include "LLVMCompat.h"
|
||||
#include "HipifyAction.h"
|
||||
#include "ArgParse.h"
|
||||
|
||||
#define DEBUG_TYPE "cuda2hip"
|
||||
|
||||
namespace ct = clang::tooling;
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
void copyFile(const std::string& src, const std::string& dst) {
|
||||
std::ifstream source(src, std::ios::binary);
|
||||
std::ofstream dest(dst, std::ios::binary);
|
||||
dest << source.rdbuf();
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int main(int argc, const char **argv) {
|
||||
llcompat::PrintStackTraceOnErrorSignal();
|
||||
|
||||
ct::CommonOptionsParser OptionsParser(argc, argv, ToolTemplateCategory, llvm::cl::OneOrMore);
|
||||
std::vector<std::string> fileSources = OptionsParser.getSourcePathList();
|
||||
std::string dst = OutputFilename;
|
||||
if (!dst.empty() && fileSources.size() > 1) {
|
||||
llvm::errs() << "[HIPIFY] conflict: -o and multiple source files are specified.\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (NoOutput) {
|
||||
if (Inplace) {
|
||||
llvm::errs() << "[HIPIFY] conflict: both -no-output and -inplace options are specified.\n";
|
||||
return 1;
|
||||
}
|
||||
if (!dst.empty()) {
|
||||
llvm::errs() << "[HIPIFY] conflict: both -no-output and -o options are specified.\n";
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (Examine) {
|
||||
NoOutput = PrintStats = true;
|
||||
}
|
||||
|
||||
int Result = 0;
|
||||
|
||||
// Arguments for the Statistics print routines.
|
||||
std::unique_ptr<std::ostream> csv = nullptr;
|
||||
llvm::raw_ostream* statPrint = nullptr;
|
||||
if (!OutputStatsFilename.empty()) {
|
||||
csv = std::unique_ptr<std::ostream>(new std::ofstream(OutputStatsFilename, std::ios_base::trunc));
|
||||
}
|
||||
if (PrintStats) {
|
||||
statPrint = &llvm::errs();
|
||||
}
|
||||
|
||||
for (const auto & src : fileSources) {
|
||||
if (dst.empty()) {
|
||||
if (Inplace) {
|
||||
dst = src;
|
||||
} else {
|
||||
dst = src + ".hip";
|
||||
}
|
||||
} else if (Inplace) {
|
||||
llvm::errs() << "[HIPIFY] conflict: both -o and -inplace options are specified.\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::string tmpFile = src + ".hipify-tmp";
|
||||
|
||||
// Create a copy of the file to work on. When we're done, we'll move this onto the
|
||||
// output (which may mean overwriting the input, if we're in-place).
|
||||
// Should we fail for some reason, we'll just leak this file and not corrupt the input.
|
||||
copyFile(src, tmpFile);
|
||||
|
||||
// Initialise the statistics counters for this file.
|
||||
Statistics::setActive(src);
|
||||
|
||||
// RefactoringTool operates on the file in-place. Giving it the output path is no good,
|
||||
// because that'll break relative includes, and we don't want to overwrite the input file.
|
||||
// So what we do is operate on a copy, which we then move to the output.
|
||||
ct::RefactoringTool Tool(OptionsParser.getCompilations(), tmpFile);
|
||||
ct::Replacements& replacementsToUse = llcompat::getReplacements(Tool, tmpFile);
|
||||
|
||||
ReplacementsFrontendActionFactory<HipifyAction> actionFactory(&replacementsToUse);
|
||||
|
||||
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("--cuda-host-only", ct::ArgumentInsertPosition::BEGIN));
|
||||
|
||||
// Ensure at least c++11 is used.
|
||||
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-std=c++11", ct::ArgumentInsertPosition::BEGIN));
|
||||
#if defined(HIPIFY_CLANG_RES)
|
||||
Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-resource-dir=" HIPIFY_CLANG_RES));
|
||||
#endif
|
||||
Tool.appendArgumentsAdjuster(ct::getClangSyntaxOnlyAdjuster());
|
||||
|
||||
// Hipify _all_ the things!
|
||||
if (Tool.runAndSave(&actionFactory)) {
|
||||
DEBUG(llvm::dbgs() << "Skipped some replacements.\n");
|
||||
}
|
||||
|
||||
// Either move the tmpfile to the output, or remove it.
|
||||
if (!NoOutput) {
|
||||
rename(tmpFile.c_str(), dst.c_str());
|
||||
} else {
|
||||
remove(tmpFile.c_str());
|
||||
}
|
||||
|
||||
Statistics::current().markCompletion();
|
||||
Statistics::current().print(csv.get(), statPrint);
|
||||
|
||||
dst.clear();
|
||||
}
|
||||
|
||||
if (fileSources.size() > 1) {
|
||||
Statistics::printAggregate(csv.get(), statPrint);
|
||||
}
|
||||
|
||||
return Result;
|
||||
}
|
||||
@@ -50,10 +50,16 @@ THE SOFTWARE.
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
|
||||
// define HIP_ENABLE_PRINTF to enable printf
|
||||
#ifdef HIP_ENABLE_PRINTF
|
||||
#define HCC_ENABLE_ACCELERATOR_PRINTF 1
|
||||
#endif
|
||||
|
||||
//---
|
||||
// Remainder of this file only compiles with HCC
|
||||
#if defined __HCC__
|
||||
#include <grid_launch.h>
|
||||
#include "hc_printf.hpp"
|
||||
//TODO-HCC-GL - change this to typedef.
|
||||
//typedef grid_launch_parm hipLaunchParm ;
|
||||
|
||||
@@ -108,13 +114,12 @@ extern int HIP_TRACE_API;
|
||||
#if defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)
|
||||
// Device compile and not host compile:
|
||||
|
||||
//TODO-HCC enable __HIP_ARCH_HAS_ATOMICS__ when HCC supports these.
|
||||
// 32-bit Atomics:
|
||||
#define __HIP_ARCH_HAS_GLOBAL_INT32_ATOMICS__ (1)
|
||||
#define __HIP_ARCH_HAS_GLOBAL_FLOAT_ATOMIC_EXCH__ (1)
|
||||
#define __HIP_ARCH_HAS_SHARED_INT32_ATOMICS__ (1)
|
||||
#define __HIP_ARCH_HAS_SHARED_FLOAT_ATOMIC_EXCH__ (1)
|
||||
#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (0)
|
||||
#define __HIP_ARCH_HAS_FLOAT_ATOMIC_ADD__ (1)
|
||||
|
||||
// 64-bit Atomics:
|
||||
#define __HIP_ARCH_HAS_GLOBAL_INT64_ATOMICS__ (1)
|
||||
@@ -376,6 +381,27 @@ __device__ void __threadfence_system(void) ;
|
||||
* @}
|
||||
*/
|
||||
|
||||
template<typename std::common_type<
|
||||
decltype(hc_get_group_id),
|
||||
decltype(hc_get_group_size),
|
||||
decltype(hc_get_num_groups),
|
||||
decltype(hc_get_workitem_id)>::type f>
|
||||
class Coordinates {
|
||||
using R = decltype(f(0));
|
||||
|
||||
struct X { __device__ operator R() const { return f(0); } };
|
||||
struct Y { __device__ operator R() const { return f(1); } };
|
||||
struct Z { __device__ operator R() const { return f(2); } };
|
||||
public:
|
||||
static constexpr X x{};
|
||||
static constexpr Y y{};
|
||||
static constexpr Z z{};
|
||||
};
|
||||
|
||||
static constexpr Coordinates<hc_get_group_size> blockDim;
|
||||
static constexpr Coordinates<hc_get_group_id> blockIdx;
|
||||
static constexpr Coordinates<hc_get_num_groups> gridDim;
|
||||
static constexpr Coordinates<hc_get_workitem_id> threadIdx;
|
||||
|
||||
#define hipThreadIdx_x (hc_get_workitem_id(0))
|
||||
#define hipThreadIdx_y (hc_get_workitem_id(1))
|
||||
@@ -420,6 +446,20 @@ static inline __device__ void* memset(void* ptr, int val, size_t size)
|
||||
}
|
||||
|
||||
|
||||
#ifdef __HCC_ACCELERATOR__
|
||||
|
||||
#ifdef HC_FEATURE_PRINTF
|
||||
template <typename... All>
|
||||
static inline __device__ void printf(const char* format, All... all) {
|
||||
hc::printf(format, all...);
|
||||
}
|
||||
#else
|
||||
template <typename... All>
|
||||
static inline __device__ void printf(const char* format, All... all) { }
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
|
||||
|
||||
@@ -462,7 +502,7 @@ do {\
|
||||
type* var = \
|
||||
(type*)__get_dynamicgroupbaseptr(); \
|
||||
|
||||
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
|
||||
#define HIP_DYNAMIC_SHARED_ATTRIBUTE
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -28,7 +28,7 @@ set(CPACK_BINARY_DEB "ON")
|
||||
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm")
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "perl (>= 5.0)")
|
||||
set(CPACK_BINARY_RPM "ON")
|
||||
set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64")
|
||||
set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}")
|
||||
set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst")
|
||||
set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm")
|
||||
set(CPACK_RPM_PACKAGE_AUTOREQPROV " no")
|
||||
|
||||
@@ -33,7 +33,7 @@ set(CPACK_GENERATOR "TGZ;DEB;RPM")
|
||||
set(CPACK_BINARY_DEB "ON")
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION})")
|
||||
set(CPACK_BINARY_RPM "ON")
|
||||
set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64")
|
||||
set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}")
|
||||
set(CPACK_RPM_PACKAGE_AUTOREQPROV " no")
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}")
|
||||
set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt")
|
||||
|
||||
@@ -37,14 +37,14 @@ else()
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), ${HCC_PACKAGE_NAME} (= @HCC_PACKAGE_VERSION@)")
|
||||
endif()
|
||||
set(CPACK_BINARY_RPM "ON")
|
||||
set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64")
|
||||
set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}")
|
||||
set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst")
|
||||
set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm")
|
||||
set(CPACK_RPM_PACKAGE_AUTOREQPROV " no")
|
||||
if(@COMPILE_HIP_ATP_MARKER@)
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@, rocm-profiler, libstdc++-static")
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@, rocm-profiler")
|
||||
else()
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@, libstdc++-static")
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}, ${HCC_PACKAGE_NAME} = @HCC_PACKAGE_VERSION@")
|
||||
endif()
|
||||
set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt")
|
||||
set(CPACK_SOURCE_GENERATOR "TGZ")
|
||||
|
||||
@@ -20,7 +20,7 @@ set(CPACK_BINARY_DEB "ON")
|
||||
#set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm")
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION}), cuda (>= 7.5)")
|
||||
set(CPACK_BINARY_RPM "ON")
|
||||
set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64")
|
||||
set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}")
|
||||
#set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst")
|
||||
#set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm")
|
||||
set(CPACK_RPM_PACKAGE_AUTOREQPROV " no")
|
||||
|
||||
@@ -21,7 +21,7 @@ set(CPACK_GENERATOR "TGZ;DEB;RPM")
|
||||
set(CPACK_BINARY_DEB "ON")
|
||||
set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_base (= ${CPACK_PACKAGE_VERSION})")
|
||||
set(CPACK_BINARY_RPM "ON")
|
||||
set(CPACK_RPM_PACKAGE_ARCHITECTURE "x86_64")
|
||||
set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}")
|
||||
set(CPACK_RPM_PACKAGE_AUTOREQPROV " no")
|
||||
set(CPACK_RPM_PACKAGE_REQUIRES "hip_base = ${CPACK_PACKAGE_VERSION}")
|
||||
set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt")
|
||||
|
||||
@@ -5,7 +5,7 @@ endif
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
|
||||
|
||||
all: vcpy_kernel.code runKernel.hip.out defaultDriver.hip.out
|
||||
all: vcpy_kernel.code runKernel.hip.out launchKernelHcc.hip.out
|
||||
|
||||
runKernel.hip.out: runKernel.cpp
|
||||
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
|
||||
@@ -13,8 +13,8 @@ runKernel.hip.out: runKernel.cpp
|
||||
launchKernelHcc.hip.out: launchKernelHcc.cpp
|
||||
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
|
||||
|
||||
defaultDriver.hip.out: defaultDriver.cpp
|
||||
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
|
||||
#defaultDriver.hip.out: defaultDriver.cpp
|
||||
# $(HIPCC) $(HIPCC_FLAGS) $< -o $@
|
||||
|
||||
vcpy_kernel.code: vcpy_kernel.cpp
|
||||
$(HIPCC) --genco $(GENCO_FLAGS) $^ -o $@
|
||||
|
||||
@@ -72,6 +72,7 @@ int main(){
|
||||
uint32_t one = 1;
|
||||
|
||||
struct {
|
||||
uint32_t _hidden[6];
|
||||
void * _Ad;
|
||||
void * _Bd;
|
||||
} args;
|
||||
|
||||
@@ -68,6 +68,7 @@ int main(){
|
||||
uint32_t one = 1;
|
||||
|
||||
struct {
|
||||
uint32_t _hidden[6];
|
||||
void * _Ad;
|
||||
void * _Bd;
|
||||
} args;
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
__kernel void memset(char in, __global int* out) {
|
||||
int tx = get_global_id(0);
|
||||
out[tx] = in;
|
||||
}
|
||||
|
||||
|
||||
__kernel void vadd(__global float *Ad, __global float *Bd, __global float *Cd, int N){
|
||||
int tx = get_global_id(0);
|
||||
if(tx < N){
|
||||
Cd[tx] = Ad[tx] + Bd[tx];
|
||||
}
|
||||
}
|
||||
バイナリファイルは表示されません。
@@ -23,6 +23,11 @@ THE SOFTWARE.
|
||||
#include <hc_math.hpp>
|
||||
#include "device_util.h"
|
||||
|
||||
extern "C" float __ocml_floor_f32(float);
|
||||
extern "C" float __ocml_rint_f32(float);
|
||||
extern "C" float __ocml_ceil_f32(float);
|
||||
extern "C" float __ocml_trunc_f32(float);
|
||||
|
||||
struct holder64Bit{
|
||||
union{
|
||||
double d;
|
||||
@@ -151,19 +156,19 @@ __device__ long long int __double_as_longlong(double x)
|
||||
|
||||
__device__ int __float2int_rd(float x)
|
||||
{
|
||||
return (int)x;
|
||||
return (int)__ocml_floor_f32(x);
|
||||
}
|
||||
__device__ int __float2int_rn(float x)
|
||||
{
|
||||
return (int)x;
|
||||
return (int)__ocml_rint_f32(x);
|
||||
}
|
||||
__device__ int __float2int_ru(float x)
|
||||
{
|
||||
return (int)x;
|
||||
return (int)__ocml_ceil_f32(x);
|
||||
}
|
||||
__device__ int __float2int_rz(float x)
|
||||
{
|
||||
return (int)x;
|
||||
return (int)__ocml_trunc_f32(x);
|
||||
}
|
||||
|
||||
__device__ long long int __float2ll_rd(float x)
|
||||
|
||||
@@ -45,8 +45,8 @@ __device__ void *__hip_hc_malloc(size_t size)
|
||||
{
|
||||
return (void*)nullptr;
|
||||
}
|
||||
uint32_t totalThreads = hipBlockDim_x * hipGridDim_x * hipBlockDim_y * hipGridDim_y * hipBlockDim_z * hipGridDim_z;
|
||||
uint32_t currentWorkItem = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x;
|
||||
uint32_t totalThreads = blockDim.x * gridDim.x * blockDim.y * gridDim.y * blockDim.z * gridDim.z;
|
||||
uint32_t currentWorkItem = threadIdx.x + blockDim.x * blockIdx.x;
|
||||
|
||||
uint32_t numHeapsPerWorkItem = NUM_PAGES / totalThreads;
|
||||
uint32_t heapSizePerWorkItem = SIZE_OF_HEAP / totalThreads;
|
||||
@@ -932,7 +932,7 @@ __device__ unsigned long long int atomicMax(unsigned long long int* address,
|
||||
template<typename T>
|
||||
__device__ T atomicCAS_impl(T* address, T compare, T val)
|
||||
{
|
||||
// the implementation assumes the atomic is lock-free and
|
||||
// the implementation assumes the atomic is lock-free and
|
||||
// has the same size as the non-atmoic equivalent type
|
||||
static_assert(sizeof(T) == sizeof(std::atomic<T>)
|
||||
, "size mismatch between atomic and non-atomic types");
|
||||
@@ -945,7 +945,7 @@ __device__ T atomicCAS_impl(T* address, T compare, T val)
|
||||
|
||||
T expected = compare;
|
||||
|
||||
// hcc should generate a system scope atomic CAS
|
||||
// hcc should generate a system scope atomic CAS
|
||||
std::atomic_compare_exchange_weak_explicit(u.atomic_address
|
||||
, &expected, val
|
||||
, std::memory_order_acq_rel
|
||||
@@ -1110,8 +1110,8 @@ __device__ void* __get_dynamicgroupbaseptr() {
|
||||
return hc::get_dynamic_group_segment_base_pointer();
|
||||
}
|
||||
|
||||
__host__ void* __get_dynamicgroupbaseptr() {
|
||||
return nullptr;
|
||||
__host__ void* __get_dynamicgroupbaseptr() {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Precise Math Functions
|
||||
|
||||
@@ -92,5 +92,8 @@ namespace hip_impl
|
||||
|
||||
delete static_cast<L*>(locked_stream);
|
||||
locked_stream = nullptr;
|
||||
if(HIP_PROFILE_API) {
|
||||
MARKER_END();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -269,7 +269,7 @@ hipError_t hipCtxGetSharedMemConfig ( hipSharedMemConfig * pConfig )
|
||||
hipError_t hipCtxSynchronize ( void )
|
||||
{
|
||||
HIP_INIT_API(1);
|
||||
return ihipSynchronize(); //TODP Shall check validity of ctx?
|
||||
return ihipLogStatus(ihipSynchronize()); //TODP Shall check validity of ctx?
|
||||
}
|
||||
|
||||
hipError_t hipCtxGetFlags ( unsigned int* flags )
|
||||
|
||||
@@ -60,12 +60,12 @@ hipError_t ihipGetDeviceCount(int *count)
|
||||
*count = g_deviceCnt;
|
||||
|
||||
if (*count > 0) {
|
||||
e = ihipLogStatus(hipSuccess);
|
||||
e = hipSuccess;
|
||||
} else {
|
||||
e = ihipLogStatus(hipErrorNoDevice);
|
||||
e = hipErrorNoDevice;
|
||||
}
|
||||
} else {
|
||||
e = ihipLogStatus(hipErrorInvalidValue);
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
return e;
|
||||
}
|
||||
@@ -73,7 +73,7 @@ hipError_t ihipGetDeviceCount(int *count)
|
||||
hipError_t hipGetDeviceCount(int *count)
|
||||
{
|
||||
HIP_INIT_API(count);
|
||||
return ihipGetDeviceCount(count);
|
||||
return ihipLogStatus(ihipGetDeviceCount(count));
|
||||
}
|
||||
|
||||
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig)
|
||||
@@ -205,7 +205,7 @@ hipError_t ihipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
if(pi == nullptr) {
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
auto * hipDevice = ihipGetDevice(device);
|
||||
|
||||
+104
-90
@@ -31,12 +31,9 @@ THE SOFTWARE.
|
||||
|
||||
|
||||
ihipEvent_t::ihipEvent_t(unsigned flags)
|
||||
: _criticalData(this)
|
||||
{
|
||||
_state = hipEventStatusCreated;
|
||||
_stream = NULL;
|
||||
_flags = flags;
|
||||
_timestamp = 0;
|
||||
_type = hipEventTypeIndependent;
|
||||
};
|
||||
|
||||
|
||||
@@ -45,56 +42,45 @@ ihipEvent_t::ihipEvent_t(unsigned flags)
|
||||
void ihipEvent_t::attachToCompletionFuture(const hc::completion_future *cf,
|
||||
hipStream_t stream, ihipEventType_t eventType)
|
||||
{
|
||||
_state = hipEventStatusRecording;
|
||||
_marker = *cf;
|
||||
_type = eventType;
|
||||
_stream = stream;
|
||||
LockedAccessor_EventCrit_t crit(_criticalData);
|
||||
crit->_eventData.marker(*cf);
|
||||
crit->_eventData._type = eventType;
|
||||
crit->_eventData._stream = stream;
|
||||
crit->_eventData._state = hipEventStatusRecording;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void ihipEvent_t::refereshEventStatus()
|
||||
std::pair<hipEventStatus_t, uint64_t>
|
||||
ihipEvent_t::refreshEventStatus()
|
||||
{
|
||||
bool isReady0 = locked_isReady();
|
||||
bool isReady1;
|
||||
int val = 0;
|
||||
if (_state == hipEventStatusRecording) {
|
||||
// TODO - use completion-future functions to obtain ticks and timestamps:
|
||||
hsa_signal_t *sig = static_cast<hsa_signal_t*> (_marker.get_native_handle());
|
||||
isReady1 = locked_isReady();
|
||||
if (sig) {
|
||||
val = hsa_signal_load_acquire(*sig);
|
||||
if (val == 0) {
|
||||
auto ecd = locked_copyCrit();
|
||||
if (ecd._state == hipEventStatusRecording) {
|
||||
bool isReady1 = ecd._stream->locked_eventIsReady(this);
|
||||
if (isReady1) {
|
||||
LockedAccessor_EventCrit_t eCrit(_criticalData);
|
||||
|
||||
if ((_type == hipEventTypeIndependent) || (_type == hipEventTypeStopCommand)) {
|
||||
_timestamp = _marker.get_end_tick();
|
||||
} else if (_type == hipEventTypeStartCommand) {
|
||||
_timestamp = _marker.get_begin_tick();
|
||||
} else {
|
||||
assert(0); // TODO - move to debug assert
|
||||
_timestamp = 0;
|
||||
}
|
||||
|
||||
_state = hipEventStatusComplete;
|
||||
if ((eCrit->_eventData._type == hipEventTypeIndependent) ||
|
||||
(eCrit->_eventData._type == hipEventTypeStopCommand)) {
|
||||
eCrit->_eventData._timestamp = eCrit->_eventData.marker().get_end_tick();
|
||||
} else if (eCrit->_eventData._type == hipEventTypeStartCommand) {
|
||||
eCrit->_eventData._timestamp = eCrit->_eventData.marker().get_begin_tick();
|
||||
} else {
|
||||
eCrit->_eventData._timestamp = 0;
|
||||
assert(0); // TODO - move to debug assert
|
||||
}
|
||||
|
||||
eCrit->_eventData._state = hipEventStatusComplete;
|
||||
|
||||
return std::pair<hipEventStatus_t, uint64_t> (eCrit->_eventData._state, eCrit->_eventData._timestamp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (_state != hipEventStatusComplete) {
|
||||
//printf (" not ready isReady0=%d val=%d isReady1=%d\n", isReady0, val, isReady1);
|
||||
}
|
||||
// Not complete path here:
|
||||
return std::pair<hipEventStatus_t, uint64_t> (ecd._state, ecd._timestamp);
|
||||
}
|
||||
|
||||
|
||||
bool ihipEvent_t::locked_isReady()
|
||||
{
|
||||
return _stream->locked_eventIsReady(this);
|
||||
}
|
||||
|
||||
void ihipEvent_t::locked_waitComplete(hc::hcWaitMode waitMode)
|
||||
{
|
||||
return _stream->locked_eventWaitComplete(this, waitMode);
|
||||
}
|
||||
|
||||
|
||||
hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags)
|
||||
@@ -136,33 +122,43 @@ hipError_t hipEventCreate(hipEvent_t* event)
|
||||
return ihipLogStatus(ihipEventCreate(event, 0));
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_SPECIAL_API(TRACE_SYNC, event, stream);
|
||||
|
||||
if (event && event->_state != hipEventStatusUnitialized) {
|
||||
auto ecd = event->locked_copyCrit();
|
||||
|
||||
if (event && ecd._state != hipEventStatusUnitialized) {
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
|
||||
event->_stream = stream;
|
||||
|
||||
if (HIP_SYNC_NULL_STREAM && stream->isDefaultStream()) {
|
||||
|
||||
// TODO-HIP_SYNC_NULL_STREAM : can remove this code when HIP_SYNC_NULL_STREAM = 0
|
||||
|
||||
//
|
||||
// If default stream , then wait on all queues.
|
||||
ihipCtx_t *ctx = ihipGetTlsDefaultCtx();
|
||||
ctx->locked_syncDefaultStream(true, true);
|
||||
|
||||
event->_timestamp = hc::get_system_ticks();
|
||||
event->_state = hipEventStatusComplete;
|
||||
{
|
||||
LockedAccessor_EventCrit_t eCrit(event->criticalData());
|
||||
eCrit->_eventData.marker(hc::completion_future()); // reset event
|
||||
eCrit->_eventData._stream = stream;
|
||||
eCrit->_eventData._timestamp = hc::get_system_ticks();
|
||||
eCrit->_eventData._state = hipEventStatusComplete;
|
||||
}
|
||||
return ihipLogStatus(hipSuccess);
|
||||
} else {
|
||||
event->_state = hipEventStatusRecording;
|
||||
// Clear timestamps
|
||||
event->_timestamp = 0;
|
||||
|
||||
// Record the event in the stream:
|
||||
stream->locked_recordEvent(event);
|
||||
// Keep a copy outside the critical section so we lock stream first, then event - to avoid deadlock
|
||||
hc::completion_future cf = stream->locked_recordEvent(event);
|
||||
|
||||
{
|
||||
LockedAccessor_EventCrit_t eCrit(event->criticalData());
|
||||
eCrit->_eventData.marker(cf);
|
||||
eCrit->_eventData._stream = stream;
|
||||
eCrit->_eventData._timestamp = 0;
|
||||
eCrit->_eventData._state = hipEventStatusRecording;
|
||||
}
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
@@ -171,15 +167,13 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipEventDestroy(hipEvent_t event)
|
||||
{
|
||||
HIP_INIT_API(event);
|
||||
|
||||
if (event) {
|
||||
event->_state = hipEventStatusUnitialized;
|
||||
|
||||
delete event;
|
||||
event = NULL;
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
} else {
|
||||
@@ -191,19 +185,27 @@ hipError_t hipEventSynchronize(hipEvent_t event)
|
||||
{
|
||||
HIP_INIT_SPECIAL_API(TRACE_SYNC, event);
|
||||
|
||||
if (!(event->_flags & hipEventReleaseToSystem)) {
|
||||
tprintf(DB_WARN, "hipEventSynchronize on event without system-scope fence ; consider creating with hipEventReleaseToSystem\n");
|
||||
}
|
||||
auto ecd = event->locked_copyCrit();
|
||||
|
||||
if (event) {
|
||||
if (event->_state == hipEventStatusUnitialized) {
|
||||
if (ecd._state == hipEventStatusUnitialized) {
|
||||
return ihipLogStatus(hipErrorInvalidResourceHandle);
|
||||
} else if (event->_state == hipEventStatusCreated ) {
|
||||
} else if (ecd._state == hipEventStatusCreated ) {
|
||||
// Created but not actually recorded on any device:
|
||||
return ihipLogStatus(hipSuccess);
|
||||
} else if (HIP_SYNC_NULL_STREAM && (event->_stream->isDefaultStream() )) {
|
||||
} else if (HIP_SYNC_NULL_STREAM && (ecd._stream->isDefaultStream() )) {
|
||||
auto *ctx = ihipGetTlsDefaultCtx();
|
||||
// TODO-HIP_SYNC_NULL_STREAM - can remove this code
|
||||
ctx->locked_syncDefaultStream(true, true);
|
||||
return ihipLogStatus(hipSuccess);
|
||||
} else {
|
||||
event->locked_waitComplete((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive);
|
||||
ecd._stream->locked_eventWaitComplete(
|
||||
ecd.marker(),
|
||||
(event->_flags & hipEventBlockingSync) ?
|
||||
hc::hcWaitModeBlocked : hc::hcWaitModeActive);
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
@@ -220,44 +222,50 @@ hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop)
|
||||
|
||||
*ms = 0.0f;
|
||||
|
||||
if ((start == nullptr) ||
|
||||
(start->_flags & hipEventDisableTiming) ||
|
||||
(start->_state == hipEventStatusUnitialized) || (start->_state == hipEventStatusCreated) ||
|
||||
(stop == nullptr) ||
|
||||
(stop->_flags & hipEventDisableTiming) ||
|
||||
( stop->_state == hipEventStatusUnitialized) || ( stop->_state == hipEventStatusCreated)) {
|
||||
|
||||
// Both events must be at least recorded else return hipErrorInvalidResourceHandle
|
||||
|
||||
if ((start == nullptr) || (stop == nullptr)) {
|
||||
status = hipErrorInvalidResourceHandle;
|
||||
|
||||
} else {
|
||||
// Refresh status, if still recording...
|
||||
start->refereshEventStatus();
|
||||
stop->refereshEventStatus();
|
||||
|
||||
auto startEcd = start->locked_copyCrit();
|
||||
auto stopEcd = stop->locked_copyCrit();
|
||||
|
||||
if ((start->_state == hipEventStatusComplete) && (stop->_state == hipEventStatusComplete)) {
|
||||
// Common case, we have good information for both events.
|
||||
if ((start->_flags & hipEventDisableTiming) ||
|
||||
(startEcd._state == hipEventStatusUnitialized) || (startEcd._state == hipEventStatusCreated) ||
|
||||
(stop->_flags & hipEventDisableTiming) ||
|
||||
(stopEcd._state == hipEventStatusUnitialized) || (stopEcd._state == hipEventStatusCreated)) {
|
||||
|
||||
int64_t tickDiff = (stop->timestamp() - start->timestamp());
|
||||
// Both events must be at least recorded else return hipErrorInvalidResourceHandle
|
||||
|
||||
status = hipErrorInvalidResourceHandle;
|
||||
|
||||
uint64_t freqHz;
|
||||
hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freqHz);
|
||||
if (freqHz) {
|
||||
*ms = ((double)(tickDiff) / (double)(freqHz)) * 1000.0f;
|
||||
status = hipSuccess;
|
||||
} else {
|
||||
* ms = 0.0f;
|
||||
status = hipErrorInvalidValue;
|
||||
}
|
||||
// Refresh status, if still recording...
|
||||
|
||||
auto startStatus = start->refreshEventStatus(); // pair < state, timestamp >
|
||||
auto stopStatus = stop->refreshEventStatus(); // pair < state, timestamp >
|
||||
|
||||
if ((startStatus.first == hipEventStatusComplete) && (stopStatus.first == hipEventStatusComplete)) {
|
||||
// Common case, we have good information for both events. 'second" is the timestamp:
|
||||
int64_t tickDiff = (stopStatus.second - startStatus.second);
|
||||
|
||||
uint64_t freqHz;
|
||||
hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freqHz);
|
||||
if (freqHz) {
|
||||
*ms = ((double)(tickDiff) / (double)(freqHz)) * 1000.0f;
|
||||
status = hipSuccess;
|
||||
} else {
|
||||
* ms = 0.0f;
|
||||
status = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
|
||||
} else if ((start->_state == hipEventStatusRecording) ||
|
||||
(stop->_state == hipEventStatusRecording)) {
|
||||
} else if ((startStatus.first == hipEventStatusRecording) ||
|
||||
(stopStatus.first == hipEventStatusRecording)) {
|
||||
|
||||
status = hipErrorNotReady;
|
||||
} else {
|
||||
status = hipErrorNotReady;
|
||||
} else {
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -268,7 +276,13 @@ hipError_t hipEventQuery(hipEvent_t event)
|
||||
{
|
||||
HIP_INIT_SPECIAL_API(TRACE_QUERY, event);
|
||||
|
||||
if ((event->_state == hipEventStatusRecording) && !event->locked_isReady()) {
|
||||
if (!(event->_flags & hipEventReleaseToSystem)) {
|
||||
tprintf(DB_WARN, "hipEventQuery on event without system-scope fence ; consider creating with hipEventReleaseToSystem\n");
|
||||
}
|
||||
|
||||
auto ecd = event->locked_copyCrit();
|
||||
|
||||
if ((ecd._state == hipEventStatusRecording) && !ecd._stream->locked_eventIsReady(event)) {
|
||||
return ihipLogStatus(hipErrorNotReady);
|
||||
} else {
|
||||
return ihipLogStatus(hipSuccess);
|
||||
|
||||
+77
-49
@@ -47,6 +47,9 @@ THE SOFTWARE.
|
||||
#include "trace_helper.h"
|
||||
#include "env.h"
|
||||
|
||||
//TODO - create a stream-based debug interface as an additional option for tprintf
|
||||
#define DB_PEER_CTX 0
|
||||
|
||||
|
||||
//=================================================================================================
|
||||
//Global variables:
|
||||
@@ -156,7 +159,7 @@ thread_local TidInfo tls_tidInfo;
|
||||
//=================================================================================================
|
||||
// Top-level "free" functions:
|
||||
//=================================================================================================
|
||||
void recordApiTrace(std::string *fullStr, const std::string &apiStr)
|
||||
uint64_t recordApiTrace(std::string *fullStr, const std::string &apiStr)
|
||||
{
|
||||
auto apiSeqNum = tls_tidInfo.apiSeqNum();
|
||||
auto tid = tls_tidInfo.tid();
|
||||
@@ -178,10 +181,14 @@ void recordApiTrace(std::string *fullStr, const std::string &apiStr)
|
||||
*fullStr += " ";
|
||||
*fullStr += apiStr;
|
||||
|
||||
uint64_t apiStartTick = getTicks();
|
||||
|
||||
|
||||
if (COMPILE_HIP_DB && HIP_TRACE_API) {
|
||||
fprintf (stderr, "%s<<hip-api tid:%s%s\n" , API_COLOR, fullStr->c_str(), API_COLOR_END);
|
||||
fprintf (stderr, "%s<<hip-api tid:%s @%lu%s\n" , API_COLOR, fullStr->c_str(), apiStartTick, API_COLOR_END);
|
||||
}
|
||||
|
||||
return apiStartTick;
|
||||
}
|
||||
|
||||
|
||||
@@ -332,12 +339,11 @@ void ihipStream_t::locked_wait()
|
||||
|
||||
// Causes current stream to wait for specified event to complete:
|
||||
// Note this does not provide any kind of host serialization.
|
||||
void ihipStream_t::locked_streamWaitEvent(hipEvent_t event)
|
||||
void ihipStream_t::locked_streamWaitEvent(ihipEventData_t &ecd)
|
||||
{
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
|
||||
|
||||
crit->_av.create_blocking_marker(event->marker(), hc::accelerator_scope);
|
||||
crit->_av.create_blocking_marker(ecd.marker(), hc::accelerator_scope);
|
||||
}
|
||||
|
||||
|
||||
@@ -345,24 +351,28 @@ void ihipStream_t::locked_streamWaitEvent(hipEvent_t event)
|
||||
// Note this does not provide any kind of host serialization.
|
||||
bool ihipStream_t::locked_eventIsReady(hipEvent_t event)
|
||||
{
|
||||
|
||||
// Event query that returns "Complete" may cause HCC to manipulate
|
||||
// internal queue state so lock the stream's queue here.
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
LockedAccessor_StreamCrit_t scrit(_criticalData);
|
||||
|
||||
return (event->marker().is_ready());
|
||||
LockedAccessor_EventCrit_t ecrit(event->criticalData());
|
||||
|
||||
return (ecrit->_eventData.marker().is_ready());
|
||||
}
|
||||
|
||||
void ihipStream_t::locked_eventWaitComplete(hipEvent_t event, hc::hcWaitMode waitMode)
|
||||
// Waiting on event can cause HCC to reclaim stream resources - so need to lock the stream.
|
||||
void ihipStream_t::locked_eventWaitComplete(hc::completion_future &marker, hc::hcWaitMode waitMode)
|
||||
{
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
|
||||
event->marker().wait(waitMode);
|
||||
marker.wait(waitMode);
|
||||
}
|
||||
|
||||
|
||||
// Create a marker in this stream.
|
||||
// Save state in the event so it can track the status of the event.
|
||||
void ihipStream_t::locked_recordEvent(hipEvent_t event)
|
||||
hc::completion_future ihipStream_t::locked_recordEvent(hipEvent_t event)
|
||||
{
|
||||
// Lock the stream to prevent simultaneous access
|
||||
LockedAccessor_StreamCrit_t crit(_criticalData);
|
||||
@@ -378,7 +388,7 @@ void ihipStream_t::locked_recordEvent(hipEvent_t event)
|
||||
scopeFlag = HIP_EVENT_SYS_RELEASE ? hc::system_scope : hc::accelerator_scope;
|
||||
}
|
||||
|
||||
event->marker(crit->_av.create_marker(scopeFlag));
|
||||
return crit->_av.create_marker(scopeFlag);
|
||||
};
|
||||
|
||||
//=============================================================================
|
||||
@@ -459,7 +469,9 @@ void ihipCtxCriticalBase_t<CtxMutex>::recomputePeerAgents()
|
||||
template<>
|
||||
bool ihipCtxCriticalBase_t<CtxMutex>::isPeerWatcher(const ihipCtx_t *peer)
|
||||
{
|
||||
auto match = std::find(_peers.begin(), _peers.end(), peer);
|
||||
auto match = std::find_if(_peers.begin(), _peers.end(),
|
||||
[=] (const ihipCtx_t *d) { return d->getDeviceNum() == peer->getDeviceNum(); });
|
||||
|
||||
return (match != std::end(_peers));
|
||||
}
|
||||
|
||||
@@ -616,7 +628,7 @@ void ihipDevice_t::locked_reset()
|
||||
|
||||
//FIXME - Calling am_memtracker_reset is really bad since it destroyed all buffers allocated by the HCC runtime as well
|
||||
//such as the printf buffer. Re-initialze the printf buffer as a workaround for now.
|
||||
#if (__hcc_workweek__ >= 17423)
|
||||
#ifdef HC_FEATURE_PRINTF
|
||||
Kalmar::getContext()->initPrintfBuffer();
|
||||
#endif
|
||||
};
|
||||
@@ -700,26 +712,25 @@ int checkAccess(hsa_agent_t agent, hsa_amd_memory_pool_t pool)
|
||||
return access;
|
||||
}
|
||||
|
||||
hsa_status_t get_region_info(hsa_region_t region, void* data)
|
||||
hsa_status_t get_pool_info(hsa_amd_memory_pool_t pool, void* data)
|
||||
{
|
||||
hsa_status_t err;
|
||||
hipDeviceProp_t* p_prop = reinterpret_cast<hipDeviceProp_t*>(data);
|
||||
uint32_t region_segment;
|
||||
|
||||
// Get region segment
|
||||
err = hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, ®ion_segment);
|
||||
// Get pool segment
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, ®ion_segment);
|
||||
ErrorCheck(err);
|
||||
|
||||
switch(region_segment) {
|
||||
case HSA_REGION_SEGMENT_READONLY:
|
||||
err = hsa_region_get_info(region, HSA_REGION_INFO_SIZE, &(p_prop->totalConstMem)); break;
|
||||
/* case HSA_REGION_SEGMENT_PRIVATE:
|
||||
cout<<"PRIVATE"<<endl; private segment cannot be queried */
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &(p_prop->totalConstMem)); break;
|
||||
case HSA_REGION_SEGMENT_GROUP:
|
||||
err = hsa_region_get_info(region, HSA_REGION_INFO_SIZE, &(p_prop->sharedMemPerBlock)); break;
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &(p_prop->sharedMemPerBlock));
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
@@ -750,11 +761,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop)
|
||||
hipError_t e = hipSuccess;
|
||||
hsa_status_t err;
|
||||
|
||||
// Set some defaults in case we don't find the appropriate regions:
|
||||
prop->totalGlobalMem = 0;
|
||||
prop->totalConstMem = 0;
|
||||
prop-> maxThreadsPerMultiProcessor = 0;
|
||||
prop->regsPerBlock = 0;
|
||||
memset(prop, 0, sizeof(hipDeviceProp_t));
|
||||
|
||||
if (_hsaAgent.handle == -1) {
|
||||
return hipErrorInvalidDevice;
|
||||
@@ -854,15 +861,18 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop)
|
||||
prop-> maxThreadsPerMultiProcessor = prop->warpSize*max_waves_per_cu;
|
||||
|
||||
// Get memory properties
|
||||
err = hsa_agent_iterate_regions(_hsaAgent, get_region_info, prop);
|
||||
err = hsa_amd_agent_iterate_memory_pools(_hsaAgent, get_pool_info, prop);
|
||||
if (err == HSA_STATUS_INFO_BREAK) {
|
||||
err = HSA_STATUS_SUCCESS;
|
||||
}
|
||||
DeviceErrorCheck(err);
|
||||
|
||||
// Get the size of the region we are using for Accelerator Memory allocations:
|
||||
// Get the size of the pool we are using for Accelerator Memory allocations:
|
||||
hsa_region_t *am_region = static_cast<hsa_region_t*>(_acc.get_hsa_am_region());
|
||||
err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, &prop->totalGlobalMem);
|
||||
DeviceErrorCheck(err);
|
||||
// maxSharedMemoryPerMultiProcessor should be as the same as group memory size.
|
||||
// Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group region size.
|
||||
// Group memory will not be paged out, so, the physical memory size is the total shared memory size, and also equal to the group pool size.
|
||||
prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem;
|
||||
|
||||
// Get Max memory clock frequency
|
||||
@@ -882,7 +892,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop)
|
||||
prop->arch.hasGlobalFloatAtomicExch = 1;
|
||||
prop->arch.hasSharedInt32Atomics = 1;
|
||||
prop->arch.hasSharedFloatAtomicExch = 1;
|
||||
prop->arch.hasFloatAtomicAdd = 0;
|
||||
prop->arch.hasFloatAtomicAdd = 1; // supported with CAS loop, but is supported
|
||||
prop->arch.hasGlobalInt64Atomics = 1;
|
||||
prop->arch.hasSharedInt64Atomics = 1;
|
||||
prop->arch.hasDoubles = 1;
|
||||
@@ -890,7 +900,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop)
|
||||
prop->arch.hasWarpBallot = 1;
|
||||
prop->arch.hasWarpShuffle = 1;
|
||||
prop->arch.hasFunnelShift = 0; // TODO-hcc
|
||||
prop->arch.hasThreadFenceSystem = 0; // TODO-hcc
|
||||
prop->arch.hasThreadFenceSystem = 1;
|
||||
prop->arch.hasSyncThreadsExt = 0; // TODO-hcc
|
||||
prop->arch.hasSurfaceFuncs = 0; // TODO-hcc
|
||||
prop->arch.has3dGrid = 1;
|
||||
@@ -1582,7 +1592,9 @@ void ihipPostLaunchKernel(const char *kernelName, hipStream_t stream, grid_launc
|
||||
tprintf(DB_SYNC, "ihipPostLaunchKernel, unlocking stream\n");
|
||||
|
||||
stream->lockclose_postKernelCommand(kernelName, lp.av);
|
||||
MARKER_END();
|
||||
if(HIP_PROFILE_API) {
|
||||
MARKER_END();
|
||||
}
|
||||
}
|
||||
|
||||
//=================================================================================================
|
||||
@@ -1677,6 +1689,9 @@ const char *ihipErrorString(hipError_t hip_error)
|
||||
// So we check dstCtx's and srcCtx's peerList to see if the both include thisCtx.
|
||||
bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo)
|
||||
{
|
||||
if (copyEngineCtx == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Make sure this is a device-to-device copy with all memory available to the requested copy engine
|
||||
//
|
||||
@@ -1684,11 +1699,18 @@ bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPoin
|
||||
if (dstPtrInfo->_sizeBytes == 0) {
|
||||
return false;
|
||||
} else {
|
||||
#if USE_APP_PTR_FOR_CTX
|
||||
ihipCtx_t *dstCtx = static_cast<ihipCtx_t*> (dstPtrInfo->_appPtr);
|
||||
#else
|
||||
ihipCtx_t *dstCtx = ihipGetPrimaryCtx(dstPtrInfo->_appId);
|
||||
#endif
|
||||
if (copyEngineCtx != dstCtx) {
|
||||
// Only checks peer list if contexts are different
|
||||
LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData());
|
||||
//tprintf(DB_SYNC, "dstCrit lock succeeded\n");
|
||||
#if DB_PEER_CTX
|
||||
std::cerr << "checking peer : copyEngineCtx =" << copyEngineCtx << " dstCtx =" << dstCtx << " peerCnt="
|
||||
<< ctxCrit->peerCnt() << "\n";
|
||||
#endif
|
||||
if (!ctxCrit->isPeerWatcher(copyEngineCtx)) {
|
||||
return false;
|
||||
};
|
||||
@@ -1696,16 +1718,22 @@ bool ihipStream_t::canSeeMemory(const ihipCtx_t *copyEngineCtx, const hc::AmPoin
|
||||
}
|
||||
|
||||
|
||||
|
||||
// TODO - pointer-info stores a deviceID not a context,may have some unusual side-effects here:
|
||||
if (srcPtrInfo->_sizeBytes == 0) {
|
||||
return false;
|
||||
} else {
|
||||
#if USE_APP_PTR_FOR_CTX
|
||||
ihipCtx_t *srcCtx = static_cast<ihipCtx_t*> (srcPtrInfo->_appPtr);
|
||||
#else
|
||||
ihipCtx_t *srcCtx = ihipGetPrimaryCtx(srcPtrInfo->_appId);
|
||||
#endif
|
||||
if (copyEngineCtx != srcCtx) {
|
||||
// Only checks peer list if contexts are different
|
||||
LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData());
|
||||
//tprintf(DB_SYNC, "srcCrit lock succeeded\n");
|
||||
#if DB_PEER_CTX
|
||||
std::cerr << "checking peer : copyEngineCtx =" << copyEngineCtx << " srcCtx =" << srcCtx << " peerCnt="
|
||||
<< ctxCrit->peerCnt() << "\n";
|
||||
#endif
|
||||
if (!ctxCrit->isPeerWatcher(copyEngineCtx)) {
|
||||
return false;
|
||||
};
|
||||
@@ -1805,7 +1833,7 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind,
|
||||
}
|
||||
} else {
|
||||
*forceUnpinnedCopy = true;
|
||||
tprintf (DB_COPY, "P2P: Copy engine(dev:%d agent=0x%lx) cannot see both host and device pointers - forcing copy with unpinned engine.\n",
|
||||
tprintf (DB_COPY, "Copy engine(dev:%d agent=0x%lx) cannot see both host and device pointers - forcing copy with unpinned engine.\n",
|
||||
*copyDevice ? (*copyDevice)->getDeviceNum() : -1,
|
||||
*copyDevice ? (*copyDevice)->getDevice()->_hsaAgent.handle : 0x0);
|
||||
if (HIP_FAIL_SOC & 0x2) {
|
||||
@@ -1820,10 +1848,11 @@ void ihipStream_t::resolveHcMemcpyDirection(unsigned hipMemKind,
|
||||
|
||||
void printPointerInfo(unsigned dbFlag, const char *tag, const void *ptr, const hc::AmPointerInfo &ptrInfo)
|
||||
{
|
||||
tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d\n",
|
||||
tprintf (dbFlag, " %s=%p baseHost=%p baseDev=%p sz=%zu home_dev=%d tracked=%d isDevMem=%d registered=%d allocSeqNum=%zu, appAllocationFlags=%x, appPtr=%p\n",
|
||||
tag, ptr,
|
||||
ptrInfo._hostPointer, ptrInfo._devicePointer, ptrInfo._sizeBytes,
|
||||
ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged);
|
||||
ptrInfo._appId, ptrInfo._sizeBytes != 0, ptrInfo._isInDeviceMem, !ptrInfo._isAmManaged,
|
||||
ptrInfo._allocSeqNum, ptrInfo._appAllocationFlags, ptrInfo._appPtr);
|
||||
}
|
||||
|
||||
|
||||
@@ -1871,12 +1900,14 @@ void tailorPtrInfo(hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeByte
|
||||
};
|
||||
|
||||
|
||||
bool getTailoredPtrInfo(hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeBytes)
|
||||
bool getTailoredPtrInfo(const char *tag, hc::AmPointerInfo *ptrInfo, const void * ptr, size_t sizeBytes)
|
||||
{
|
||||
bool tracked = (hc::am_memtracker_getinfo(ptrInfo, ptr) == AM_SUCCESS);
|
||||
printPointerInfo(DB_COPY, tag, ptr, *ptrInfo);
|
||||
|
||||
if (tracked) {
|
||||
tailorPtrInfo(ptrInfo, ptr, sizeBytes);
|
||||
printPointerInfo(DB_COPY, " mod", ptr, *ptrInfo);
|
||||
}
|
||||
|
||||
return tracked;
|
||||
@@ -1906,8 +1937,8 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
|
||||
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
bool dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes);
|
||||
bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes);
|
||||
bool dstTracked = getTailoredPtrInfo(" dst", &dstPtrInfo, dst, sizeBytes);
|
||||
bool srcTracked = getTailoredPtrInfo(" src", &srcPtrInfo, src, sizeBytes);
|
||||
|
||||
|
||||
// Some code in HCC and in printPointerInfo uses _sizeBytes==0 as an indication ptr is not valid, so check it here:
|
||||
@@ -2034,21 +2065,18 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
|
||||
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
|
||||
#endif
|
||||
bool dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes);
|
||||
bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes);
|
||||
tprintf (DB_COPY, "copyASync dst=%p src=%p, sz=%zu\n", dst, src, sizeBytes);
|
||||
bool dstTracked = getTailoredPtrInfo(" dst", &dstPtrInfo, dst, sizeBytes);
|
||||
bool srcTracked = getTailoredPtrInfo(" src", &srcPtrInfo, src, sizeBytes);
|
||||
|
||||
|
||||
hc::hcCommandKind hcCopyDir;
|
||||
ihipCtx_t *copyDevice;
|
||||
bool forceUnpinnedCopy;
|
||||
resolveHcMemcpyDirection(kind, &dstPtrInfo, &srcPtrInfo, &hcCopyDir, ©Device, &forceUnpinnedCopy);
|
||||
tprintf (DB_COPY, "copyASync copyDev:%d dst=%p (phys_dev:%d, isDevMem:%d) src=%p(phys_dev:%d, isDevMem:%d) sz=%zu dir=%s forceUnpinnedCopy=%d\n",
|
||||
tprintf (DB_COPY, " copyDev:%d dir=%s forceUnpinnedCopy=%d\n",
|
||||
copyDevice ? copyDevice->getDeviceNum():-1,
|
||||
dst, dstPtrInfo._appId, dstPtrInfo._isInDeviceMem,
|
||||
src, srcPtrInfo._appId, srcPtrInfo._isInDeviceMem,
|
||||
sizeBytes, hcMemcpyStr(hcCopyDir), forceUnpinnedCopy);
|
||||
printPointerInfo(DB_COPY, " dst", dst, dstPtrInfo);
|
||||
printPointerInfo(DB_COPY, " src", src, srcPtrInfo);
|
||||
hcMemcpyStr(hcCopyDir), forceUnpinnedCopy);
|
||||
|
||||
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
|
||||
// If both pointers are not tracked, we need to fall back to a sync copy.
|
||||
|
||||
+98
-27
@@ -32,10 +32,19 @@ THE SOFTWARE.
|
||||
#include "env.h"
|
||||
|
||||
|
||||
#if defined(__HCC__) && (__hcc_workweek__ < 16354)
|
||||
#if (__hcc_workweek__ < 16354)
|
||||
#error("This version of HIP requires a newer version of HCC.");
|
||||
#endif
|
||||
|
||||
// Use the __appPtr field in the am memtracker to store the context.
|
||||
// Requires a bug fix in HCC
|
||||
#if defined(__HCC_HAS_EXTENDED_AM_MEMTRACKER_UPDATE) and (__HCC_HAS_EXTENDED_AM_MEMTRACKER_UPDATE != 0)
|
||||
#define USE_APP_PTR_FOR_CTX 1
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
#define USE_IPC 1
|
||||
|
||||
//---
|
||||
@@ -128,6 +137,7 @@ extern std::vector<ProfTrigger> g_dbStopTriggers;
|
||||
class ihipStream_t;
|
||||
class ihipDevice_t;
|
||||
class ihipCtx_t;
|
||||
struct ihipEventData_t;
|
||||
|
||||
// Color defs for debug messages:
|
||||
#define KNRM "\x1B[0m"
|
||||
@@ -143,10 +153,12 @@ extern const char *API_COLOR;
|
||||
extern const char *API_COLOR_END;
|
||||
|
||||
|
||||
// If set, thread-safety is enforced on all stream functions.
|
||||
// Stream functions will acquire a mutex before entering critical sections.
|
||||
#define STREAM_THREAD_SAFE 1
|
||||
// If set, thread-safety is enforced on all event/stream/ctx/device functions.
|
||||
// Can disable for performance or functional experiments - in this case
|
||||
// the code uses a dummy "no-op" mutex.
|
||||
#define EVENT_THREAD_SAFE 1
|
||||
|
||||
#define STREAM_THREAD_SAFE 1
|
||||
|
||||
#define CTX_THREAD_SAFE 1
|
||||
|
||||
@@ -209,7 +221,8 @@ extern const char *API_COLOR_END;
|
||||
#define DB_SYNC 1 /* 0x02 - trace synchronization pieces */
|
||||
#define DB_MEM 2 /* 0x04 - trace memory allocation / deallocation */
|
||||
#define DB_COPY 3 /* 0x08 - trace memory copy and peer commands. . */
|
||||
#define DB_MAX_FLAG 4
|
||||
#define DB_WARN 4 /* 0x10 - warn about sub-optimal or shady behavior */
|
||||
#define DB_MAX_FLAG 5
|
||||
// When adding a new debug flag, also add to the char name table below.
|
||||
//
|
||||
//
|
||||
@@ -226,6 +239,7 @@ static const DbName dbName [] =
|
||||
{KYEL, "sync"},
|
||||
{KCYN, "mem"},
|
||||
{KMAG, "copy"},
|
||||
{KRED, "warn"},
|
||||
};
|
||||
|
||||
|
||||
@@ -244,23 +258,28 @@ static const DbName dbName [] =
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
static inline uint64_t getTicks()
|
||||
{
|
||||
return hc::get_system_ticks();
|
||||
}
|
||||
|
||||
//---
|
||||
extern void recordApiTrace(std::string *fullStr, const std::string &apiStr);
|
||||
extern uint64_t recordApiTrace(std::string *fullStr, const std::string &apiStr);
|
||||
|
||||
#if COMPILE_HIP_ATP_MARKER || (COMPILE_HIP_TRACE_API & 0x1)
|
||||
#define API_TRACE(forceTrace, ...)\
|
||||
uint64_t hipApiStartTick;\
|
||||
{\
|
||||
tls_tidInfo.incApiSeqNum();\
|
||||
if (forceTrace || (HIP_PROFILE_API || (COMPILE_HIP_DB && (HIP_TRACE_API & (1<<TRACE_ALL))))) {\
|
||||
std::string apiStr = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\
|
||||
std::string fullStr;\
|
||||
recordApiTrace(&fullStr, apiStr);\
|
||||
hipApiStartTick = recordApiTrace(&fullStr, apiStr);\
|
||||
if (HIP_PROFILE_API == 0x1) {MARKER_BEGIN(__func__, "HIP") }\
|
||||
else if (HIP_PROFILE_API == 0x2) {MARKER_BEGIN(fullStr.c_str(), "HIP"); }\
|
||||
}\
|
||||
}
|
||||
|
||||
#else
|
||||
// Swallow API_TRACE
|
||||
#define API_TRACE(IS_CMD, ...)\
|
||||
@@ -302,7 +321,10 @@ extern void recordApiTrace(std::string *fullStr, const std::string &apiStr);
|
||||
tls_lastHipError = localHipStatus;\
|
||||
\
|
||||
if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API & (1<<TRACE_ALL)) {\
|
||||
fprintf(stderr, " %ship-api tid:%d.%lu %-30s ret=%2d (%s)>>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, tls_tidInfo.tid(),tls_tidInfo.apiSeqNum(), __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\
|
||||
auto ticks = getTicks() - hipApiStartTick;\
|
||||
fprintf(stderr, " %ship-api tid:%d.%lu %-30s ret=%2d (%s)>> +%lu ns%s\n", \
|
||||
(localHipStatus == 0) ? API_COLOR:KRED, tls_tidInfo.tid(),tls_tidInfo.apiSeqNum(), \
|
||||
__func__, localHipStatus, ihipErrorString(localHipStatus), ticks, API_COLOR_END);\
|
||||
}\
|
||||
if (HIP_PROFILE_API) { MARKER_END(); }\
|
||||
localHipStatus;\
|
||||
@@ -371,6 +393,12 @@ class FakeMutex
|
||||
void unlock() { }
|
||||
};
|
||||
|
||||
#if EVENT_THREAD_SAFE
|
||||
typedef std::mutex EventMutex;
|
||||
#else
|
||||
#warning "Stream thread-safe disabled"
|
||||
typedef FakeMutex EventMutex;
|
||||
#endif
|
||||
|
||||
#if STREAM_THREAD_SAFE
|
||||
typedef std::mutex StreamMutex;
|
||||
@@ -521,11 +549,11 @@ public:
|
||||
|
||||
hc::accelerator_view* locked_getAv() { LockedAccessor_StreamCrit_t crit(_criticalData); return &(crit->_av); };
|
||||
|
||||
void locked_streamWaitEvent(hipEvent_t event);
|
||||
void locked_recordEvent(hipEvent_t event);
|
||||
void locked_streamWaitEvent(ihipEventData_t & event);
|
||||
hc::completion_future locked_recordEvent(hipEvent_t event);
|
||||
|
||||
bool locked_eventIsReady(hipEvent_t event);
|
||||
void locked_eventWaitComplete(hipEvent_t event, hc::hcWaitMode waitMode);
|
||||
void locked_eventWaitComplete(hc::completion_future &marker, hc::hcWaitMode waitMode);
|
||||
|
||||
ihipStreamCritical_t &criticalData() { return _criticalData; };
|
||||
|
||||
@@ -609,32 +637,76 @@ enum ihipEventType_t {
|
||||
hipEventTypeStopCommand,
|
||||
};
|
||||
|
||||
|
||||
struct ihipEventData_t
|
||||
{
|
||||
ihipEventData_t() {
|
||||
_state = hipEventStatusCreated;
|
||||
_stream = NULL;
|
||||
_timestamp = 0;
|
||||
_type = hipEventTypeIndependent;
|
||||
};
|
||||
|
||||
void marker(const hc::completion_future & marker) { _marker = marker; };
|
||||
hc::completion_future & marker() { return _marker; }
|
||||
uint64_t timestamp() const { return _timestamp; } ;
|
||||
ihipEventType_t type() const { return _type; };
|
||||
|
||||
ihipEventType_t _type;
|
||||
hipEventStatus_t _state;
|
||||
hipStream_t _stream; // Stream where the event is recorded. Null stream is resolved to actual stream when recorded
|
||||
uint64_t _timestamp; // store timestamp, may be set on host or by marker.
|
||||
private:
|
||||
hc::completion_future _marker;
|
||||
};
|
||||
|
||||
|
||||
//=============================================================================
|
||||
//class ihipEventCriticalBase_t
|
||||
template <typename MUTEX_TYPE>
|
||||
class ihipEventCriticalBase_t : LockedBase<MUTEX_TYPE>
|
||||
{
|
||||
public:
|
||||
ihipEventCriticalBase_t(const ihipEvent_t *parentEvent) :
|
||||
_parent(parentEvent)
|
||||
{}
|
||||
~ihipEventCriticalBase_t() {};
|
||||
|
||||
// Keep data in structure so it can be easily copied into snapshots
|
||||
// (used to reduce lock contention and preserve correct lock order)
|
||||
ihipEventData_t _eventData;
|
||||
|
||||
private:
|
||||
const ihipEvent_t *_parent;
|
||||
friend class LockedAccessor<ihipEventCriticalBase_t>;
|
||||
};
|
||||
|
||||
typedef ihipEventCriticalBase_t<EventMutex> ihipEventCritical_t;
|
||||
|
||||
typedef LockedAccessor<ihipEventCritical_t> LockedAccessor_EventCrit_t;
|
||||
|
||||
// internal hip event structure.
|
||||
class ihipEvent_t {
|
||||
public:
|
||||
ihipEvent_t(unsigned flags);
|
||||
void attachToCompletionFuture(const hc::completion_future *cf, hipStream_t stream, ihipEventType_t eventType);
|
||||
void refereshEventStatus();
|
||||
hc::completion_future & marker() { return _marker; }
|
||||
void marker(hc::completion_future cf) { _marker = cf; };
|
||||
std::pair<hipEventStatus_t, uint64_t> refreshEventStatus(); // returns pair <state, timestamp>
|
||||
|
||||
bool locked_isReady();
|
||||
void locked_waitComplete(hc::hcWaitMode waitMode);
|
||||
|
||||
uint64_t timestamp() const { return _timestamp; } ;
|
||||
ihipEventType_t type() const { return _type; };
|
||||
// Return a copy of the critical state. The critical data is locked during the copy.
|
||||
ihipEventData_t locked_copyCrit() {
|
||||
LockedAccessor_EventCrit_t crit(_criticalData);
|
||||
return _criticalData._eventData;
|
||||
};
|
||||
|
||||
ihipEventCritical_t &criticalData() { return _criticalData; };
|
||||
|
||||
public:
|
||||
hipEventStatus_t _state;
|
||||
|
||||
hipStream_t _stream; // Stream where the event is recorded. Null stream is resolved to actual stream when recorded
|
||||
unsigned _flags;
|
||||
|
||||
|
||||
private:
|
||||
hc::completion_future _marker;
|
||||
ihipEventType_t _type;
|
||||
uint64_t _timestamp; // store timestamp, may be set on host or by marker.
|
||||
ihipEventCritical_t _criticalData;
|
||||
|
||||
friend hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream);
|
||||
} ;
|
||||
|
||||
@@ -652,7 +724,6 @@ public:
|
||||
};
|
||||
|
||||
~ihipDeviceCriticalBase_t() {
|
||||
|
||||
}
|
||||
|
||||
// Contexts:
|
||||
|
||||
+17
-9
@@ -61,11 +61,15 @@ int sharePtr(void *ptr, ihipCtx_t *ctx, bool shareWithAll, unsigned hipFlags)
|
||||
|
||||
auto device = ctx->getWriteableDevice();
|
||||
|
||||
#if USE_APP_PTR_FOR_CTX
|
||||
hc::am_memtracker_update(ptr, device->_deviceId, hipFlags, ctx);
|
||||
#else
|
||||
hc::am_memtracker_update(ptr, device->_deviceId, hipFlags);
|
||||
#endif
|
||||
|
||||
if (shareWithAll) {
|
||||
hsa_status_t s = hsa_amd_agents_allow_access(g_deviceCnt+1, g_allAgents, NULL, ptr);
|
||||
tprintf (DB_MEM, " allow access to CPU + all %d GPUs (shareWithAll)\n", g_deviceCnt);
|
||||
tprintf (DB_MEM, " allow access to CPU + all %d GPUs (shareWithAll)\n", g_deviceCnt);
|
||||
if (s != HSA_STATUS_SUCCESS) {
|
||||
ret = -1;
|
||||
}
|
||||
@@ -122,7 +126,7 @@ void * allocAndSharePtr(const char *msg, size_t sizeBytes, ihipCtx_t *ctx, bool
|
||||
if (HIP_INIT_ALLOC != -1) {
|
||||
// TODO , dont' call HIP API directly here:
|
||||
hipMemset(ptr, HIP_INIT_ALLOC, sizeBytes);
|
||||
}
|
||||
}
|
||||
|
||||
if (ptr != nullptr) {
|
||||
int r = sharePtr(ptr, ctx, shareWithAll, hipFlags);
|
||||
@@ -251,7 +255,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes)
|
||||
hip_status = hipErrorMemoryAllocation;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return ihipLogStatus(hip_status);
|
||||
@@ -284,10 +288,10 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
}
|
||||
|
||||
|
||||
const unsigned supportedFlags = hipHostMallocPortable
|
||||
| hipHostMallocMapped
|
||||
| hipHostMallocWriteCombined
|
||||
| hipHostMallocCoherent
|
||||
const unsigned supportedFlags = hipHostMallocPortable
|
||||
| hipHostMallocMapped
|
||||
| hipHostMallocWriteCombined
|
||||
| hipHostMallocCoherent
|
||||
| hipHostMallocNonCoherent;
|
||||
|
||||
|
||||
@@ -300,7 +304,7 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
|
||||
hip_status = hipErrorInvalidValue;
|
||||
} else {
|
||||
auto device = ctx->getWriteableDevice();
|
||||
|
||||
|
||||
unsigned amFlags = 0;
|
||||
if (flags & hipHostMallocCoherent) {
|
||||
amFlags = amHostCoherent;
|
||||
@@ -581,7 +585,7 @@ hipError_t hipMalloc3DArray(hipArray_t *array,
|
||||
hsa_ext_image_data_info_t imageInfo;
|
||||
hsa_status_t status = hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo);
|
||||
size_t alignment = imageInfo.alignment <= allocGranularity ? 0 : imageInfo.alignment;
|
||||
|
||||
|
||||
*ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, false, am_flags, 0, alignment);
|
||||
|
||||
if (size && (*ptr == NULL)) {
|
||||
@@ -660,7 +664,11 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags)
|
||||
vecAcc.push_back(ihipGetDevice(i)->_acc);
|
||||
}
|
||||
am_status = hc::am_memory_host_lock(device->_acc, hostPtr, sizeBytes, &vecAcc[0], vecAcc.size());
|
||||
#if USE_APP_PTR_FOR_CTX
|
||||
hc::am_memtracker_update(hostPtr, device->_deviceId, flags, ctx);
|
||||
#else
|
||||
hc::am_memtracker_update(hostPtr, device->_deviceId, flags);
|
||||
#endif
|
||||
|
||||
tprintf(DB_MEM, " %s registered ptr=%p and allowed access to %zu peers\n", __func__, hostPtr, vecAcc.size());
|
||||
if(am_status == AM_SUCCESS){
|
||||
|
||||
+158
-10
@@ -27,6 +27,7 @@ THE SOFTWARE.
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
@@ -217,7 +218,7 @@ namespace
|
||||
using namespace ELFIO;
|
||||
using namespace std;
|
||||
|
||||
static constexpr pair<Elf64_Addr, Elf_Xword> r{0, 0};
|
||||
static const pair<Elf64_Addr, Elf_Xword> r{0, 0};
|
||||
|
||||
for (auto i = 0u; i != section.get_symbols_num(); ++i) {
|
||||
// TODO: this is boyscout code, caching the temporaries
|
||||
@@ -374,6 +375,8 @@ hipError_t hipModuleLoad(hipModule_t *module, const char *fname)
|
||||
|
||||
hipError_t hipModuleUnload(hipModule_t hmod)
|
||||
{
|
||||
HIP_INIT_API(hmod);
|
||||
|
||||
// TODO - improve this synchronization so it is thread-safe.
|
||||
// Currently we want for all inflight activity to complete, but don't prevent another
|
||||
// thread from launching new kernels before we finish this operation.
|
||||
@@ -408,7 +411,7 @@ hipError_t ihipModuleGetSymbol(hipFunction_t *func, hipModule_t hmod, const char
|
||||
hipError_t ret = hipSuccess;
|
||||
|
||||
if (name == nullptr){
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
return (hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
if (ctx == nullptr){
|
||||
@@ -431,7 +434,7 @@ hipError_t ihipModuleGetSymbol(hipFunction_t *func, hipModule_t hmod, const char
|
||||
hsa_executable_symbol_t symbol;
|
||||
status = hsa_executable_get_symbol(hmod->executable, NULL, name, gpuAgent, 0, &symbol);
|
||||
if(status != HSA_STATUS_SUCCESS){
|
||||
return ihipLogStatus(hipErrorNotFound);
|
||||
return hipErrorNotFound;
|
||||
}
|
||||
|
||||
status = hsa_executable_symbol_get_info(symbol,
|
||||
@@ -503,11 +506,11 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f,
|
||||
if(config[0] == HIP_LAUNCH_PARAM_BUFFER_POINTER && config[2] == HIP_LAUNCH_PARAM_BUFFER_SIZE && config[4] == HIP_LAUNCH_PARAM_END){
|
||||
kernArgSize = *(size_t*)(config[3]);
|
||||
} else {
|
||||
return ihipLogStatus(hipErrorNotInitialized);
|
||||
return hipErrorNotInitialized;
|
||||
}
|
||||
|
||||
}else{
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
|
||||
@@ -611,6 +614,125 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f,
|
||||
sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent));
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
struct Agent_global {
|
||||
std::string name;
|
||||
hipDeviceptr_t address;
|
||||
std::uint32_t byte_cnt;
|
||||
};
|
||||
|
||||
inline
|
||||
void* address(hsa_executable_symbol_t x)
|
||||
{
|
||||
void* r = nullptr;
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
std::string name(hsa_executable_symbol_t x)
|
||||
{
|
||||
uint32_t sz = 0u;
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &sz);
|
||||
|
||||
std::string r(sz, '\0');
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_NAME, &r.front());
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
std::uint32_t size(hsa_executable_symbol_t x)
|
||||
{
|
||||
std::uint32_t r = 0;
|
||||
hsa_executable_symbol_get_info(
|
||||
x, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
void track(const Agent_global& x)
|
||||
{
|
||||
tprintf(
|
||||
DB_MEM,
|
||||
" add variable '%s' with ptr=%p size=%u to tracker\n",
|
||||
x.name.c_str(),
|
||||
x.address,
|
||||
x.byte_cnt);
|
||||
|
||||
auto device = ihipGetTlsDefaultCtx()->getWriteableDevice();
|
||||
|
||||
hc::AmPointerInfo ptr_info(
|
||||
nullptr,
|
||||
x.address,
|
||||
x.address,
|
||||
x.byte_cnt,
|
||||
device->_acc,
|
||||
true,
|
||||
false);
|
||||
hc::am_memtracker_add(x.address, ptr_info);
|
||||
hc::am_memtracker_update(x.address, device->_deviceId, 0u);
|
||||
}
|
||||
|
||||
template<typename Container = std::vector<Agent_global>>
|
||||
inline
|
||||
hsa_status_t copy_agent_global_variables(
|
||||
hsa_executable_t, hsa_agent_t, hsa_executable_symbol_t x, void* out)
|
||||
{
|
||||
assert(out);
|
||||
|
||||
hsa_symbol_kind_t t = {};
|
||||
hsa_executable_symbol_get_info(x, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &t);
|
||||
|
||||
if (t == HSA_SYMBOL_KIND_VARIABLE) {
|
||||
static_cast<Container*>(out)->push_back(
|
||||
Agent_global{name(x), address(x), size(x)});
|
||||
|
||||
track(static_cast<Container*>(out)->back());
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
inline
|
||||
hsa_agent_t this_agent()
|
||||
{
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
if (!ctx) throw std::runtime_error{"No active HIP context."};
|
||||
|
||||
auto device = ctx->getDevice();
|
||||
|
||||
if (!device) throw std::runtime_error{"No device available for HIP."};
|
||||
|
||||
ihipDevice_t *currentDevice = ihipGetDevice(device->_deviceId);
|
||||
|
||||
if (!currentDevice) {
|
||||
throw std::runtime_error{"No active device for HIP"};
|
||||
}
|
||||
|
||||
return currentDevice->_hsaAgent;
|
||||
}
|
||||
|
||||
inline
|
||||
std::vector<Agent_global> read_agent_globals(hipModule_t hmodule)
|
||||
{
|
||||
std::vector<Agent_global> r;
|
||||
|
||||
|
||||
hsa_executable_iterate_agent_symbols(
|
||||
hmodule->executable, this_agent(), copy_agent_global_variables, &r);
|
||||
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
|
||||
hipModule_t hmod, const char* name)
|
||||
{
|
||||
@@ -623,11 +745,37 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
|
||||
return ihipLogStatus(hipErrorNotInitialized);
|
||||
}
|
||||
else{
|
||||
hipFunction_t func;
|
||||
ihipModuleGetSymbol(&func, hmod, name);
|
||||
*bytes = PrintSymbolSizes(hmod->ptr, name) + sizeof(amd_kernel_code_t);
|
||||
*dptr = reinterpret_cast<void*>(func->_object);
|
||||
return ihipLogStatus(ret);
|
||||
static std::unordered_map<
|
||||
hipModule_t, std::vector<Agent_global>> agent_globals;
|
||||
|
||||
// TODO: this is not particularly robust.
|
||||
if (agent_globals.count(hmod) == 0) {
|
||||
static std::mutex mtx;
|
||||
std::lock_guard<std::mutex> lck{mtx};
|
||||
|
||||
if (agent_globals.count(hmod) == 0) {
|
||||
agent_globals.emplace(hmod, read_agent_globals(hmod));
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: This is unsafe iff some other emplacement triggers rehashing.
|
||||
// It will have to be properly fleshed out in the future.
|
||||
const auto it0 = agent_globals.find(hmod);
|
||||
if (it0 == agent_globals.cend()) {
|
||||
throw std::runtime_error{"agent_globals data structure corrupted."};
|
||||
}
|
||||
|
||||
const auto it1 = std::find_if(
|
||||
it0->second.cbegin(),
|
||||
it0->second.cend(),
|
||||
[=](const Agent_global& x) { return x.name == name; });
|
||||
|
||||
if (it1 == it0->second.cend()) return ihipLogStatus(hipErrorNotFound);
|
||||
|
||||
*dptr = it1->address;
|
||||
*bytes = it1->byte_cnt;
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -93,18 +93,23 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
auto ecd = event->locked_copyCrit();
|
||||
|
||||
if (event == nullptr) {
|
||||
e = hipErrorInvalidResourceHandle;
|
||||
|
||||
} else if (event->_state != hipEventStatusUnitialized) {
|
||||
} else if ((ecd._state != hipEventStatusUnitialized) &&
|
||||
(ecd._state != hipEventStatusCreated)) {
|
||||
|
||||
if (HIP_SYNC_STREAM_WAIT || (HIP_SYNC_NULL_STREAM && (stream == 0))) {
|
||||
// conservative wait on host for the specified event to complete:
|
||||
event->locked_waitComplete((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive);
|
||||
// return _stream->locked_eventWaitComplete(this, waitMode);
|
||||
//
|
||||
ecd._stream->locked_eventWaitComplete(ecd.marker(), (event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive);
|
||||
} else {
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
// This will user create_blocking_marker to wait on the specified queue.
|
||||
stream->locked_streamWaitEvent(event);
|
||||
// This will use create_blocking_marker to wait on the specified queue.
|
||||
stream->locked_streamWaitEvent(ecd);
|
||||
}
|
||||
|
||||
} // else event not recorded, return immediately and don't create marker.
|
||||
@@ -140,7 +145,6 @@ hipError_t hipStreamQuery(hipStream_t stream)
|
||||
//---
|
||||
hipError_t hipStreamSynchronize(hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(stream);
|
||||
HIP_INIT_SPECIAL_API(TRACE_SYNC, stream);
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
@@ -75,6 +75,18 @@ RUN: %t CMAKE_TEST_NAME <arguments_to_test_executable> EXCLUDE_HIP_PLATFORM <hcc
|
||||
```
|
||||
|
||||
|
||||
#### CMAKECMD command
|
||||
|
||||
The supported syntax for the CMAKECMD command is:
|
||||
```
|
||||
CMAKECMD: <cmake_command> <options_to_cmake_command>
|
||||
```
|
||||
cmake_command: refers to any of the commands supported by ```cmake -E``` as specified in the [cmake documentation](https://cmake.org/cmake/help/latest/manual/cmake.1.html#command-line-tool-mode). Note that the commands are limited by the version of cmake the user is running.
|
||||
options_to_cmake_command: refers to the arguments supported by the specific cmake_command. The arguments are parsed by HIT to replace special markers. The markers supported by HIT are:
|
||||
%S: Refers to the source directory containing the current source file.
|
||||
%B: Refers to the build directory for the current cmake project i.e. CMAKE_CURRENT_BINARY_DIR.
|
||||
|
||||
|
||||
### Running tests:
|
||||
```
|
||||
ctest
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
// RUN: %run_test hipify "%s" "%t" %cuda_args
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
|
||||
/**
|
||||
* Allocate GPU memory for `count` elements of type `T`.
|
||||
*/
|
||||
template<typename T>
|
||||
static T* gpuMalloc(size_t count) {
|
||||
T* ret = nullptr;
|
||||
// CHECK: hipMalloc(&ret, count * sizeof(T));
|
||||
cudaMalloc(&ret, count * sizeof(T));
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
// CHECK: #include <hip/hip_runtime.h>
|
||||
#include <cuda.h>
|
||||
|
||||
#define TOKEN_PASTE(X, Y) X ## Y
|
||||
#define ARG_LIST_AS_MACRO a, device_x, device_y
|
||||
@@ -33,8 +35,13 @@ int main(int argc, char* argv[]) {
|
||||
// CHECK: hipMalloc(&device_x, kDataLen * sizeof(float));
|
||||
cudaMalloc(&device_x, kDataLen * sizeof(float));
|
||||
|
||||
#ifdef HERRING
|
||||
// CHECK: hipMalloc(&device_y, kDataLen * sizeof(float));
|
||||
cudaMalloc(&device_y, kDataLen * sizeof(float));
|
||||
#else
|
||||
// CHECK: hipMalloc(&device_y, kDataLen * sizeof(double));
|
||||
cudaMalloc(&device_y, kDataLen * sizeof(double));
|
||||
#endif
|
||||
|
||||
// CHECK: hipMemcpy(device_x, host_x, kDataLen * sizeof(float), hipMemcpyHostToDevice);
|
||||
cudaMemcpy(device_x, host_x, kDataLen * sizeof(float), cudaMemcpyHostToDevice);
|
||||
|
||||
@@ -99,6 +99,7 @@ int main(int argc, char **argv)
|
||||
// use command-line specified CUDA device, otherwise use device with highest Gflops/s
|
||||
cuda_device = findCudaDevice(argc, (const char **)argv);
|
||||
|
||||
// CHECK: hipDeviceProp_t deviceProp;
|
||||
cudaDeviceProp deviceProp;
|
||||
// CHECK: checkCudaErrors(hipGetDevice(&cuda_device));
|
||||
checkCudaErrors(cudaGetDevice(&cuda_device));
|
||||
@@ -135,6 +136,7 @@ int main(int argc, char **argv)
|
||||
checkCudaErrors(cudaStreamCreate(&(streams[i])));
|
||||
}
|
||||
|
||||
// CHECK: hipEvent_t start_event, stop_event;
|
||||
// create CUDA event handles
|
||||
cudaEvent_t start_event, stop_event;
|
||||
|
||||
|
||||
@@ -155,6 +155,20 @@ macro(HIT_ADD_FILES _dir _label _parent)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# Run cmake commands
|
||||
execute_process(COMMAND ${HIP_SRC_PATH}/tests/hit/parser --cmakeCMDs ${file}
|
||||
OUTPUT_VARIABLE _contents
|
||||
ERROR_QUIET
|
||||
WORKING_DIRECTORY ${_dir}
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
string(REGEX REPLACE "\n" ";" _contents "${_contents}")
|
||||
string(REGEX REPLACE "%S" ${_dir} _contents "${_contents}")
|
||||
string(REGEX REPLACE "%B" ${CMAKE_CURRENT_BINARY_DIR} _contents "${_contents}")
|
||||
foreach(_cmd ${_contents})
|
||||
string(REGEX REPLACE " " ";" _cmd "${_cmd}")
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E ${_cmd})
|
||||
endforeach()
|
||||
|
||||
# Add tests
|
||||
execute_process(COMMAND ${HIP_SRC_PATH}/tests/hit/parser --runCMDs ${file}
|
||||
OUTPUT_VARIABLE _contents
|
||||
|
||||
+21
-6
@@ -8,7 +8,7 @@ use File::Spec;
|
||||
sub parse_file {
|
||||
my $file = shift;
|
||||
(my $exe = $file) =~ s/\.[^.]+$//g;
|
||||
my (@buildCMDs, @runCMDs, @runNamedCMDs);
|
||||
my (@buildCMDs, @runCMDs, @runNamedCMDs, @cmakeCMDs);
|
||||
if (open (SOURCE, '<:encoding(UTF-8)', "$file")) {
|
||||
while (<SOURCE>) {
|
||||
my $line=$_;
|
||||
@@ -36,10 +36,17 @@ sub parse_file {
|
||||
$line =~ s/\R//g; # Remove line endings
|
||||
push @runNamedCMDs, $line;
|
||||
}
|
||||
# Look for CMAKECMD instructions
|
||||
if ($line =~ /^ \* CMAKECMD:/) {
|
||||
$line =~ s/^ \* CMAKECMD: //g; # Remove " * CMAKECMD: "
|
||||
# Substitute %S -> srcdir and %B -> builddir happens in cmake
|
||||
$line =~ s/\R//g; # Remove line endings
|
||||
push @cmakeCMDs, $line;
|
||||
}
|
||||
}
|
||||
close(SOURCE);
|
||||
}
|
||||
return (\@buildCMDs, \@runCMDs, \@runNamedCMDs);
|
||||
return (\@buildCMDs, \@runCMDs, \@runNamedCMDs, \@cmakeCMDs);
|
||||
}
|
||||
|
||||
# Exit if no arguments specified
|
||||
@@ -53,8 +60,9 @@ my @options = ();
|
||||
my $retBuildCMDs = 0;
|
||||
my $retRunCMDs = 0;
|
||||
my $retRunNamedCMDs = 0;
|
||||
my $retCmakeCMDs = 0;
|
||||
foreach $arg (@ARGV) {
|
||||
if ($retBuildCMDs or $retRunCMDs or $retRunNamedCMDs) {
|
||||
if ($retBuildCMDs or $retRunCMDs or $retRunNamedCMDs or $retCmakeCMDs) {
|
||||
push (@options, $arg);
|
||||
}
|
||||
if ($arg eq '--buildCMDs') {
|
||||
@@ -66,18 +74,21 @@ foreach $arg (@ARGV) {
|
||||
if ($arg eq '--runNamedCMDs') {
|
||||
$retRunNamedCMDs = 1;
|
||||
}
|
||||
if ($arg eq '--cmakeCMDs') {
|
||||
$retCmakeCMDs = 1;
|
||||
}
|
||||
}
|
||||
|
||||
# Atleast one command needs to be specified
|
||||
if (($retBuildCMDs eq 0) and ($retRunCMDs eq 0) and ($retRunNamedCMDs eq 0)) {
|
||||
die "Usage: $0 <--buildCMDs|--runCMDs|--runNamedCMDs> FILENAMEs\n";
|
||||
if (($retBuildCMDs eq 0) and ($retRunCMDs eq 0) and ($retRunNamedCMDs eq 0) and ($retCmakeCMDs eq 0)) {
|
||||
die "Usage: $0 <--buildCMDs|--runCMDs|--runNamedCMDs|--cmakeCMDs> FILENAMEs\n";
|
||||
}
|
||||
|
||||
# Iterate over input files
|
||||
foreach $file (@options) {
|
||||
# Convert absolute path to path relative to working directory
|
||||
my $relfile = File::Spec->abs2rel($file);
|
||||
my ($buildCMDs, $runCMDs, $runNamedCMDs) = parse_file("$relfile");
|
||||
my ($buildCMDs, $runCMDs, $runNamedCMDs, $cmakeCMDs) = parse_file("$relfile");
|
||||
if ($retBuildCMDs) {
|
||||
# print "BuildCMDs:\n";
|
||||
print "$_\n" for @$buildCMDs;
|
||||
@@ -90,6 +101,10 @@ foreach $file (@options) {
|
||||
# print "RunNamedCMDs:\n";
|
||||
print "$_\n" for @$runNamedCMDs;
|
||||
}
|
||||
if ($retCmakeCMDs) {
|
||||
# print "CmakeCMDs:\n";
|
||||
print "$_\n" for @$cmakeCMDs;
|
||||
}
|
||||
}
|
||||
|
||||
# vim: ts=4:sw=4:expandtab:smartindent
|
||||
|
||||
@@ -34,7 +34,7 @@ THE SOFTWARE.
|
||||
#define NUM_STREAMS 2
|
||||
|
||||
__global__ void Iter(hipLaunchParm lp, int *Ad, int num){
|
||||
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
// Kernel loop designed to execute very slowly... ... ... so we can test timing-related behavior below
|
||||
if(tx == 0){
|
||||
for(int i = 0; i<num;i++){
|
||||
|
||||
@@ -46,7 +46,6 @@ int main(int argc, char *argv[])
|
||||
A_h = new char[Nbytes];
|
||||
|
||||
HIPCHECK ( hipMalloc((void **) &A_d, Nbytes) );
|
||||
A_h = (char*)malloc(Nbytes);
|
||||
|
||||
printf ("Size=%zu memsetval=%2x \n", Nbytes, memsetval);
|
||||
HIPCHECK ( hipMemsetD8(A_d, memsetval, Nbytes) );
|
||||
@@ -61,7 +60,7 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
hipFree((void *) A_d);
|
||||
free(A_h);
|
||||
delete [] A_h;
|
||||
passed();
|
||||
|
||||
}
|
||||
|
||||
@@ -27,7 +27,7 @@ THE SOFTWARE.
|
||||
#define SIZE 64<<2
|
||||
|
||||
__global__ void getSqAbs(hipLaunchParm lp, float *A, float *B, float *C){
|
||||
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
C[tx] = hipCsqabsf(make_hipFloatComplex(A[tx], B[tx]));
|
||||
}
|
||||
|
||||
|
||||
@@ -16,13 +16,13 @@
|
||||
|
||||
__global__ void cpy(hipLaunchParm lp, uint32_t *Out, uint32_t *In)
|
||||
{
|
||||
int tx = hipThreadIdx_x;
|
||||
int tx = threadIdx.x;
|
||||
memcpy(Out + tx, In + tx, sizeof(uint32_t));
|
||||
}
|
||||
|
||||
__global__ void set(hipLaunchParm lp, uint32_t *ptr, uint8_t val, size_t size)
|
||||
{
|
||||
int tx = hipThreadIdx_x;
|
||||
int tx = threadIdx.x;
|
||||
memset(ptr + tx, val, (sizeof(uint32_t)*(size/LEN)));
|
||||
}
|
||||
|
||||
@@ -58,6 +58,6 @@ int main()
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
passed();
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ THE SOFTWARE.
|
||||
|
||||
|
||||
__global__ void floatMath(hipLaunchParm lp, float *In, float *Out) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
Out[tid] = __cosf(In[tid]);
|
||||
Out[tid] = __exp10f(Out[tid]);
|
||||
Out[tid] = __expf(Out[tid]);
|
||||
|
||||
@@ -217,7 +217,7 @@ int computeGold(int *gpuData, const int len)
|
||||
__global__ void testKernel(hipLaunchParm lp,int *g_odata)
|
||||
{
|
||||
// access thread id
|
||||
const unsigned int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
const unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
// Test various atomic instructions
|
||||
|
||||
@@ -240,7 +240,7 @@ __global__ void testKernel(hipLaunchParm lp,int *g_odata)
|
||||
|
||||
// Atomic increment (modulo 17+1)
|
||||
atomicInc((unsigned int *)&g_odata[5], 17);
|
||||
|
||||
|
||||
// Atomic decrement
|
||||
atomicDec((unsigned int *)&g_odata[6], 137);
|
||||
|
||||
|
||||
@@ -32,82 +32,82 @@ THE SOFTWARE.
|
||||
#define SIZE N*sizeof(float)
|
||||
|
||||
__global__ void test_sincosf(hipLaunchParm lp, float* a, float* b, float *c){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
sincosf(a[tid], b+tid, c+tid);
|
||||
}
|
||||
|
||||
__global__ void test_sincospif(hipLaunchParm lp, float* a, float* b, float *c){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
sincospif(a[tid], b+tid, c+tid);
|
||||
}
|
||||
|
||||
__global__ void test_fdividef(hipLaunchParm lp, float *a, float* b, float *c){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
c[tid] = fdividef(a[tid], b[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_llrintf(hipLaunchParm lp, float *a, long long int *b){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
b[tid] = llrintf(a[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_lrintf(hipLaunchParm lp, float *a, long int *b){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
b[tid] = lrintf(a[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_rintf(hipLaunchParm lp, float *a, float *b){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
b[tid] = rintf(a[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_llroundf(hipLaunchParm lp, float *a, long long int *b){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
b[tid] = llroundf(a[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_lroundf(hipLaunchParm lp, float *a, long int *b){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
b[tid] = lroundf(a[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_rhypotf(hipLaunchParm lp, float *a, float* b, float *c){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
c[tid] = rhypotf(a[tid], b[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_norm3df(hipLaunchParm lp, float *a, float* b, float *c, float *d){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
d[tid] = norm3df(a[tid], b[tid], c[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_norm4df(hipLaunchParm lp, float *a, float* b, float *c, float *d, float *e){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
e[tid] = norm4df(a[tid], b[tid], c[tid], d[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_normf(hipLaunchParm lp, float *a, float *b){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
b[tid] = normf(N, a);
|
||||
}
|
||||
|
||||
__global__ void test_rnorm3df(hipLaunchParm lp, float *a, float* b, float *c, float *d){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
d[tid] = rnorm3df(a[tid], b[tid], c[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_rnorm4df(hipLaunchParm lp, float *a, float* b, float *c, float *d, float *e){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
e[tid] = rnorm4df(a[tid], b[tid], c[tid], d[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_rnormf(hipLaunchParm lp, float *a, float *b){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
b[tid] = rnormf(N, a);
|
||||
}
|
||||
|
||||
__global__ void test_erfinvf(hipLaunchParm lp, float *a, float *b){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
b[tid] = erff(erfinvf(a[tid]));
|
||||
}
|
||||
|
||||
@@ -139,7 +139,14 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -174,7 +181,14 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -205,7 +219,13 @@ for(int i=0;i<512;i++){
|
||||
}
|
||||
}
|
||||
|
||||
free(A);
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -234,7 +254,12 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -263,7 +288,12 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -291,7 +321,12 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -321,7 +356,12 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -350,7 +390,12 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -387,7 +432,16 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
delete [] D;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
hipFree(Dd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -427,7 +481,18 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
delete [] D;
|
||||
delete [] E;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
hipFree(Dd);
|
||||
hipFree(Ed);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -457,7 +522,12 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -489,7 +559,14 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -525,7 +602,16 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
delete [] D;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
hipFree(Dd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -565,7 +651,18 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
delete [] D;
|
||||
delete [] E;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
hipFree(Dd);
|
||||
hipFree(Ed);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -595,7 +692,12 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -622,7 +724,12 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -631,7 +738,7 @@ return false;
|
||||
}
|
||||
|
||||
int main(){
|
||||
if(run_sincosf() && run_sincospif() && run_fdividef() &&
|
||||
if(run_sincosf() && run_sincospif() && run_fdividef() &&
|
||||
run_llrintf() && run_norm3df() && run_norm4df() &&
|
||||
run_normf() && run_rnorm3df() && run_rnorm4df() &&
|
||||
run_rnormf() && run_lroundf() && run_llroundf() &&
|
||||
|
||||
@@ -32,72 +32,72 @@ THE SOFTWARE.
|
||||
#define SIZE N*sizeof(double)
|
||||
|
||||
__global__ void test_sincos(hipLaunchParm lp, double* a, double* b, double *c){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
sincos(a[tid], b+tid, c+tid);
|
||||
}
|
||||
|
||||
__global__ void test_sincospi(hipLaunchParm lp, double* a, double* b, double *c){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
sincospi(a[tid], b+tid, c+tid);
|
||||
}
|
||||
|
||||
__global__ void test_llrint(hipLaunchParm lp, double *a, long long int *b){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
b[tid] = llrint(a[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_lrint(hipLaunchParm lp, double *a, long int *b){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
b[tid] = lrint(a[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_rint(hipLaunchParm lp, double *a, double *b){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
b[tid] = rint(a[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_llround(hipLaunchParm lp, double *a, long long int *b){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
b[tid] = llround(a[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_lround(hipLaunchParm lp, double *a, long int *b){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
b[tid] = lround(a[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_rhypot(hipLaunchParm lp, double *a, double* b, double *c){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
c[tid] = rhypot(a[tid], b[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_norm3d(hipLaunchParm lp, double *a, double* b, double *c, double *d){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
d[tid] = norm3d(a[tid], b[tid], c[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_norm4d(hipLaunchParm lp, double *a, double* b, double *c, double *d, double *e){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
e[tid] = norm4d(a[tid], b[tid], c[tid], d[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_rnorm3d(hipLaunchParm lp, double *a, double* b, double *c, double *d){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
d[tid] = rnorm3d(a[tid], b[tid], c[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_rnorm4d(hipLaunchParm lp, double *a, double* b, double *c, double *d, double *e){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
e[tid] = rnorm4d(a[tid], b[tid], c[tid], d[tid]);
|
||||
}
|
||||
|
||||
__global__ void test_rnorm(hipLaunchParm lp, double *a, double *b){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
b[tid] = rnorm(N, a);
|
||||
}
|
||||
|
||||
__global__ void test_erfinv(hipLaunchParm lp, double *a, double *b){
|
||||
int tid = hipThreadIdx_x;
|
||||
int tid = threadIdx.x;
|
||||
b[tid] = erf(erfinv(a[tid]));
|
||||
}
|
||||
|
||||
@@ -128,7 +128,14 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -163,7 +170,14 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -193,7 +207,12 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -221,7 +240,12 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -249,7 +273,12 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -278,7 +307,12 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -306,7 +340,12 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -343,7 +382,16 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
delete [] D;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
hipFree(Dd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -383,7 +431,18 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
delete [] D;
|
||||
delete [] E;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
hipFree(Dd);
|
||||
hipFree(Ed);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -416,7 +475,14 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -452,7 +518,16 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
delete [] D;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
hipFree(Dd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -492,7 +567,18 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
delete [] D;
|
||||
delete [] E;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
hipFree(Dd);
|
||||
hipFree(Ed);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -522,7 +608,12 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
@@ -549,7 +640,12 @@ for(int i=0;i<512;i++){
|
||||
passed = 1;
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
|
||||
if(passed == 1){
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -36,7 +36,7 @@ __device__ int globalOut[NUM];
|
||||
|
||||
__global__ void Assign(hipLaunchParm lp, int* Out)
|
||||
{
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
Out[tid] = globalIn[tid];
|
||||
globalOut[tid] = globalIn[tid];
|
||||
}
|
||||
|
||||
@@ -19,16 +19,17 @@ THE SOFTWARE.
|
||||
|
||||
#include <iostream>
|
||||
#include <hip/hip_fp16.h>
|
||||
#include "hip/hip_runtime_api.h"
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "test_common.h"
|
||||
|
||||
#define LEN 64
|
||||
#define HALF_SIZE 64*sizeof(__half)
|
||||
#define HALF2_SIZE 64*sizeof(__half2)
|
||||
|
||||
#if __HIP_ARCH_GFX803__ > 0
|
||||
#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__
|
||||
|
||||
__global__ void __halfMath(hipLaunchParm lp, __half *A, __half *B, __half *C) {
|
||||
int tx = hipThreadIdx_x;
|
||||
int tx = threadIdx.x;
|
||||
__half a = A[tx];
|
||||
__half b = B[tx];
|
||||
__half c = C[tx];
|
||||
@@ -44,7 +45,7 @@ __global__ void __halfMath(hipLaunchParm lp, __half *A, __half *B, __half *C) {
|
||||
}
|
||||
|
||||
__global__ void __half2Math(hipLaunchParm lp, __half2 *A, __half2 *B, __half2 *C) {
|
||||
int tx = hipThreadIdx_x;
|
||||
int tx = threadIdx.x;
|
||||
__half2 a = A[tx];
|
||||
__half2 b = B[tx];
|
||||
__half2 c = C[tx];
|
||||
@@ -61,15 +62,21 @@ __global__ void __half2Math(hipLaunchParm lp, __half2 *A, __half2 *B, __half2 *C
|
||||
#endif
|
||||
|
||||
int main(){
|
||||
__half *A, *B, *C;
|
||||
hipMalloc(&A, HALF_SIZE);
|
||||
hipMalloc(&B, HALF_SIZE);
|
||||
hipMalloc(&C, HALF_SIZE);
|
||||
hipLaunchKernel(__halfMath, dim3(1,1,1), dim3(LEN,1,1), 0, 0, A, B, C);
|
||||
__half2 *A2, *B2, *C2;
|
||||
hipMalloc(&A, HALF2_SIZE);
|
||||
hipMalloc(&B, HALF2_SIZE);
|
||||
hipMalloc(&C, HALF2_SIZE);
|
||||
hipLaunchKernel(__half2Math, dim3(1,1,1), dim3(LEN,1,1), 0, 0, A2, B2, C2);
|
||||
|
||||
__half *A, *B, *C;
|
||||
hipMalloc(&A, HALF_SIZE);
|
||||
hipMalloc(&B, HALF_SIZE);
|
||||
hipMalloc(&C, HALF_SIZE);
|
||||
hipLaunchKernel(__halfMath, dim3(1,1,1), dim3(LEN,1,1), 0, 0, A, B, C);
|
||||
hipFree(A);
|
||||
hipFree(B);
|
||||
hipFree(C);
|
||||
__half2 *A2, *B2, *C2;
|
||||
hipMalloc(&A2, HALF2_SIZE);
|
||||
hipMalloc(&B2, HALF2_SIZE);
|
||||
hipMalloc(&C2, HALF2_SIZE);
|
||||
hipLaunchKernel(__half2Math, dim3(1,1,1), dim3(LEN,1,1), 0, 0, A2, B2, C2);
|
||||
hipFree(A2);
|
||||
hipFree(B2);
|
||||
hipFree(C2);
|
||||
passed();
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ THE SOFTWARE.
|
||||
|
||||
__global__ void vAdd(hipLaunchParm lp, float *In1, float *In2, float *In3, float *In4, float *Out)
|
||||
{
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
In4[tid] = In1[tid] + In2[tid];
|
||||
__threadfence();
|
||||
In3[tid] = In3[tid] + In4[tid];
|
||||
|
||||
@@ -37,9 +37,9 @@ __global__ void
|
||||
warpvote(hipLaunchParm lp, int* device_any, int* device_all , int Num_Warps_per_Block, int pshift)
|
||||
{
|
||||
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
device_any[hipThreadIdx_x>>pshift] = __any(tid -77);
|
||||
device_all[hipThreadIdx_x>>pshift] = __all(tid -77);
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
device_any[threadIdx.x>>pshift] = __any(tid -77);
|
||||
device_all[threadIdx.x>>pshift] = __all(tid -77);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
@@ -49,7 +49,7 @@ int main(int argc, char *argv[])
|
||||
warpSize = devProp.warpSize;
|
||||
|
||||
int w = warpSize;
|
||||
pshift = 0;
|
||||
pshift = 0;
|
||||
while (w >>= 1) ++pshift;
|
||||
|
||||
printf ("warpSize=%d pshift=%d\n", warpSize, pshift);
|
||||
|
||||
@@ -34,12 +34,12 @@ __global__ void
|
||||
gpu_ballot(hipLaunchParm lp, unsigned int* device_ballot, int Num_Warps_per_Block,int pshift)
|
||||
{
|
||||
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
const unsigned int warp_num = hipThreadIdx_x >> pshift;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const unsigned int warp_num = threadIdx.x >> pshift;
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
atomicAdd(&device_ballot[warp_num+hipBlockIdx_x*Num_Warps_per_Block],__popcll(__ballot(tid - 245)));
|
||||
atomicAdd(&device_ballot[warp_num+blockIdx.x*Num_Warps_per_Block],__popcll(__ballot(tid - 245)));
|
||||
#else
|
||||
atomicAdd(&device_ballot[warp_num+hipBlockIdx_x*Num_Warps_per_Block],__popc(__ballot(tid - 245)));
|
||||
atomicAdd(&device_ballot[warp_num+blockIdx.x*Num_Warps_per_Block],__popc(__ballot(tid - 245)));
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
@@ -72,8 +72,8 @@ HIP_kernel(hipLaunchParm lp,
|
||||
unsigned int* a, unsigned int* b,unsigned long long int* c, unsigned long long int* d, int width, int height)
|
||||
{
|
||||
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
int i = y * width + x;
|
||||
if ( i < (width * height)) {
|
||||
|
||||
@@ -83,8 +83,8 @@ HIP_kernel(hipLaunchParm lp,
|
||||
unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d, int width, int height)
|
||||
{
|
||||
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
int i = y * width + x;
|
||||
if ( i < (width * height)) {
|
||||
|
||||
@@ -73,8 +73,8 @@ HIP_kernel(hipLaunchParm lp,
|
||||
int width, int height)
|
||||
{
|
||||
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
int i = y * width + x;
|
||||
if ( i < (width * height)) {
|
||||
|
||||
@@ -64,8 +64,8 @@ HIP_kernel(hipLaunchParm lp,
|
||||
unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d, int width, int height)
|
||||
{
|
||||
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
int i = y * width + x;
|
||||
if ( i < (width * height)) {
|
||||
|
||||
@@ -57,8 +57,8 @@ vectoradd_float(hipLaunchParm lp,
|
||||
T* a, const T* bm, int width, int height)
|
||||
|
||||
{
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
int i = y * width + x;
|
||||
if ( i < (width * height)) {
|
||||
|
||||
@@ -40,13 +40,13 @@ THE SOFTWARE.
|
||||
#define THREADS_PER_BLOCK_Z 1
|
||||
|
||||
|
||||
__global__ void
|
||||
__global__ void
|
||||
vectoradd_char1(hipLaunchParm lp,
|
||||
char1* a, const char1* bm, const char1* cm, int width, int height)
|
||||
char1* a, const char1* bm, const char1* cm, int width, int height)
|
||||
|
||||
{
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
int i = y * width + x;
|
||||
if ( i < (width * height)) {
|
||||
@@ -54,40 +54,40 @@ vectoradd_char1(hipLaunchParm lp,
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void
|
||||
__global__ void
|
||||
vectoradd_char2(hipLaunchParm lp,
|
||||
char2* a, const char2* bm, const char2* cm, int width, int height)
|
||||
char2* a, const char2* bm, const char2* cm, int width, int height)
|
||||
|
||||
{
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
int i = y * width + x;
|
||||
if ( i < (width * height)) {
|
||||
a[i] = make_char2(bm[i].x, bm[i].y) + make_char2(cm[i].x, cm[i].y);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void
|
||||
__global__ void
|
||||
vectoradd_char3(hipLaunchParm lp,
|
||||
char3* a, const char3* bm, const char3* cm, int width, int height)
|
||||
char3* a, const char3* bm, const char3* cm, int width, int height)
|
||||
|
||||
{
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
int i = y * width + x;
|
||||
if ( i < (width * height)) {
|
||||
a[i] = make_char3(bm[i].x, bm[i].y, bm[i].z) + make_char3(cm[i].x, cm[i].y, cm[i].z);
|
||||
}
|
||||
}
|
||||
__global__ void
|
||||
__global__ void
|
||||
vectoradd_char4(hipLaunchParm lp,
|
||||
char4* a, const char4* bm, const char4* cm, int width, int height)
|
||||
char4* a, const char4* bm, const char4* cm, int width, int height)
|
||||
|
||||
{
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
int i = y * width + x;
|
||||
if ( i < (width * height)) {
|
||||
@@ -99,7 +99,7 @@ vectoradd_char4(hipLaunchParm lp,
|
||||
#if 0
|
||||
__kernel__ void vectoradd_float(float* a, const float* b, const float* c, int width, int height) {
|
||||
|
||||
|
||||
|
||||
int x = blockDimX * blockIdx.x + threadIdx.x;
|
||||
int y = blockDimY * blockIdy.y + threadIdx.y;
|
||||
|
||||
@@ -128,21 +128,21 @@ bool dataTypesRun(){
|
||||
hostA = (T*)malloc(NUM * sizeof(T));
|
||||
hostB = (T*)malloc(NUM * sizeof(T));
|
||||
hostC = (T*)malloc(NUM * sizeof(T));
|
||||
|
||||
|
||||
// initialize the input data
|
||||
for (i = 0; i < NUM; i++) {
|
||||
hostB[i] = (T)i;
|
||||
hostC[i] = (T)i;
|
||||
}
|
||||
|
||||
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(T)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(T)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(T)));
|
||||
|
||||
|
||||
HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(T), hipMemcpyHostToDevice));
|
||||
HIP_ASSERT(hipMemcpy(deviceC, hostC, NUM*sizeof(T), hipMemcpyHostToDevice));
|
||||
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1),
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1),
|
||||
dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
|
||||
0, 0,
|
||||
@@ -192,21 +192,21 @@ bool dataTypesRun(){
|
||||
hostA = (T*)malloc(NUM * sizeof(T));
|
||||
hostB = (T*)malloc(NUM * sizeof(T));
|
||||
hostC = (T*)malloc(NUM * sizeof(T));
|
||||
|
||||
|
||||
// initialize the input data
|
||||
for (i = 0; i < NUM; i++) {
|
||||
hostB[i] = (T)i;
|
||||
hostC[i] = (T)i;
|
||||
}
|
||||
|
||||
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(T)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(T)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(T)));
|
||||
|
||||
|
||||
HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(T), hipMemcpyHostToDevice));
|
||||
HIP_ASSERT(hipMemcpy(deviceC, hostC, NUM*sizeof(T), hipMemcpyHostToDevice));
|
||||
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1),
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1),
|
||||
dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
|
||||
0, 0,
|
||||
@@ -256,21 +256,21 @@ bool dataTypesRun(){
|
||||
hostA = (T*)malloc(NUM * sizeof(T));
|
||||
hostB = (T*)malloc(NUM * sizeof(T));
|
||||
hostC = (T*)malloc(NUM * sizeof(T));
|
||||
|
||||
|
||||
// initialize the input data
|
||||
for (i = 0; i < NUM; i++) {
|
||||
hostB[i] = (T)i;
|
||||
hostC[i] = (T)i;
|
||||
}
|
||||
|
||||
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(T)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(T)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(T)));
|
||||
|
||||
|
||||
HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(T), hipMemcpyHostToDevice));
|
||||
HIP_ASSERT(hipMemcpy(deviceC, hostC, NUM*sizeof(T), hipMemcpyHostToDevice));
|
||||
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1),
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1),
|
||||
dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
|
||||
0, 0,
|
||||
@@ -319,21 +319,21 @@ bool dataTypesRunChar4(){
|
||||
hostA = (T*)malloc(NUM * sizeof(T));
|
||||
hostB = (T*)malloc(NUM * sizeof(T));
|
||||
hostC = (T*)malloc(NUM * sizeof(T));
|
||||
|
||||
|
||||
// initialize the input data
|
||||
for (i = 0; i < NUM; i++) {
|
||||
hostB[i] = (T)i;
|
||||
hostC[i] = (T)i;
|
||||
}
|
||||
|
||||
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(T)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(T)));
|
||||
HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(T)));
|
||||
|
||||
|
||||
HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(T), hipMemcpyHostToDevice));
|
||||
HIP_ASSERT(hipMemcpy(deviceC, hostC, NUM*sizeof(T), hipMemcpyHostToDevice));
|
||||
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1),
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(vectoradd_char1),
|
||||
dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y),
|
||||
0, 0,
|
||||
@@ -368,7 +368,7 @@ bool dataTypesRunChar4(){
|
||||
}
|
||||
|
||||
int main() {
|
||||
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
cout << " System minor " << devProp.minor << endl;
|
||||
|
||||
@@ -36,7 +36,7 @@ THE SOFTWARE.
|
||||
#define SIZE LEN<<2
|
||||
|
||||
__global__ void kernel_trig(hipLaunchParm lp, float *In, float *sin_d, float *cos_d, float *tan_d, float *sin_pd, float *cos_pd){
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
sin_d[tid] = __sinf(In[tid]);
|
||||
cos_d[tid] = __cosf(In[tid]);
|
||||
tan_d[tid] = __tanf(In[tid]);
|
||||
|
||||
@@ -29,7 +29,7 @@ THE SOFTWARE.
|
||||
|
||||
__global__ void Add(hipLaunchParm lp, float *Ad, float *Bd, float *Cd, size_t len)
|
||||
{
|
||||
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if(tx < len)
|
||||
{
|
||||
Cd[tx] = Ad[tx] + Bd[tx];
|
||||
|
||||
@@ -30,23 +30,29 @@ THE SOFTWARE.
|
||||
|
||||
__global__ void Kern(hipLaunchParm lp, float *A)
|
||||
{
|
||||
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
A[tx] += 1.0f;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
float *A, *Ad;
|
||||
float A[len];
|
||||
float *Ad;
|
||||
|
||||
for(int i=0;i<len;i++)
|
||||
{
|
||||
A[i] = 1.0f;
|
||||
}
|
||||
|
||||
Ad = (float*)mallocHip(size);
|
||||
memcpyHipH2D(Ad, A, size);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(Kern), dim3(len/1024), dim3(1024), 0, 0, A);
|
||||
hipLaunchKernel(
|
||||
HIP_KERNEL_NAME(Kern), dim3(len/1024), dim3(1024), 0, 0, Ad);
|
||||
memcpyHipD2H(A, Ad, size);
|
||||
for(int i=0;i<len;i++)
|
||||
{
|
||||
assert(A[i] == 2.0f);
|
||||
}
|
||||
|
||||
hipFree(Ad);
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ class memManager;
|
||||
template<typename T>
|
||||
__global__ void Add(hipLaunchParm lp, T* Ad, T* Bd, T* Cd, size_t Len)
|
||||
{
|
||||
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if(tx < Len)
|
||||
{
|
||||
Cd[tx] = Ad[tx] + Bd[tx];
|
||||
|
||||
@@ -34,7 +34,7 @@ THE SOFTWARE.
|
||||
#define SIZE 1024*1024*sizeof(int)
|
||||
|
||||
__global__ void Iter(hipLaunchParm lp, int *Ad){
|
||||
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if(tx == 0){
|
||||
for(int i=0;i<ITER;i++){
|
||||
Ad[tx] += 1;
|
||||
|
||||
@@ -29,7 +29,7 @@ THE SOFTWARE.
|
||||
#define SIZE 1024*1024*sizeof(int)
|
||||
|
||||
__global__ void Iter(hipLaunchParm lp, int *Ad){
|
||||
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if(tx == 0){
|
||||
for(int i=0;i<ITER;i++){
|
||||
Ad[tx] += 1;
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#include "hip/hip_runtime_api.h"
|
||||
|
||||
__global__ void Kernel(hipLaunchParm lp, float *Ad){
|
||||
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
Ad[tx] += Ad[tx-1];
|
||||
}
|
||||
|
||||
|
||||
@@ -40,8 +40,8 @@ __global__ void testExternSharedKernel(hipLaunchParm lp, const T* A_d, const T*
|
||||
T *sdata = reinterpret_cast<T *>(my_sdata);
|
||||
#endif
|
||||
|
||||
size_t gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
|
||||
size_t tid = hipThreadIdx_x;
|
||||
size_t gid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t tid = threadIdx.x;
|
||||
|
||||
// initialize dynamic shared memory
|
||||
if (tid < groupElements) {
|
||||
|
||||
@@ -34,7 +34,7 @@ THE SOFTWARE.
|
||||
|
||||
__global__ void vectorAdd(hipLaunchParm lp, float *Ad, float *Bd) {
|
||||
HIP_DYNAMIC_SHARED(float, sBd);
|
||||
int tx = hipThreadIdx_x;
|
||||
int tx = threadIdx.x;
|
||||
for(int i=0;i<LEN/64;i++) {
|
||||
sBd[tx + i * 64] = Ad[tx + i * 64] + 1.0f;
|
||||
Bd[tx + i * 64] = sBd[tx + i * 64];
|
||||
|
||||
@@ -48,11 +48,12 @@ vectorADD2( hipLaunchParm lp,
|
||||
T *C_d,
|
||||
size_t N)
|
||||
{
|
||||
size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
|
||||
size_t stride = hipBlockDim_x * hipGridDim_x ;
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x ;
|
||||
|
||||
for (size_t i=offset; i<N; i+=stride) {
|
||||
C_d[i] = A_d[i] + B_d[i] ;
|
||||
double foo = __hiloint2double(A_d[i], B_d[i]);
|
||||
C_d[i] = __double2loint(foo) + __double2hiint(foo);//A_d[i] + B_d[i] ;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -62,7 +62,7 @@ __global__ void MyKernel (const hipLaunchParm lp, const float *a, const float *b
|
||||
{
|
||||
//KERNELBEGIN;
|
||||
|
||||
unsigned gid = hipThreadIdx_x;
|
||||
unsigned gid = threadIdx.x;
|
||||
if (gid < N) {
|
||||
c[gid] = a[gid] + PlusOne(b[gid]);
|
||||
}
|
||||
@@ -74,8 +74,8 @@ __global__ void MyKernel (const hipLaunchParm lp, const float *a, const float *b
|
||||
void callMyKernel()
|
||||
{
|
||||
float *a, *b, *c;
|
||||
unsigned N;
|
||||
const unsigned blockSize = 256;
|
||||
unsigned N = blockSize;
|
||||
|
||||
hipLaunchKernel(MyKernel, dim3(N/blockSize), dim3(blockSize), 0, 0, a,b,c,N);
|
||||
}
|
||||
@@ -96,13 +96,13 @@ vectorADD(const hipLaunchParm lp,
|
||||
int zuzu = deviceVar + 1;
|
||||
|
||||
|
||||
int b = hipThreadIdx_x;
|
||||
int b = threadIdx.x;
|
||||
int c;
|
||||
#ifdef NOT_YET
|
||||
int a = __shfl_up(x, 1);
|
||||
#endif
|
||||
|
||||
float x;
|
||||
float x = 1.0;
|
||||
float z = sin(x);
|
||||
#ifdef NOT_YET
|
||||
float fastZ = __sin(x);
|
||||
@@ -119,8 +119,8 @@ vectorADD(const hipLaunchParm lp,
|
||||
__syncthreads();
|
||||
|
||||
|
||||
size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
|
||||
size_t stride = hipBlockDim_x * hipGridDim_x ;
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x ;
|
||||
|
||||
for (size_t i=offset; i<N; i+=stride) {
|
||||
C_d[i] = A_d[i] + B_d[i];
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#define HIP_ENABLE_PRINTF
|
||||
|
||||
#include"test_common.h"
|
||||
|
||||
__global__ void run_printf(hipLaunchParm lp){
|
||||
printf("Hello World\n");
|
||||
}
|
||||
|
||||
int main(){
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(run_printf), dim3(1), dim3(1), 0, 0);
|
||||
hipDeviceSynchronize();
|
||||
passed();
|
||||
}
|
||||
@@ -18,7 +18,7 @@ THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../test_common.cpp
|
||||
* BUILD: %t %s ../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
@@ -38,7 +38,7 @@ __constant__ int Value[LEN];
|
||||
|
||||
__global__ void Get(hipLaunchParm lp, int *Ad)
|
||||
{
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
Ad[tid] = Value[tid];
|
||||
}
|
||||
|
||||
|
||||
@@ -33,12 +33,12 @@ THE SOFTWARE.
|
||||
#define SIZE NUM * 8
|
||||
|
||||
__global__ void Alloc(hipLaunchParm lp, uint64_t *Ptr) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
Ptr[tid] = (uint64_t)malloc(128);
|
||||
}
|
||||
|
||||
__global__ void Free(hipLaunchParm lp, uint64_t *Ptr) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
free((void*)Ptr[tid]);
|
||||
}
|
||||
|
||||
|
||||
@@ -35,52 +35,52 @@ THE SOFTWARE.
|
||||
#define LEN12 12 * 4
|
||||
|
||||
__global__ void MemCpy8(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
memcpy(Out + tid*8, In + tid*8, 8);
|
||||
}
|
||||
|
||||
__global__ void MemCpy9(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
memcpy(Out + tid*9, In + tid*9, 9);
|
||||
}
|
||||
|
||||
__global__ void MemCpy10(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
memcpy(Out + tid*10, In + tid*10, 10);
|
||||
}
|
||||
|
||||
__global__ void MemCpy11(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
memcpy(Out + tid*11, In + tid*11, 11);
|
||||
}
|
||||
|
||||
__global__ void MemCpy12(hipLaunchParm lp, uint8_t *In, uint8_t *Out) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
memcpy(Out + tid*12, In + tid*12, 12);
|
||||
}
|
||||
|
||||
__global__ void MemSet8(hipLaunchParm lp, uint8_t *In) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
memset(In + tid*8, 1, 8);
|
||||
}
|
||||
|
||||
__global__ void MemSet9(hipLaunchParm lp, uint8_t *In) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
memset(In + tid*9, 1, 9);
|
||||
}
|
||||
|
||||
__global__ void MemSet10(hipLaunchParm lp, uint8_t *In) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
memset(In + tid*10, 1, 10);
|
||||
}
|
||||
|
||||
__global__ void MemSet11(hipLaunchParm lp, uint8_t *In) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
memset(In + tid*11, 1, 11);
|
||||
}
|
||||
|
||||
__global__ void MemSet12(hipLaunchParm lp, uint8_t *In) {
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
memset(In + tid*12, 1, 12);
|
||||
}
|
||||
|
||||
@@ -107,9 +107,12 @@ int main(){
|
||||
assert(C[i] == 1);
|
||||
}
|
||||
|
||||
delete A;
|
||||
delete B;
|
||||
delete C;
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
|
||||
A = new uint8_t[LEN9];
|
||||
B = new uint8_t[LEN9];
|
||||
@@ -132,9 +135,12 @@ int main(){
|
||||
assert(C[i] == 1);
|
||||
}
|
||||
|
||||
delete A;
|
||||
delete B;
|
||||
delete C;
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
|
||||
A = new uint8_t[LEN10];
|
||||
B = new uint8_t[LEN10];
|
||||
@@ -157,9 +163,12 @@ int main(){
|
||||
assert(C[i] == 1);
|
||||
}
|
||||
|
||||
delete A;
|
||||
delete B;
|
||||
delete C;
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
|
||||
A = new uint8_t[LEN11];
|
||||
B = new uint8_t[LEN11];
|
||||
@@ -182,9 +191,12 @@ int main(){
|
||||
assert(C[i] == 1);
|
||||
}
|
||||
|
||||
delete A;
|
||||
delete B;
|
||||
delete C;
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
|
||||
A = new uint8_t[LEN12];
|
||||
B = new uint8_t[LEN12];
|
||||
@@ -207,9 +219,12 @@ int main(){
|
||||
assert(C[i] == 1);
|
||||
}
|
||||
|
||||
delete A;
|
||||
delete B;
|
||||
delete C;
|
||||
delete [] A;
|
||||
delete [] B;
|
||||
delete [] C;
|
||||
hipFree(Ad);
|
||||
hipFree(Bd);
|
||||
hipFree(Cd);
|
||||
|
||||
passed();
|
||||
}
|
||||
|
||||
@@ -37,9 +37,15 @@ __global__ void vadd_asm(hipLaunchParm lp,
|
||||
float *out,
|
||||
float *in)
|
||||
{
|
||||
int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
asm volatile ("v_add_f32_e32 %0, %1, %2" : "=v" (out[i]) : "v"(in[i]),"v" (out[i]));
|
||||
#ifdef __HIP_PLATFORM_NVCC__
|
||||
asm volatile("add.f32 %0,%1,%2;":"=f"(out[i]):"f"(in[i]),"f"(out[i]));
|
||||
#endif
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
asm volatile ("v_add_f32_e32 %0, %1, %2" : "=v" (out[i]) : "v"(in[i]),"v" (out[i]));
|
||||
#endif
|
||||
}
|
||||
|
||||
// CPU implementation of Vector Result
|
||||
@@ -49,7 +55,7 @@ void addCPUReference(
|
||||
{
|
||||
for(unsigned int j=0; j < NUM; j++)
|
||||
{
|
||||
|
||||
|
||||
output[j]= input[j] + output[j];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ __global__ void vmac_asm(hipLaunchParm lp,
|
||||
float *out,
|
||||
float *in)
|
||||
{
|
||||
int i = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int i = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
asm volatile ("v_mac_f32_e32 %0, %2, %3" : "=v" (out[i]) : "0"(out[i]), "v" (a), "v" (in[i]));
|
||||
}
|
||||
@@ -47,7 +47,7 @@ void CPUReference(
|
||||
{
|
||||
for(unsigned int j=0; j < NUM; j++)
|
||||
{
|
||||
|
||||
|
||||
output[j]= a*input[j] + output[j];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -33,7 +33,7 @@ void
|
||||
__launch_bounds__(256, 2)
|
||||
myKern(hipLaunchParm lp, int *C, const int *A, int N, int xfactor)
|
||||
{
|
||||
int tid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
|
||||
int tid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
|
||||
if (tid < N) {
|
||||
C[tid] = A[tid];
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Conformance test for checking functionality of
|
||||
* hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device);
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
int numDevices = 0;
|
||||
int major,minor;
|
||||
hipDevice_t device;
|
||||
HIPCHECK(hipGetDeviceCount(&numDevices));
|
||||
for(int i=0;i<numDevices;i++){
|
||||
HIPCHECK(hipDeviceGet(&device,i));
|
||||
HIPCHECK(hipDeviceComputeCapability(&major, &minor, device));
|
||||
HIPASSERT(major >= 0);
|
||||
HIPASSERT(minor >= 0);
|
||||
}
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Conformance test for checking functionality of
|
||||
* hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device);
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
|
||||
#define len 256
|
||||
|
||||
int main()
|
||||
{
|
||||
int numDevices = 0;
|
||||
char name[len];
|
||||
hipDevice_t device;
|
||||
HIPCHECK(hipGetDeviceCount(&numDevices));
|
||||
for(int i=0;i<numDevices;i++){
|
||||
HIPCHECK(hipDeviceGet(&device,i));
|
||||
HIPCHECK(hipDeviceGetName(name,len,device));
|
||||
HIPASSERT(name != "");
|
||||
}
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Test for checking the functionality of
|
||||
* hipError_t hipDeviceSynchronize();
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include"test_common.h"
|
||||
|
||||
#define _SIZE sizeof(int)*1024*1024
|
||||
#define NUM_STREAMS 2
|
||||
|
||||
__global__ void Iter(hipLaunchParm lp, int *Ad, int num){
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
// Kernel loop designed to execute very slowly... ... ... so we can test timing-related behavior below
|
||||
if(tx == 0){
|
||||
for(int i = 0; i<num;i++){
|
||||
Ad[tx] += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(){
|
||||
int *A[NUM_STREAMS];
|
||||
int *Ad[NUM_STREAMS];
|
||||
hipStream_t stream[NUM_STREAMS];
|
||||
for(int i=0;i<NUM_STREAMS;i++){
|
||||
HIPCHECK(hipHostMalloc((void**)&A[i], _SIZE, hipHostMallocDefault));
|
||||
A[i][0] = 1;
|
||||
HIPCHECK(hipMalloc((void**)&Ad[i], _SIZE));
|
||||
HIPCHECK(hipStreamCreate(&stream[i]));
|
||||
}
|
||||
for(int i=0;i<NUM_STREAMS;i++){
|
||||
HIPCHECK(hipMemcpyAsync(Ad[i], A[i], _SIZE, hipMemcpyHostToDevice, stream[i]));
|
||||
}
|
||||
for(int i=0;i<NUM_STREAMS;i++){
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(Iter), dim3(1), dim3(1), 0, stream[i], Ad[i], 1<<30);
|
||||
}
|
||||
for(int i=0;i<NUM_STREAMS;i++){
|
||||
HIPCHECK(hipMemcpyAsync(A[i], Ad[i], _SIZE, hipMemcpyDeviceToHost, stream[i]));
|
||||
}
|
||||
|
||||
|
||||
// This first check but relies on the kernel running for so long that the D2H async memcopy has not started yet.
|
||||
// This will be true in an optimal asynchronous implementation.
|
||||
// Conservative implementations which synchronize the hipMemcpyAsync will fail, ie if HIP_LAUNCH_BLOCKING=true
|
||||
HIPASSERT(1<<30 != A[NUM_STREAMS-1][0]-1);
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
HIPASSERT(1<<30 == A[NUM_STREAMS-1][0]-1);
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Conformance test for checking functionality of
|
||||
* hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device);
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
int numDevices = 0;
|
||||
size_t totMem;
|
||||
hipDevice_t device;
|
||||
HIPCHECK(hipGetDeviceCount(&numDevices));
|
||||
for(int i=0;i<numDevices;i++){
|
||||
HIPCHECK(hipDeviceGet(&device,i));
|
||||
HIPCHECK(hipDeviceTotalMem(&totMem,device));
|
||||
HIPASSERT(totMem != 0);
|
||||
}
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Conformance test for checking functionality of
|
||||
* hipError_t hipGetDevice(int *device);
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
int numDevices = 0;
|
||||
int device;
|
||||
HIPCHECK(hipGetDeviceCount(&numDevices));
|
||||
for(int i=0;i<numDevices;i++){
|
||||
HIPCHECK(hipSetDevice(i));
|
||||
HIPCHECK(hipGetDevice(&device));
|
||||
HIPASSERT(device == i);
|
||||
}
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Conformance test for checking functionality of
|
||||
* hipError_t hipRuntimeGetVersion(int* runtimeVersion);
|
||||
* On HIP/HCC path this function returns HIP runtime patch version(a 5 digit code) however on HIP/NVCC path this function return CUDA runtime version.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
int runtimeVersion;
|
||||
HIPCHECK(hipRuntimeGetVersion(&runtimeVersion));
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include<hip/hip_runtime_api.h>
|
||||
#include<iostream>
|
||||
#include"test_common.h"
|
||||
|
||||
int main(){
|
||||
hipFuncCache_t cacheConfig;
|
||||
void *func;
|
||||
hipFuncSetCacheConfig(func, cacheConfig);
|
||||
passed();
|
||||
}
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* RUN: %t EXCLUDE_HIP_PLATFORM
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
|
||||
int main(){
|
||||
int numDevices = 0;
|
||||
HIPCHECK(hipGetDeviceCount(&numDevices));
|
||||
for(int i=0;i<numDevices;i++){
|
||||
HIPCHECK(hipSetDevice(i));
|
||||
}
|
||||
HIPASSERT(hipErrorInvalidDevice == hipSetDevice(numDevices));
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
unsigned flag = 0;
|
||||
HIPCHECK(hipDeviceReset());
|
||||
|
||||
int deviceCount = 0;
|
||||
HIPCHECK(hipGetDeviceCount(&deviceCount));
|
||||
|
||||
for(int j=0;j<deviceCount;j++){
|
||||
|
||||
HIPCHECK(hipSetDevice(j));
|
||||
|
||||
for(int i=0;i<4;i++){
|
||||
flag = 1 << i;
|
||||
printf ("Flag=%x\n", flag);
|
||||
HIPCHECK(hipSetDeviceFlags(flag));
|
||||
//HIPCHECK_API(hipSetDeviceFlags(flag), hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
flag = 0;
|
||||
|
||||
}
|
||||
|
||||
passed();
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Conformance test for checking functionality of
|
||||
* hipError_t hipGetDevice(int *device);
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
hipSetDevice(-1);
|
||||
if(hipPeekAtLastError() != hipSuccess)
|
||||
passed();
|
||||
}
|
||||
@@ -52,7 +52,7 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_
|
||||
if (!(testMask & p_tests)) {
|
||||
return;
|
||||
}
|
||||
printf ("\ntest 0x%3x: stream=%p waitStart=%d syncMode=%s\n",
|
||||
printf ("\ntest 0x%3x: stream=%p waitStart=%d syncMode=%s\n",
|
||||
testMask, stream, waitStart, syncModeString(syncMode));
|
||||
|
||||
size_t sizeBytes = numElements * sizeof(int);
|
||||
@@ -85,8 +85,8 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_
|
||||
HIPCHECK(hipEventSynchronize(start));
|
||||
}
|
||||
|
||||
|
||||
hipError_t expectedStopError = hipSuccess;
|
||||
|
||||
hipError_t expectedStopError = hipSuccess;
|
||||
|
||||
// How to wait for the events to finish:
|
||||
switch (syncMode) {
|
||||
@@ -97,12 +97,12 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_
|
||||
HIPCHECK(hipStreamSynchronize(stream)); // wait for recording to finish...
|
||||
break;
|
||||
case syncStopEvent:
|
||||
HIPCHECK(hipEventSynchronize(stop));
|
||||
HIPCHECK(hipEventSynchronize(stop));
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
};
|
||||
|
||||
|
||||
|
||||
float t;
|
||||
|
||||
@@ -111,25 +111,25 @@ void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, hipStream_
|
||||
failed ("start event not in expected state, was %d=%s\n", e, hipGetErrorName(e));
|
||||
}
|
||||
|
||||
if (e == hipSuccess)
|
||||
if (e == hipSuccess)
|
||||
assert (t==0.0f);
|
||||
|
||||
|
||||
|
||||
// stop usually ready unless we skipped the synchronization (syncNone)
|
||||
HIPCHECK_API(hipEventElapsedTime(&t, stop, stop), expectedStopError);
|
||||
if (e == hipSuccess)
|
||||
if (e == hipSuccess)
|
||||
assert (t==0.0f);
|
||||
|
||||
|
||||
e = hipEventElapsedTime(&t, start, stop);
|
||||
HIPCHECK_API(e, expectedStopError);
|
||||
if (expectedStopError == hipSuccess)
|
||||
if (expectedStopError == hipSuccess)
|
||||
assert (t>0.0f);
|
||||
printf ("time=%6.2f error=%s\n", t, hipGetErrorName(e));
|
||||
|
||||
e = hipEventElapsedTime(&t, stop, start);
|
||||
HIPCHECK_API(e, expectedStopError);
|
||||
if (expectedStopError == hipSuccess)
|
||||
if (expectedStopError == hipSuccess)
|
||||
assert (t<0.0f);
|
||||
printf ("negtime=%6.2f error=%s\n", t, hipGetErrorName(e));
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ THE SOFTWARE.
|
||||
#define SIZE LEN*sizeof(float)
|
||||
|
||||
__global__ void Add(hipLaunchParm lp, float *Ad, float *Bd, float *Cd){
|
||||
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
Cd[tx] = Ad[tx] + Bd[tx];
|
||||
}
|
||||
|
||||
|
||||
@@ -33,13 +33,13 @@
|
||||
#define SIZE LEN*sizeof(float)
|
||||
|
||||
__global__ void Add(float *Ad, float *Bd, float *Cd){
|
||||
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
Cd[tx] = Ad[tx] + Bd[tx];
|
||||
}
|
||||
|
||||
|
||||
__global__ void Set(int *Ad, int val){
|
||||
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
Ad[tx] = val;
|
||||
}
|
||||
|
||||
@@ -52,13 +52,13 @@ std::vector<std::string> syncMsg = {"event", "stream", "device"};
|
||||
|
||||
void CheckHostPointer(int numElements, int *ptr, unsigned eventFlags, int syncMethod, std::string msg)
|
||||
{
|
||||
std::cerr << "test: CheckHostPointer " << msg
|
||||
std::cerr << "test: CheckHostPointer " << msg
|
||||
//<< " HIP_COHERENT_HOST_ALLOC=" << HIP_COHERENT_HOST_ALLOC
|
||||
//<< " HIP_EVENT_SYS_RELEASE=" << HIP_EVENT_SYS_RELEASE
|
||||
<< " eventFlags = " << std::hex << eventFlags
|
||||
<< ((eventFlags & hipEventReleaseToDevice) ? " hipEventReleaseToDevice" : "")
|
||||
<< ((eventFlags & hipEventReleaseToSystem) ? " hipEventReleaseToSystem" : "")
|
||||
<< " ptr=" << ptr
|
||||
<< " eventFlags = " << std::hex << eventFlags
|
||||
<< ((eventFlags & hipEventReleaseToDevice) ? " hipEventReleaseToDevice" : "")
|
||||
<< ((eventFlags & hipEventReleaseToSystem) ? " hipEventReleaseToSystem" : "")
|
||||
<< " ptr=" << ptr
|
||||
<< " syncMethod=" << syncMsg[syncMethod] << "\n";
|
||||
|
||||
hipStream_t s;
|
||||
@@ -93,7 +93,7 @@ void CheckHostPointer(int numElements, int *ptr, unsigned eventFlags, int syncMe
|
||||
default:
|
||||
assert(0);
|
||||
};
|
||||
|
||||
|
||||
for (int i=0; i<numElements; i++) {
|
||||
if (ptr[i] != expected) {
|
||||
printf ("mismatch at %d: %d != %d\n", i, ptr[i], expected);
|
||||
@@ -153,7 +153,7 @@ int main(){
|
||||
size_t sizeBytes = numElements * sizeof (int);
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
{
|
||||
{
|
||||
// Stimulate error condition:
|
||||
int *A = &numElements;
|
||||
HIPCHECK_API(hipHostMalloc((void**)&A, sizeBytes, hipHostMallocCoherent|hipHostMallocNonCoherent), hipErrorInvalidValue);
|
||||
@@ -174,7 +174,7 @@ int main(){
|
||||
// agent-scope releases don't provide host visibility, don't use them here:
|
||||
}
|
||||
|
||||
if (1) {
|
||||
if (1) {
|
||||
int *A = nullptr;
|
||||
HIPCHECK(hipHostMalloc((void**)&A, sizeBytes, hipHostMallocCoherent));
|
||||
const char *ptrType = "coherent";
|
||||
@@ -189,14 +189,14 @@ int main(){
|
||||
|
||||
|
||||
// Check defaults:
|
||||
if (1) {
|
||||
if (1) {
|
||||
int *A = nullptr;
|
||||
HIPCHECK(hipHostMalloc((void**)&A, sizeBytes));
|
||||
const char *ptrType = "default";
|
||||
CheckHostPointer(numElements, A, 0, SYNC_DEVICE, ptrType);
|
||||
CheckHostPointer(numElements, A, 0, SYNC_STREAM, ptrType);
|
||||
CheckHostPointer(numElements, A, 0, SYNC_EVENT, ptrType);
|
||||
|
||||
|
||||
CheckHostPointer(numElements, A, 0, SYNC_DEVICE, ptrType);
|
||||
CheckHostPointer(numElements, A, 0, SYNC_STREAM, ptrType);
|
||||
CheckHostPointer(numElements, A, 0, SYNC_EVENT, ptrType);
|
||||
@@ -206,7 +206,7 @@ int main(){
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
passed();
|
||||
|
||||
}
|
||||
|
||||
@@ -29,13 +29,13 @@ THE SOFTWARE.
|
||||
#include<malloc.h>
|
||||
|
||||
__global__ void Inc(hipLaunchParm lp, float *Ad){
|
||||
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
Ad[tx] = Ad[tx] + float(1);
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
void doMemCopy(size_t numElements, int offset, T *A, T *Bh, T *Bd, bool internalRegister)
|
||||
void doMemCopy(size_t numElements, int offset, T *A, T *Bh, T *Bd, bool internalRegister)
|
||||
{
|
||||
A = A + offset;
|
||||
numElements -= offset;
|
||||
@@ -56,7 +56,7 @@ void doMemCopy(size_t numElements, int offset, T *A, T *Bh, T *Bd, bool internal
|
||||
HIPCHECK(hipMemset(Bd, 13.0f, sizeBytes));
|
||||
|
||||
|
||||
//
|
||||
//
|
||||
HIPCHECK(hipMemcpy(Bd, A, sizeBytes, hipMemcpyHostToDevice));
|
||||
HIPCHECK(hipMemcpy(Bh, Bd, sizeBytes, hipMemcpyDeviceToHost));
|
||||
|
||||
@@ -81,7 +81,7 @@ int main(int argc, char *argv[])
|
||||
|
||||
const size_t size = N * sizeof(float);
|
||||
|
||||
if (p_tests & 0x1) {
|
||||
if (p_tests & 0x1) {
|
||||
float *A, **Ad;
|
||||
int num_devices;
|
||||
HIPCHECK(hipGetDeviceCount(&num_devices));
|
||||
@@ -115,7 +115,7 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
|
||||
if (p_tests & 0x6) {
|
||||
if (p_tests & 0x6) {
|
||||
// Sensitize HIP bug if device does not match where the memory was registered.
|
||||
HIPCHECK(hipSetDevice(0));
|
||||
|
||||
@@ -129,7 +129,7 @@ int main(int argc, char *argv[])
|
||||
HIPCHECK(hipMalloc(&Bd, size));
|
||||
|
||||
// TODO - set to 128
|
||||
#define OFFSETS_TO_TRY 128
|
||||
#define OFFSETS_TO_TRY 128
|
||||
assert (N>OFFSETS_TO_TRY);
|
||||
|
||||
if (p_tests & 0x2) {
|
||||
|
||||
@@ -58,7 +58,7 @@ public:
|
||||
|
||||
void offset(int offset) { _offset = offset; };
|
||||
int offset() const { return _offset; };
|
||||
|
||||
|
||||
private:
|
||||
T * _A_d;
|
||||
T* _B_d;
|
||||
@@ -72,7 +72,7 @@ private:
|
||||
|
||||
template<typename T>
|
||||
DeviceMemory<T>::DeviceMemory(size_t numElements)
|
||||
: _maxNumElements(numElements),
|
||||
: _maxNumElements(numElements),
|
||||
_offset(0)
|
||||
{
|
||||
T ** np = nullptr;
|
||||
@@ -93,7 +93,7 @@ DeviceMemory<T>::~DeviceMemory ()
|
||||
HipTest::freeArrays (_A_d, _B_d, _C_d, np, np, np, 0);
|
||||
|
||||
HIPCHECK (hipFree(_C_dd));
|
||||
|
||||
|
||||
_C_dd = NULL;
|
||||
};
|
||||
|
||||
@@ -125,7 +125,7 @@ public:
|
||||
T * A_hh;
|
||||
T* B_hh;
|
||||
|
||||
bool _usePinnedHost;
|
||||
bool _usePinnedHost;
|
||||
private:
|
||||
size_t _maxNumElements;
|
||||
|
||||
@@ -165,11 +165,11 @@ HostMemory<T>::HostMemory(size_t numElements, bool usePinnedHost)
|
||||
|
||||
template<typename T>
|
||||
void
|
||||
HostMemory<T>::reset(size_t numElements, bool full)
|
||||
HostMemory<T>::reset(size_t numElements, bool full)
|
||||
{
|
||||
// Initialize the host data:
|
||||
for (size_t i=0; i<numElements; i++) {
|
||||
(A_hh)[i] = 1097.0 + i;
|
||||
(A_hh)[i] = 1097.0 + i;
|
||||
(B_hh)[i] = 1492.0 + i; // Phi
|
||||
|
||||
if (full) {
|
||||
@@ -213,8 +213,8 @@ template <typename T>
|
||||
void memcpytest2(DeviceMemory<T> *dmem, HostMemory<T> *hmem, size_t numElements, bool useHostToHost, bool useDeviceToDevice, bool useMemkindDefault)
|
||||
{
|
||||
size_t sizeElements = numElements * sizeof(T);
|
||||
printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d, offsets:dev:%+d host:+%d\n",
|
||||
__func__,
|
||||
printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d, offsets:dev:%+d host:+%d\n",
|
||||
__func__,
|
||||
TYPENAME(T),
|
||||
sizeElements, sizeElements/1024.0/1024.0,
|
||||
hmem->_usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault,
|
||||
@@ -273,8 +273,8 @@ void memcpytest2_for_type(size_t numElements)
|
||||
{
|
||||
printSep();
|
||||
|
||||
DeviceMemory<T> memD(numElements);
|
||||
HostMemory<T> memU(numElements, 0/*usePinnedHost*/);
|
||||
DeviceMemory<T> memD(numElements);
|
||||
HostMemory<T> memU(numElements, 0/*usePinnedHost*/);
|
||||
HostMemory<T> memP(numElements, 1/*usePinnedHost*/);
|
||||
|
||||
for (int usePinnedHost =0; usePinnedHost<=1; usePinnedHost++) {
|
||||
@@ -307,11 +307,11 @@ void memcpytest2_sizes(size_t maxElem=0)
|
||||
maxElem = free/sizeof(T)/8;
|
||||
}
|
||||
|
||||
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n",
|
||||
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n",
|
||||
deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0);
|
||||
HIPCHECK ( hipDeviceReset() );
|
||||
DeviceMemory<T> memD(maxElem);
|
||||
HostMemory<T> memU(maxElem, 0/*usePinnedHost*/);
|
||||
DeviceMemory<T> memD(maxElem);
|
||||
HostMemory<T> memU(maxElem, 0/*usePinnedHost*/);
|
||||
HostMemory<T> memP(maxElem, 1/*usePinnedHost*/);
|
||||
|
||||
for (size_t elem=1; elem<=maxElem; elem*=2) {
|
||||
@@ -336,11 +336,11 @@ void memcpytest2_offsets(size_t maxElem, bool devOffsets, bool hostOffsets)
|
||||
HIPCHECK(hipMemGetInfo(&free, &total));
|
||||
|
||||
|
||||
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n",
|
||||
printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB\n",
|
||||
deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0);
|
||||
HIPCHECK ( hipDeviceReset() );
|
||||
DeviceMemory<T> memD(maxElem);
|
||||
HostMemory<T> memU(maxElem, 0/*usePinnedHost*/);
|
||||
DeviceMemory<T> memD(maxElem);
|
||||
HostMemory<T> memU(maxElem, 0/*usePinnedHost*/);
|
||||
HostMemory<T> memP(maxElem, 1/*usePinnedHost*/);
|
||||
|
||||
size_t elem = maxElem / 2;
|
||||
@@ -380,16 +380,16 @@ void multiThread_1(bool serialize, bool usePinnedHost)
|
||||
{
|
||||
printSep();
|
||||
printf ("test: %s<%s> serialize=%d usePinnedHost=%d\n", __func__, TYPENAME(T), serialize, usePinnedHost);
|
||||
DeviceMemory<T> memD(N);
|
||||
HostMemory<T> mem1(N, usePinnedHost);
|
||||
HostMemory<T> mem2(N, usePinnedHost);
|
||||
DeviceMemory<T> memD(N);
|
||||
HostMemory<T> mem1(N, usePinnedHost);
|
||||
HostMemory<T> mem2(N, usePinnedHost);
|
||||
|
||||
std::thread t1 (memcpytest2<T>, &memD, &mem1, N, 0,0,0);
|
||||
if (serialize) {
|
||||
t1.join();
|
||||
}
|
||||
|
||||
|
||||
|
||||
std::thread t2 (memcpytest2<T>,&memD, &mem2, N, 0,0,0);
|
||||
if (serialize) {
|
||||
t2.join();
|
||||
@@ -427,21 +427,21 @@ int main(int argc, char *argv[])
|
||||
// Some tests around the 64KB boundary which have historically shown issues:
|
||||
printf ("\n\n=== tests&0x2 (64KB boundary)\n");
|
||||
size_t maxElem = 32*1024*1024;
|
||||
DeviceMemory<float> memD(maxElem);
|
||||
HostMemory<float> memU(maxElem, 0/*usePinnedHost*/);
|
||||
HostMemory<float> memP(maxElem, 0/*usePinnedHost*/);
|
||||
DeviceMemory<float> memD(maxElem);
|
||||
HostMemory<float> memU(maxElem, 0/*usePinnedHost*/);
|
||||
HostMemory<float> memP(maxElem, 0/*usePinnedHost*/);
|
||||
// These all pass:
|
||||
memcpytest2<float>(&memD, &memP, 15*1024*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 16*1024*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 16*1024*1024+16*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 15*1024*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 16*1024*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 16*1024*1024+16*1024, 0, 0, 0);
|
||||
|
||||
// Just over 64MB:
|
||||
memcpytest2<float>(&memD, &memP, 16*1024*1024+512*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 17*1024*1024+1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 32*1024*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memU, 32*1024*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 32*1024*1024, 1, 1, 0);
|
||||
memcpytest2<float>(&memD, &memP, 32*1024*1024, 1, 1, 0);
|
||||
memcpytest2<float>(&memD, &memP, 16*1024*1024+512*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 17*1024*1024+1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 32*1024*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memU, 32*1024*1024, 0, 0, 0);
|
||||
memcpytest2<float>(&memD, &memP, 32*1024*1024, 1, 1, 0);
|
||||
memcpytest2<float>(&memD, &memP, 32*1024*1024, 1, 1, 0);
|
||||
|
||||
|
||||
}
|
||||
@@ -464,7 +464,7 @@ int main(int argc, char *argv[])
|
||||
|
||||
// Simplest cases: serialize the threads, and also used pinned memory:
|
||||
// This verifies that the sub-calls to memcpytest2 are correct.
|
||||
multiThread_1<float>(true, true);
|
||||
multiThread_1<float>(true, true);
|
||||
|
||||
// Serialize, but use unpinned memory to stress the unpinned memory xfer path.
|
||||
multiThread_1<float>(true, false);
|
||||
|
||||
@@ -59,7 +59,7 @@ struct HostTraits<Pinned>
|
||||
static const char *Name() { return "Pinned"; } ;
|
||||
|
||||
static void *Alloc(size_t sizeBytes) {
|
||||
void *p;
|
||||
void *p;
|
||||
HIPCHECK(hipHostMalloc((void**)&p, sizeBytes, hipHostMallocDefault));
|
||||
return p;
|
||||
};
|
||||
@@ -67,11 +67,11 @@ struct HostTraits<Pinned>
|
||||
|
||||
|
||||
template<typename T>
|
||||
__global__ void
|
||||
__global__ void
|
||||
addK (hipLaunchParm lp, T *A, T K, size_t numElements)
|
||||
{
|
||||
size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
|
||||
size_t stride = hipBlockDim_x * hipGridDim_x ;
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x ;
|
||||
|
||||
for (size_t i=offset; i<numElements; i+=stride) {
|
||||
A[i] = A[i] + K;
|
||||
@@ -85,7 +85,7 @@ addK (hipLaunchParm lp, T *A, T K, size_t numElements)
|
||||
//IN: numInflight : number of copies inflight at any time:
|
||||
//IN: numPongs = number of iterations to run (iteration)
|
||||
template<typename T, class AllocType>
|
||||
void test_pingpong(hipStream_t stream, size_t numElements, int numInflight, int numPongs, bool doHostSide)
|
||||
void test_pingpong(hipStream_t stream, size_t numElements, int numInflight, int numPongs, bool doHostSide)
|
||||
{
|
||||
HIPASSERT(numElements % numInflight == 0); // Must be evenly divisible.
|
||||
size_t Nbytes = numElements*sizeof(T);
|
||||
@@ -95,7 +95,7 @@ void test_pingpong(hipStream_t stream, size_t numElements, int numInflight, int
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements);
|
||||
|
||||
printf ("-----------------------------------------------------------------------------------------------\n");
|
||||
printf ("testing: %s<%s> Nbytes=%zu (%6.1f MB) numPongs=%d numInflight=%d eachCopyElements=%zu eachCopyBytes=%zu\n",
|
||||
printf ("testing: %s<%s> Nbytes=%zu (%6.1f MB) numPongs=%d numInflight=%d eachCopyElements=%zu eachCopyBytes=%zu\n",
|
||||
__func__, HostTraits<AllocType>::Name(), Nbytes, (double)(Nbytes)/1024.0/1024.0, numPongs, numInflight, eachCopyElements, eachCopyBytes);
|
||||
|
||||
T *A_h = NULL;
|
||||
@@ -176,7 +176,7 @@ void test_manyInflightCopies(hipStream_t stream, int numElements, int numCopies,
|
||||
size_t eachCopyBytes = eachCopyElements * sizeof(T);
|
||||
|
||||
printf ("-----------------------------------------------------------------------------------------------\n");
|
||||
printf ("testing: %s Nbytes=%zu (%6.1f MB) numCopies=%d eachCopyElements=%zu eachCopyBytes=%zu\n",
|
||||
printf ("testing: %s Nbytes=%zu (%6.1f MB) numCopies=%d eachCopyElements=%zu eachCopyBytes=%zu\n",
|
||||
__func__, Nbytes, (double)(Nbytes)/1024.0/1024.0, numCopies, eachCopyElements, eachCopyBytes);
|
||||
|
||||
T *A_d;
|
||||
@@ -194,7 +194,7 @@ void test_manyInflightCopies(hipStream_t stream, int numElements, int numCopies,
|
||||
//stream=0; // fixme TODO
|
||||
|
||||
|
||||
for (int i=0; i<numCopies; i++)
|
||||
for (int i=0; i<numCopies; i++)
|
||||
{
|
||||
HIPASSERT(A_d + i*eachCopyElements < A_d + Nbytes);
|
||||
HIPCHECK(hipMemcpyAsync(&A_d[i*eachCopyElements], &A_h1[i*eachCopyElements], eachCopyBytes, hipMemcpyHostToDevice, stream));
|
||||
@@ -204,7 +204,7 @@ void test_manyInflightCopies(hipStream_t stream, int numElements, int numCopies,
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
for (int i=0; i<numCopies; i++)
|
||||
for (int i=0; i<numCopies; i++)
|
||||
{
|
||||
HIPASSERT(A_d + i*eachCopyElements < A_d + Nbytes);
|
||||
HIPCHECK(hipMemcpyAsync(&A_h2[i*eachCopyElements], &A_d[i*eachCopyElements], eachCopyBytes, hipMemcpyDeviceToHost, stream));
|
||||
@@ -252,7 +252,7 @@ void test_chunkedAsyncExample(int nStreams, bool useNullStream, bool useSyncMemc
|
||||
|
||||
|
||||
hipStream_t *stream = (hipStream_t*)malloc(sizeof(hipStream_t) * nStreams);
|
||||
if (useNullStream) {
|
||||
if (useNullStream) {
|
||||
nStreams = 1;
|
||||
stream[0] = NULL;
|
||||
} else {
|
||||
@@ -262,7 +262,7 @@ void test_chunkedAsyncExample(int nStreams, bool useNullStream, bool useSyncMemc
|
||||
}
|
||||
|
||||
|
||||
size_t workLeft = N;
|
||||
size_t workLeft = N;
|
||||
size_t workPerStream = N / nStreams;
|
||||
for (int i = 0; i < nStreams; ++i) {
|
||||
size_t work = (workLeft < workPerStream) ? workLeft : workPerStream;
|
||||
@@ -287,7 +287,7 @@ void test_chunkedAsyncExample(int nStreams, bool useNullStream, bool useSyncMemc
|
||||
} else {
|
||||
HIPCHECK ( hipMemcpyAsync(&C_h[offset], &C_d[offset], workBytes, hipMemcpyDeviceToHost, stream[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
HIPCHECK (hipDeviceSynchronize());
|
||||
|
||||
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Conformance test for checking functionality of
|
||||
* hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId, size_t sizeBytes);
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
hipDevice_t device;
|
||||
size_t Nbytes = N*sizeof(int);
|
||||
int numDevices = 0;
|
||||
int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d;
|
||||
int *A_h, *B_h, *C_h ;
|
||||
|
||||
HIPCHECK(hipGetDeviceCount(&numDevices));
|
||||
if(numDevices > 1)
|
||||
{
|
||||
HIPCHECK(hipSetDevice(0));
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||
HIPCHECK(hipSetDevice(1));
|
||||
HIPCHECK(hipMalloc(&X_d,Nbytes));
|
||||
HIPCHECK(hipMalloc(&Y_d,Nbytes));
|
||||
HIPCHECK(hipMalloc(&Z_d,Nbytes));
|
||||
|
||||
|
||||
HIPCHECK(hipSetDevice(0));
|
||||
HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
||||
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d,B_d, C_d, N);
|
||||
HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N);
|
||||
|
||||
|
||||
HIPCHECK(hipSetDevice(1));
|
||||
HIPCHECK(hipMemcpyDtoD(X_d, A_d, Nbytes));
|
||||
HIPCHECK(hipMemcpyDtoD(Y_d, B_d, Nbytes));
|
||||
|
||||
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, X_d,Y_d, Z_d, N);
|
||||
HIPCHECK(hipMemcpyDtoH(C_h, Z_d, Nbytes));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N);
|
||||
|
||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
HIPCHECK(hipFree(X_d));
|
||||
HIPCHECK(hipFree(Y_d));
|
||||
HIPCHECK(hipFree(Z_d));
|
||||
}
|
||||
|
||||
passed();
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Conformance test for checking functionality of
|
||||
* hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId, size_t sizeBytes);
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
hipDevice_t device;
|
||||
size_t Nbytes = N*sizeof(int);
|
||||
int numDevices = 0;
|
||||
int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d;
|
||||
int *A_h, *B_h, *C_h ;
|
||||
hipStream_t s;
|
||||
|
||||
HIPCHECK(hipGetDeviceCount(&numDevices));
|
||||
if(numDevices > 1)
|
||||
{
|
||||
HIPCHECK(hipSetDevice(0));
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||
HIPCHECK(hipSetDevice(1));
|
||||
HIPCHECK(hipMalloc(&X_d,Nbytes));
|
||||
HIPCHECK(hipMalloc(&Y_d,Nbytes));
|
||||
HIPCHECK(hipMalloc(&Z_d,Nbytes));
|
||||
|
||||
|
||||
HIPCHECK(hipSetDevice(0));
|
||||
HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
||||
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d,B_d, C_d, N);
|
||||
HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N);
|
||||
|
||||
HIPCHECK(hipStreamCreate(&s));
|
||||
HIPCHECK(hipSetDevice(1));
|
||||
HIPCHECK(hipMemcpyDtoDAsync(X_d, A_d, Nbytes, s));
|
||||
HIPCHECK(hipMemcpyDtoDAsync(Y_d, B_d, Nbytes, s));
|
||||
|
||||
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, X_d,Y_d, Z_d, N);
|
||||
HIPCHECK(hipMemcpyDtoHAsync(C_h, Z_d, Nbytes, s));
|
||||
HIPCHECK(hipStreamSynchronize(s));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N);
|
||||
HIPCHECK(hipStreamDestroy(s));
|
||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
HIPCHECK(hipFree(X_d));
|
||||
HIPCHECK(hipFree(Y_d));
|
||||
HIPCHECK(hipFree(Z_d));
|
||||
}
|
||||
|
||||
passed();
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Conformance test for checking functionality of
|
||||
* hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId, size_t sizeBytes);
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
hipDevice_t device;
|
||||
size_t Nbytes = N*sizeof(int);
|
||||
int numDevices = 0;
|
||||
int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d;
|
||||
int *A_h, *B_h, *C_h ;
|
||||
|
||||
HIPCHECK(hipGetDeviceCount(&numDevices));
|
||||
if(numDevices > 1)
|
||||
{
|
||||
HIPCHECK(hipSetDevice(0));
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||
HIPCHECK(hipSetDevice(1));
|
||||
HIPCHECK(hipMalloc(&X_d,Nbytes));
|
||||
HIPCHECK(hipMalloc(&Y_d,Nbytes));
|
||||
HIPCHECK(hipMalloc(&Z_d,Nbytes));
|
||||
|
||||
HIPCHECK(hipSetDevice(0));
|
||||
HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
||||
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d,B_d, C_d, N);
|
||||
HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N);
|
||||
|
||||
HIPCHECK(hipSetDevice(1));
|
||||
hipMemcpyPeer(X_d, 1, A_d, 0, Nbytes); //this call is eqv to hipMemcpy(hipMemcpyD2D) which goes via stg bufs.
|
||||
hipMemcpyPeer(Y_d, 1, B_d, 0, Nbytes);
|
||||
|
||||
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, X_d,Y_d, Z_d, N);
|
||||
HIPCHECK(hipMemcpy(C_h, Z_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N);
|
||||
|
||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
HIPCHECK(hipFree(X_d));
|
||||
HIPCHECK(hipFree(Y_d));
|
||||
HIPCHECK(hipFree(Z_d));
|
||||
}
|
||||
passed();
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,85 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Conformance test for checking functionality of
|
||||
* hipError_t hipMemcpyPeer(void* dst, int dstDeviceId, const void* src, int srcDeviceId, size_t sizeBytes);
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include "test_common.h"
|
||||
|
||||
int main()
|
||||
{
|
||||
hipDevice_t device;
|
||||
size_t Nbytes = N*sizeof(int);
|
||||
int numDevices = 0;
|
||||
int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d;
|
||||
int *A_h, *B_h, *C_h ;
|
||||
hipStream_t s;
|
||||
|
||||
|
||||
HIPCHECK(hipGetDeviceCount(&numDevices));
|
||||
if(numDevices > 1)
|
||||
{
|
||||
HIPCHECK(hipSetDevice(0));
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||
HIPCHECK(hipSetDevice(1));
|
||||
HIPCHECK(hipMalloc(&X_d,Nbytes));
|
||||
HIPCHECK(hipMalloc(&Y_d,Nbytes));
|
||||
HIPCHECK(hipMalloc(&Z_d,Nbytes));
|
||||
|
||||
|
||||
HIPCHECK(hipSetDevice(0));
|
||||
HIPCHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIPCHECK ( hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
||||
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d,B_d, C_d, N);
|
||||
HIPCHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HIPCHECK (hipDeviceSynchronize());
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N);
|
||||
|
||||
HIPCHECK(hipStreamCreate(&s));
|
||||
HIPCHECK(hipSetDevice(1));
|
||||
HIPCHECK(hipMemcpyPeerAsync(X_d, 1, A_d, 0, Nbytes, s));
|
||||
HIPCHECK(hipMemcpyPeerAsync(Y_d, 1, B_d, 0, Nbytes, s));
|
||||
|
||||
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, X_d,Y_d, Z_d, N);
|
||||
HIPCHECK ( hipMemcpy(C_h, Z_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HIPCHECK (hipDeviceSynchronize());
|
||||
HIPCHECK (hipStreamSynchronize(s));
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N);
|
||||
|
||||
HIPCHECK(hipStreamDestroy(s));
|
||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
HIPCHECK(hipFree(X_d));
|
||||
HIPCHECK(hipFree(Y_d));
|
||||
HIPCHECK(hipFree(Z_d));
|
||||
}
|
||||
|
||||
passed();
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -31,7 +31,7 @@ THE SOFTWARE.
|
||||
|
||||
__global__ void Kernel(hipLaunchParm lp,volatile float* hostRes)
|
||||
{
|
||||
int tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
hostRes[tid] = tid + 1;
|
||||
__threadfence_system();
|
||||
// expecting that the data is getting flushed to host here!
|
||||
|
||||
@@ -24,7 +24,7 @@ THE SOFTWARE.
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11
|
||||
* RUN: %t EXCLUDE_HIP_PLATFORM all
|
||||
* RUN: %t EXCLUDE_HIP_PLATFORM all
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
@@ -33,11 +33,13 @@ THE SOFTWARE.
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
#include <hc_am.hpp>
|
||||
#define USE_HCC_MEMTRACKER 0
|
||||
#endif
|
||||
|
||||
#define USE_HCC_MEMTRACKER 0 /* Debug flag to show the memtracker periodically */
|
||||
|
||||
int elementSizes[] = {16, 1024,524288};
|
||||
|
||||
|
||||
int elementSizes[] = {1, 16, 1024, 524288, 16*1000*1000};
|
||||
int nSizes = sizeof(elementSizes) / sizeof(int);
|
||||
|
||||
int enablePeers(int dev0, int dev1)
|
||||
@@ -57,26 +59,30 @@ int enablePeers(int dev0, int dev1)
|
||||
return 0;
|
||||
};
|
||||
|
||||
// Set value of array to specified 32-bit integer:
|
||||
__global__ void
|
||||
memsetIntKernel(/*hipLaunchParm lp,*/ int * ptr, const int val, size_t numElements)
|
||||
memsetIntKernel(int * ptr, const int val, size_t numElements)
|
||||
{
|
||||
int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
|
||||
int stride = hipBlockDim_x * hipGridDim_x ;
|
||||
int gid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
int stride = blockDim.x * gridDim.x ;
|
||||
for (size_t i= gid; i< numElements; i+=stride){
|
||||
ptr[i] = val;
|
||||
}
|
||||
};
|
||||
|
||||
__global__ void
|
||||
memcpyIntKernel(/*hipLaunchParm lp, */const int * src, int* dst, size_t numElements)
|
||||
memcpyIntKernel(const int * src, int* dst, size_t numElements)
|
||||
{
|
||||
int gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
|
||||
int stride = hipBlockDim_x * hipGridDim_x ;
|
||||
int gid = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
int stride = blockDim.x * gridDim.x ;
|
||||
for (size_t i= gid; i< numElements; i+=stride){
|
||||
dst[i] = src[i];
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// CHeck arrays in reverse order, to more easily detect cases where
|
||||
// the copy is "partially" done.
|
||||
void checkReverse(const int *ptr, int numElements, int expected) {
|
||||
for (int i=numElements-1; i>=0; i--) {
|
||||
if (ptr[i] != expected) {
|
||||
@@ -88,7 +94,8 @@ void checkReverse(const int *ptr, int numElements, int expected) {
|
||||
printf ("test: OK\n");
|
||||
}
|
||||
|
||||
void runTest(bool stepAIsCopy, bool hostSync, hipStream_t gpu0Stream, hipStream_t gpu1Stream, int numElements,
|
||||
|
||||
void runTestImpl(bool stepAIsCopy, bool hostSync, hipStream_t gpu0Stream, hipStream_t gpu1Stream, int numElements,
|
||||
int * dataGpu0_0, int * dataGpu0_1, int *dataGpu1, int *dataHost, int expected)
|
||||
{
|
||||
hipEvent_t e;
|
||||
@@ -96,7 +103,7 @@ void runTest(bool stepAIsCopy, bool hostSync, hipStream_t gpu0Stream, hipStream_
|
||||
HIPCHECK(hipEventCreateWithFlags(&e,0));
|
||||
}
|
||||
const size_t sizeElements = numElements * sizeof(int);
|
||||
printf ("test: runTest with %zu bytes %s with hostSync %s\n", sizeElements, stepAIsCopy ? "copy" : "kernel", hostSync ? "enabled" : "disabled");
|
||||
printf ("test: runTestImpl with %zu bytes %s with hostSync %s\n", sizeElements, stepAIsCopy ? "copy" : "kernel", hostSync ? "enabled" : "disabled");
|
||||
|
||||
hipStream_t stepAStream = gpu0Stream;
|
||||
|
||||
@@ -127,9 +134,12 @@ void runTest(bool stepAIsCopy, bool hostSync, hipStream_t gpu0Stream, hipStream_
|
||||
HIPCHECK(hipStreamSynchronize(gpu0Stream));
|
||||
|
||||
checkReverse(dataHost, numElements, expected);
|
||||
if(!hostSync) {
|
||||
HIPCHECK(hipEventDestroy(e));
|
||||
}
|
||||
}
|
||||
|
||||
void testMultiGpu(int dev0, int dev1, int numElements, bool hostSync, bool useMemcpy)
|
||||
void testMultiGpu(int dev0, int dev1, int numElements, bool hostSync)
|
||||
{
|
||||
const size_t sizeElements = numElements * sizeof(int);
|
||||
|
||||
@@ -163,12 +173,15 @@ void testMultiGpu(int dev0, int dev1, int numElements, bool hostSync, bool useMe
|
||||
#endif
|
||||
|
||||
printf (" test: init complete\n");
|
||||
runTest(useMemcpy , hostSync, gpu0Stream, gpu1Stream, numElements, dataGpu0_0,dataGpu0_1, dataGpu1, dataHost, expected);
|
||||
runTestImpl(true, hostSync, gpu0Stream, gpu1Stream, numElements, dataGpu0_0,dataGpu0_1, dataGpu1, dataHost, expected);
|
||||
|
||||
HIPCHECK(hipFree(dataGpu0_0));
|
||||
HIPCHECK(hipFree(dataGpu0_1));
|
||||
HIPCHECK(hipFree(dataGpu1));
|
||||
HIPCHECK(hipHostFree(dataHost));
|
||||
|
||||
HIPCHECK(hipStreamDestroy(gpu0Stream));
|
||||
HIPCHECK(hipStreamDestroy(gpu1Stream));
|
||||
};
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
@@ -192,11 +205,9 @@ int main(int argc, char *argv[])
|
||||
return -1;
|
||||
};
|
||||
|
||||
for(int index = 1;index < nSizes;index++) {
|
||||
testMultiGpu(dev0, dev1, elementSizes[index] , false /* GPU Synchronization*/, true);
|
||||
testMultiGpu(dev0, dev1, elementSizes[index] , true /*Host Synchronization*/, true);
|
||||
testMultiGpu(dev0, dev1, elementSizes[index] , true /*Host Synchronization*/, false);
|
||||
testMultiGpu(dev0, dev1, elementSizes[index] , false /*Host Synchronization*/, false);
|
||||
for(int index = 0;index < nSizes;index++) {
|
||||
testMultiGpu(dev0, dev1, elementSizes[index] , false /*GPU Synchronization*/);
|
||||
testMultiGpu(dev0, dev1, elementSizes[index] , true /*Host Synchronization*/);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ THE SOFTWARE.
|
||||
#define kernel_name "hello_world"
|
||||
|
||||
__global__ void Cpy(hipLaunchParm lp, float *Ad, float* Bd){
|
||||
int tx = hipThreadIdx_x;
|
||||
int tx = threadIdx.x;
|
||||
Bd[tx] = Ad[tx];
|
||||
}
|
||||
|
||||
|
||||
@@ -24,7 +24,7 @@ THE SOFTWARE.
|
||||
|
||||
extern "C" __global__ void hello_world(hipLaunchParm lp, float *a, float *b)
|
||||
{
|
||||
int tx = hipThreadIdx_x;
|
||||
int tx = threadIdx.x;
|
||||
b[tx] = a[tx];
|
||||
}
|
||||
|
||||
|
||||
@@ -41,8 +41,8 @@ void printSep()
|
||||
// Designed to stress a small number of simple smoke tests
|
||||
|
||||
template<
|
||||
typename T=float,
|
||||
class P=HipTest::Unpinned,
|
||||
typename T=float,
|
||||
class P=HipTest::Unpinned,
|
||||
class C=HipTest::Memcpy
|
||||
>
|
||||
void simpleVectorAdd(size_t numElements, int iters, hipStream_t stream)
|
||||
|
||||
@@ -35,7 +35,7 @@ THE SOFTWARE.
|
||||
|
||||
template<typename T>
|
||||
__global__ void Inc(hipLaunchParm lp, T *Array){
|
||||
int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x;
|
||||
int tx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
Array[tx] = Array[tx] + T(1);
|
||||
}
|
||||
|
||||
@@ -116,7 +116,7 @@ int main(int argc, char **argv)
|
||||
}
|
||||
|
||||
const size_t size = N * sizeof(float);
|
||||
|
||||
|
||||
for (int i=0; i< iterations; i++) {
|
||||
|
||||
std::thread t1(run1, size, stream[0]);
|
||||
@@ -126,7 +126,7 @@ int main(int argc, char **argv)
|
||||
// std::cout<<"T1"<<std::endl;
|
||||
t2.join();
|
||||
// std::cout<<"T2"<<std::endl;
|
||||
t3.join();
|
||||
t3.join();
|
||||
}
|
||||
passed();
|
||||
}
|
||||
|
||||
変更されたファイルが多すぎるため、一部のファイルは表示されません さらに表示
新しいイシューから参照
ユーザーをブロックする