Merge branch 'amd-develop' into amd-master

Change-Id: Ieda710b081404e4805c568081ae8fe623f71d6ad
Tento commit je obsažen v:
Maneesh Gupta
2016-09-09 16:46:27 +05:30
26 změnil soubory, kde provedl 642 přidání a 327 odebrání
+1 -1
Zobrazit soubor
@@ -116,7 +116,7 @@ endif()
# Set if we need to build shared or static library
if(NOT DEFINED HIP_LIB_TYPE)
if(NOT DEFINED ENV{HIP_LIB_TYPE})
set(HIP_LIB_TYPE 0)
set(HIP_LIB_TYPE 1)
else()
set(HIP_LIB_TYPE $ENV{HIP_LIB_TYPE})
endif()
+1
Zobrazit soubor
@@ -31,6 +31,7 @@ HIP releases are typically of two types. The tag naming convention is different
- [HIP Kernel Language](docs/markdown/hip_kernel_language.md)
- [HIP Runtime API (Doxygen)](http://gpuopen-professionalcompute-tools.github.io/HIP)
- [HIP Porting Guide](docs/markdown/hip_porting_guide.md)
- [HIP Porting Driver Guide](docs/markdown/hip_porting_driver_api.md)
- [HIP Terminology](docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL)
- [clang-hipify](clang-hipify/README.md)
- [Developer/CONTRIBUTING Info](CONTRIBUTING.md)
Spustitelný soubor
+62
Zobrazit soubor
@@ -0,0 +1,62 @@
#!/bin/bash
function die {
echo "${1-Died}." >&2
exit 1
}
if [ $# = 0 ]; then
die "$(basename $0): Invalid number of arguments"
fi
: ${ROCM_PATH:=/opt/rocm}
: ${ROCM_TARGET:=fiji}
INPUT_FILES=""
OUTPUT_FILE=""
while [[ $# -gt 1 ]]; do
key="$1"
case $key in
-o)
OUTPUT_FILE="$2"
shift
;;
*)
INPUT_FILES="$INPUT_FILES $key"
esac
shift
done
[ INPUT_FILES != "" ] || die "No source files specified"
[ OUTPUT_FILE != "" ] || die "Output file not specified"
SOURCE="${BASH_SOURCE[0]}"
HIP_PATH="$( command cd -P "$( dirname "$SOURCE" )/.." && pwd )"
export KMDUMPISA=1
export KMDUMPLLVM=1
hipgenisa_dir=`mktemp -d --tmpdir=/tmp hip.XXXXXXXX`
hipgenisa_main=`mktemp src.XXXXXXXX.cpp`
hipgenisa_files="$hipgenisa_main"
for inputfile in $INPUT_FILES; do
sed 's/extern \+"C" \+//g' $inputfile > $inputfile.kernel.tmp.cpp
hipgenisa_files="$hipgenisa_files $inputfile.kernel.tmp.cpp"
done
printf "\nint main(){}\n" >> $hipgenisa_main
$HIP_PATH/bin/hipcc $hipgenisa_files -o $hipgenisa_dir/a.out
mv dump.* $hipgenisa_dir
$ROCM_PATH/hcc-lc/bin/llvm-mc -arch=amdgcn -mcpu=$ROCM_TARGET -filetype=obj $hipgenisa_dir/dump.isa -o $hipgenisa_dir/dump.o
$ROCM_PATH/llvm/bin/clang -target amdgcn--amdhsa $hipgenisa_dir/dump.o -o $hipgenisa_dir/dump.co
map_sym=""
kernels=$(objdump -t $hipgenisa_dir/dump.co | grep grid_launch_parm | sed 's/ \+/ /g; s/\t/ /g' | cut -d" " -f6)
for mangled_sym in $kernels; do
real_sym=$(c++filt $(c++filt _$mangled_sym | cut -d: -f3 | sed 's/_functor//g') | cut -d\( -f1)
map_sym="--redefine-sym $mangled_sym=$real_sym $map_sym"
done
objcopy -F elf64-little $map_sym $hipgenisa_dir/dump.co $OUTPUT_FILE
rm $hipgenisa_files
rm -r $hipgenisa_dir
+1 -2
Zobrazit soubor
@@ -181,8 +181,7 @@ if ($verbose & 0x4) {
# Handle code object generation
my $ISACMD="";
if($HIP_PLATFORM eq "hcc"){
$ISACMD .= "$HIP_PATH/bin/hipgenisa.sh ";
$ISACMD .= $ROCM_PATH;
$ISACMD .= "set ROCM_PATH=$ROCM_PATH && set ROCM_TARGET=$ROCM_TARGET && $HIP_PATH/bin/hccgenco.sh ";
if($ARGV[0] eq "--genco"){
foreach $isaarg (@ARGV[1..$#ARGV]){
$ISACMD .= " ";
-42
Zobrazit soubor
@@ -1,42 +0,0 @@
#!/bin/bash
if [ $1 = " " ]
then
exit
fi
ROCM_PATH=$1
GEN_ISA=$2
FILE_NAMES=$3
OUT=$4
OUTPUT_FILE=$5
TARGET=""
if [ ${GEN_ISA:0:12} = "--target-isa" ]
then
TARGET=${GEN_ISA:13:12}
fi
SOURCE="${BASH_SOURCE[0]}"
HIP_PATH="$( command cd -P "$( dirname "$SOURCE" )/.." && pwd )"
export KMDUMPISA=1
export KMDUMPLLVM=1
hipgenisa_dir=`mktemp -d --tmpdir=/tmp hip.XXXXXXXX`;
sed 's/extern \+"C" \+//g' $FILE_NAMES > $FILE_NAMES.kernel.tmp.cpp
echo "
int main(){}
" >> $FILE_NAMES.kernel.tmp.cpp
$HIP_PATH/bin/hipcc $FILE_NAMES.kernel.tmp.cpp -o $hipgenisa_dir/a.out
mv dump.* $hipgenisa_dir
$ROCM_PATH/hcc-lc/bin/llvm-mc -arch=amdgcn -mcpu=$TARGET -filetype=obj $hipgenisa_dir/dump.isa -o $hipgenisa_dir/dump.o
$ROCM_PATH/llvm/bin/clang -target amdgcn--amdhsa $hipgenisa_dir/dump.o -o $hipgenisa_dir/dump.co
map_sym=""
kernels=$(objdump -t $hipgenisa_dir/dump.co | grep grid_launch_parm | sed 's/ \+/ /g; s/\t/ /g' | cut -d" " -f6)
for mangled_sym in $kernels
do
real_sym=$(c++filt $(c++filt _$mangled_sym | cut -d: -f3 | sed 's/_functor//g') | cut -d\( -f1)
map_sym="--redefine-sym $mangled_sym=$real_sym $map_sym"
done
objcopy -F elf64-little $map_sym $hipgenisa_dir/dump.co $OUTPUT_FILE
rm $FILE_NAMES.kernel.tmp.cpp
rm -r $hipgenisa_dir
+5 -5
Zobrazit soubor
@@ -699,12 +699,12 @@ sub warnUnsupportedSpecialFunctions
#"__ballot",
#"__popc",
#"__clz",
#"__clz",
"__shfl",
"__shfl_up",
"__shfl_down",
"__shfl_xor",
#"__shfl",
#"__shfl_up",
#"__shfl_down",
#"__shfl_xor",
"__prof_trigger",
+100 -23
Zobrazit soubor
@@ -56,7 +56,8 @@ using namespace llvm;
#define DEBUG_TYPE "cuda2hip"
enum ConvTypes {
CONV_DEV = 0,
CONV_DRIVER = 0,
CONV_DEV,
CONV_MEM,
CONV_KERN,
CONV_COORD_FUNC,
@@ -64,6 +65,9 @@ enum ConvTypes {
CONV_SPECIAL_FUNC,
CONV_STREAM,
CONV_EVENT,
CONV_CONTEXT,
CONV_MODULE,
CONV_CACHE,
CONV_ERR,
CONV_DEF,
CONV_TEX,
@@ -77,11 +81,10 @@ enum ConvTypes {
};
const char *counterNames[CONV_LAST] = {
"dev", "mem", "kern", "coord_func", "math_func",
"special_func", "stream", "event", "err", "def",
"tex", "other", "include", "include_cuda_main_header",
"driver", "dev", "mem", "kern", "coord_func", "math_func",
"special_func", "stream", "event", "ctx", "module", "cache",
"err", "def", "tex", "other", "include", "include_cuda_main_header",
"type", "literal", "numeric_literal"};
enum ApiTypes {
API_DRIVER = 0,
API_RUNTIME,
@@ -90,7 +93,7 @@ enum ApiTypes {
};
const char *apiNames[API_LAST] = {
"CUDA API", "CUDA RT API", "CUDA BLAS API"};
"CUDA", "CUDA RT", "CUBLAS"};
namespace {
@@ -294,10 +297,10 @@ struct cuda2hipMap {
cuda2hipRename["CUfunc_cache_enum"] = {"hipFuncCache", CONV_TYPE, API_DRIVER};
cuda2hipRename["CUfunc_cache"] = {"hipFuncCache", CONV_TYPE, API_DRIVER};
cuda2hipRename["CU_FUNC_CACHE_PREFER_NONE"] = {"hipFuncCachePreferNone", CONV_DEV, API_DRIVER};
cuda2hipRename["CU_FUNC_CACHE_PREFER_SHARED"] = {"hipFuncCachePreferShared", CONV_DEV, API_DRIVER};
cuda2hipRename["CU_FUNC_CACHE_PREFER_L1"] = {"hipFuncCachePreferL1", CONV_DEV, API_DRIVER};
cuda2hipRename["CU_FUNC_CACHE_PREFER_EQUAL"] = {"hipFuncCachePreferEqual", CONV_DEV, API_DRIVER};
cuda2hipRename["CU_FUNC_CACHE_PREFER_NONE"] = {"hipFuncCachePreferNone", CONV_CACHE, API_DRIVER};
cuda2hipRename["CU_FUNC_CACHE_PREFER_SHARED"] = {"hipFuncCachePreferShared", CONV_CACHE, API_DRIVER};
cuda2hipRename["CU_FUNC_CACHE_PREFER_L1"] = {"hipFuncCachePreferL1", CONV_CACHE, API_DRIVER};
cuda2hipRename["CU_FUNC_CACHE_PREFER_EQUAL"] = {"hipFuncCachePreferEqual", CONV_CACHE, API_DRIVER};
cuda2hipRename["CUsharedconfig_enum"] = {"hipSharedMemConfig", CONV_TYPE, API_DRIVER};
cuda2hipRename["CUsharedconfig"] = {"hipSharedMemConfig", CONV_TYPE, API_DRIVER};
@@ -308,7 +311,81 @@ struct cuda2hipMap {
cuda2hipRename["CUcontext"] = {"hipCtx_t", CONV_TYPE, API_DRIVER};
cuda2hipRename["CUmodule"] = {"hipModule_t", CONV_TYPE, API_DRIVER};
cuda2hipRename["CUevent"] = {"hipEvent_t", CONV_TYPE, API_DRIVER};
// Event Flags
cuda2hipRename["CU_EVENT_DEFAULT"] = {"hipEventDefault", CONV_EVENT, API_DRIVER};
cuda2hipRename["CU_EVENT_BLOCKING_SYNC"] = {"hipEventBlockingSync", CONV_EVENT, API_DRIVER};
cuda2hipRename["CU_EVENT_DISABLE_TIMING"] = {"hipEventDisableTiming", CONV_EVENT, API_DRIVER};
cuda2hipRename["CU_EVENT_INTERPROCESS"] = {"hipEventInterprocess", CONV_EVENT, API_DRIVER};
cuda2hipRename["CUstream"] = {"hipStream_t", CONV_TYPE, API_DRIVER};
// Stream Flags
cuda2hipRename["CU_STREAM_DEFAULT"] = {"hipStreamDefault", CONV_STREAM, API_DRIVER};
cuda2hipRename["CU_STREAM_NON_BLOCKING"] = {"hipStreamNonBlocking", CONV_STREAM, API_DRIVER};
// Context
cuda2hipRename["cuCtxCreate_v2"] = {"hipCtxCreate", CONV_CONTEXT, API_DRIVER};
cuda2hipRename["cuCtxDestroy_v2"] = {"hipCtxDestroy", CONV_CONTEXT, API_DRIVER};
cuda2hipRename["cuCtxPopCurrent_v2"] = {"hipCtxPopCurrent", CONV_CONTEXT, API_DRIVER};
cuda2hipRename["cuCtxPushCurrent_v2"] = {"hipCtxPushCurrent", CONV_CONTEXT, API_DRIVER};
cuda2hipRename["cuCtxSetCurrent"] = {"hipCtxSetCurrent", CONV_CONTEXT, API_DRIVER};
cuda2hipRename["cuCtxGetCurrent"] = {"hipCtxGetCurrent", CONV_CONTEXT, API_DRIVER};
cuda2hipRename["cuCtxGetDevice"] = {"hipCtxGetDevice", CONV_CONTEXT, API_DRIVER};
cuda2hipRename["cuCtxGetApiVersion"] = {"hipCtxGetApiVersion", CONV_CONTEXT, API_DRIVER};
cuda2hipRename["cuCtxGetCacheConfig"] = {"hipCtxGetCacheConfig", CONV_CONTEXT, API_DRIVER};
cuda2hipRename["cuCtxSetCacheConfig"] = {"hipCtxSetCacheConfig", CONV_CONTEXT, API_DRIVER};
cuda2hipRename["cuCtxSetSharedMemConfig"] = {"hipCtxSetSharedMemConfig", CONV_CONTEXT, API_DRIVER};
cuda2hipRename["cuCtxGetSharedMemConfig"] = {"hipCtxGetSharedMemConfig", CONV_CONTEXT, API_DRIVER};
cuda2hipRename["cuCtxSynchronize"] = {"hipCtxSynchronize", CONV_CONTEXT, API_DRIVER};
cuda2hipRename["cuCtxGetFlags"] = {"hipCtxGetFlags", CONV_CONTEXT, API_DRIVER};
cuda2hipRename["cuCtxEnablePeerAccess"] = {"hipCtxEnablePeerAccess", CONV_CONTEXT, API_DRIVER};
cuda2hipRename["cuCtxDisablePeerAccess"] = {"hipCtxDisablePeerAccess", CONV_CONTEXT, API_DRIVER};
// unsupported yet by HIP
// cuda2hipRename["cuCtxSetLimit"] = {"hipCtxSetLimit", CONV_CONTEXT, API_DRIVER};
// cuda2hipRename["cuCtxGetLimit"] = {"hipCtxGetLimit", CONV_CONTEXT, API_DRIVER};
// Device
cuda2hipRename["cuDeviceGet"] = {"hipGetDevice", CONV_DEV, API_DRIVER};
cuda2hipRename["cuDeviceGetName"] = {"hipDeviceGetName", CONV_DEV, API_DRIVER};
cuda2hipRename["cuDeviceGetCount"] = {"hipGetDeviceCount", CONV_DEV, API_DRIVER};
cuda2hipRename["cuDeviceGetAttribute"] = {"hipDeviceGetAttribute", CONV_DEV, API_DRIVER};
cuda2hipRename["cuDeviceGetProperties"] = {"hipGetDeviceProperties", CONV_DEV, API_DRIVER};
cuda2hipRename["cuDeviceGetPCIBusId"] = {"hipDeviceGetPCIBusId", CONV_DEV, API_DRIVER};
// unsupported yet by HIP
// cuda2hipRename["cuDeviceGetByPCIBusId"] = {"hipDeviceGetByPCIBusId", CONV_DEV, API_DRIVER};
cuda2hipRename["cuDeviceTotalMem_v2"] = {"hipDeviceTotalMem", CONV_DEV, API_DRIVER};
cuda2hipRename["cuDeviceComputeCapability"] = {"hipDeviceComputeCapability", CONV_DEV, API_DRIVER};
cuda2hipRename["cuDeviceCanAccessPeer"] = {"hipDeviceCanAccessPeer", CONV_DEV, API_DRIVER};
// Driver
cuda2hipRename["cuDriverGetVersion"] = {"hipDriverGetVersion", CONV_DRIVER, API_DRIVER};
// Events
cuda2hipRename["cuEventCreate"] = {"hipEventCreate", CONV_EVENT, API_DRIVER};
cuda2hipRename["cuEventDestroy_v2"] = {"hipEventDestroy", CONV_EVENT, API_DRIVER};
cuda2hipRename["cuEventElapsedTime"] = {"hipEventElapsedTime", CONV_EVENT, API_DRIVER};
cuda2hipRename["cuEventQuery"] = {"hipEventQuery", CONV_EVENT, API_DRIVER};
cuda2hipRename["cuEventRecord"] = {"hipEventRecord", CONV_EVENT, API_DRIVER};
cuda2hipRename["cuEventSynchronize"] = {"hipEventSynchronize", CONV_EVENT, API_DRIVER};
// Module
cuda2hipRename["cuModuleGetFunction"] = {"hipModuleGetFunction", CONV_MODULE, API_DRIVER};
cuda2hipRename["cuModuleGetGlobal_v2"] = {"hipModuleGetGlobal", CONV_MODULE, API_DRIVER};
cuda2hipRename["cuModuleLoad"] = {"hipModuleLoad", CONV_MODULE, API_DRIVER};
cuda2hipRename["cuModuleLoadData"] = {"hipModuleLoadData", CONV_MODULE, API_DRIVER};
// unsupported yet by HIP
// cuda2hipRename["cuModuleLoadDataEx"] = {"hipModuleLoadDataEx", CONV_MODULE, API_DRIVER};
// cuda2hipRename["cuModuleLoadFatBinary"] = {"hipModuleLoadFatBinary", CONV_MODULE, API_DRIVER};
cuda2hipRename["cuModuleUnload"] = {"hipModuleUnload", CONV_MODULE, API_DRIVER};
cuda2hipRename["cuLaunchKernel"] = {"hipModuleLaunchKernel", CONV_MODULE, API_DRIVER};
// Streams
// unsupported yet by HIP
// cuda2hipRename["cuStreamAddCallback"] = {"hipStreamAddCallback", CONV_STREAM, API_DRIVER};
cuda2hipRename["cuStreamCreate"] = {"hipStreamCreate", CONV_STREAM, API_DRIVER};
cuda2hipRename["cuStreamDestroy_v2"] = {"hipStreamDestroy", CONV_STREAM, API_DRIVER};
cuda2hipRename["cuStreamQuery"] = {"hipStreamQuery", CONV_STREAM, API_DRIVER};
cuda2hipRename["cuStreamSynchronize"] = {"hipStreamSynchronize", CONV_STREAM, API_DRIVER};
cuda2hipRename["cuStreamWaitEvent"] = {"hipStreamWaitEvent", CONV_STREAM, API_DRIVER};
/////////////////////////////// CUDA RT API ///////////////////////////////
// Error API
@@ -411,8 +488,8 @@ struct cuda2hipMap {
cuda2hipRename["cudaStreamDestroy"] = {"hipStreamDestroy", CONV_STREAM, API_RUNTIME};
cuda2hipRename["cudaStreamWaitEvent"] = {"hipStreamWaitEvent", CONV_STREAM, API_RUNTIME};
cuda2hipRename["cudaStreamSynchronize"] = {"hipStreamSynchronize", CONV_STREAM, API_RUNTIME};
// Stream Flags
cuda2hipRename["cudaStreamGetFlags"] = {"hipStreamGetFlags", CONV_STREAM, API_RUNTIME};
// Stream Flags
cuda2hipRename["cudaStreamDefault"] = {"hipStreamDefault", CONV_STREAM, API_RUNTIME};
cuda2hipRename["cudaStreamNonBlocking"] = {"hipStreamNonBlocking", CONV_STREAM, API_RUNTIME};
@@ -425,6 +502,7 @@ struct cuda2hipMap {
cuda2hipRename["cudaThreadExit"] = {"hipDeviceReset", CONV_DEV, API_RUNTIME};
cuda2hipRename["cudaSetDevice"] = {"hipSetDevice", CONV_DEV, API_RUNTIME};
cuda2hipRename["cudaGetDevice"] = {"hipGetDevice", CONV_DEV, API_RUNTIME};
cuda2hipRename["cudaGetDeviceCount"] = {"hipGetDeviceCount", CONV_DEV, API_RUNTIME};
// Attributes
cuda2hipRename["cudaDeviceAttr"] = {"hipDeviceAttribute_t", CONV_TYPE, API_RUNTIME};
@@ -482,22 +560,21 @@ struct cuda2hipMap {
//cuda2hipRename["cudaDeviceMask"] = {"hipDeviceMask", CONV_DEV, API_RUNTIME};
// Cache config
cuda2hipRename["cudaDeviceSetCacheConfig"] = {"hipDeviceSetCacheConfig", CONV_DEV, API_RUNTIME};
cuda2hipRename["cudaDeviceSetCacheConfig"] = {"hipDeviceSetCacheConfig", CONV_CACHE, API_RUNTIME};
// translate deprecated
cuda2hipRename["cudaThreadSetCacheConfig"] = {"hipDeviceSetCacheConfig", CONV_DEV, API_RUNTIME};
cuda2hipRename["cudaDeviceGetCacheConfig"] = {"hipDeviceGetCacheConfig", CONV_DEV, API_RUNTIME};
cuda2hipRename["cudaThreadSetCacheConfig"] = {"hipDeviceSetCacheConfig", CONV_CACHE, API_RUNTIME};
cuda2hipRename["cudaDeviceGetCacheConfig"] = {"hipDeviceGetCacheConfig", CONV_CACHE, API_RUNTIME};
// translate deprecated
cuda2hipRename["cudaThreadGetCacheConfig"] = {"hipDeviceGetCacheConfig", CONV_DEV, API_RUNTIME};
cuda2hipRename["cudaFuncCache"] = {"hipFuncCache", CONV_DEV, API_RUNTIME};
cuda2hipRename["cudaFuncCachePreferNone"] = {"hipFuncCachePreferNone", CONV_DEV, API_RUNTIME};
cuda2hipRename["cudaFuncCachePreferShared"] = {"hipFuncCachePreferShared", CONV_DEV, API_RUNTIME};
cuda2hipRename["cudaFuncCachePreferL1"] = {"hipFuncCachePreferL1", CONV_DEV, API_RUNTIME};
cuda2hipRename["cudaFuncCachePreferEqual"] = {"hipFuncCachePreferEqual", CONV_DEV, API_RUNTIME};
cuda2hipRename["cudaFuncSetCacheConfig"] = {"hipFuncSetCacheConfig", CONV_DEV, API_RUNTIME};
cuda2hipRename["cudaThreadGetCacheConfig"] = {"hipDeviceGetCacheConfig", CONV_CACHE, API_RUNTIME};
cuda2hipRename["cudaFuncCache"] = {"hipFuncCache", CONV_CACHE, API_RUNTIME};
cuda2hipRename["cudaFuncCachePreferNone"] = {"hipFuncCachePreferNone", CONV_CACHE, API_RUNTIME};
cuda2hipRename["cudaFuncCachePreferShared"] = {"hipFuncCachePreferShared", CONV_CACHE, API_RUNTIME};
cuda2hipRename["cudaFuncCachePreferL1"] = {"hipFuncCachePreferL1", CONV_CACHE, API_RUNTIME};
cuda2hipRename["cudaFuncCachePreferEqual"] = {"hipFuncCachePreferEqual", CONV_CACHE, API_RUNTIME};
cuda2hipRename["cudaFuncSetCacheConfig"] = {"hipFuncSetCacheConfig", CONV_CACHE, API_RUNTIME};
// Driver/Runtime
cuda2hipRename["cudaDriverGetVersion"] = {"hipDriverGetVersion", CONV_DEV, API_RUNTIME};
cuda2hipRename["cudaGetDeviceCount"] = {"hipGetDeviceCount", CONV_DEV, API_RUNTIME};
cuda2hipRename["cudaDriverGetVersion"] = {"hipDriverGetVersion", CONV_DRIVER, API_RUNTIME};
// unsupported yet
//cuda2hipRename["cudaRuntimeGetVersion"] = {"hipRuntimeGetVersion", CONV_DEV, API_RUNTIME};
+5 -7
Zobrazit soubor
@@ -179,7 +179,7 @@ hip_find_helper_file(run_hipcc cmake)
###############################################################################
###############################################################################
# MACRO: Seperate the options from the sources
# MACRO: Separate the options from the sources
###############################################################################
macro(HIP_GET_SOURCES_AND_OPTIONS _sources _cmake_options _hipcc_options _hcc_options _nvcc_options)
set(${_sources})
@@ -303,7 +303,7 @@ macro(HIP_INCLUDE_HIPCC_DEPENDENCIES dependency_file)
endforeach()
else()
# No dependencies, so regenerate the file
set(CUDA_NVCC_DEPEND_REGENERATE TRUE)
set(HIP_HIPCC_DEPEND_REGENERATE TRUE)
endif()
# Regenerate the dependency file if needed
@@ -416,7 +416,7 @@ macro(HIP_PREPARE_TARGET_COMMANDS _target _format _generated_files)
# Create up the comment string
file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}")
set(hip_build_comment_string "Building HIPCC (${cuda_build_type}) object ${generated_file_relative_path}")
set(hip_build_comment_string "Building HIPCC (using ${HIP_PLATFORM}) object ${generated_file_relative_path}")
# Build the generated file and dependency file
add_custom_command(
@@ -453,11 +453,9 @@ macro(HIP_ADD_EXECUTABLE hip_target)
# Separate the sources from the options
HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN})
HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options})
set(HIP_CMAKE_CXX_LINK_EXECUTABLE ${CMAKE_CXX_LINK_EXECUTABLE})
set(CMAKE_CXX_LINK_EXECUTABLE "${HIP_HIPCC_EXECUTABLE} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET>")
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_EXECUTABLE} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET>")
add_executable(${hip_target} ${_cmake_options} ${_generated_files} ${_sources})
set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE ${HIP_C_OR_CXX})
#set(CMAKE_CXX_COMPILER ${ORIGINAL_CMAKE_CXX_COMPILER})
set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE HIP)
endmacro()
# vim: ts=4:sw=4:expandtab:smartindent
+1 -1
Zobrazit soubor
@@ -22,7 +22,7 @@ endif()
# Set these up as variables to make reading the generated file easier
set(HIP_HIPCC_EXECUTABLE "@HIP_HIPCC_EXECUTABLE@") # path
set(HIP_HOST_COMPILER "@CUDA_HOST_COMPILER@") # path
set(HIP_HOST_COMPILER "@HIP_HOST_COMPILER@") # path
set(HIP_PLATFORM "@HIP_PLATFORM@") #string
set(CMAKE_COMMAND "@CMAKE_COMMAND@") # path
set(HIP_run_make2cmake "@HIP_run_make2cmake@") # path
+2
Zobrazit soubor
@@ -15,3 +15,5 @@ The default device can be set with hipSetDevice.
- hipify - tool to convert CUDA(R) code to portable C++ code.
- hipconfig - tool to report various confoguration properties of the target platform.
- nvcc = nvcc compiler, do not capitalize.
- hcc = heterogeneous compute compiler, do not capitalize.
+43 -7
Zobrazit soubor
@@ -25,7 +25,7 @@ THE SOFTWARE.
#include "hip/hcc_detail/unpinned_copy_engine.h"
#if defined(__HCC__) && (__hcc_workweek__ < 16186)
#if defined(__HCC__) && (__hcc_workweek__ < 16354)
#error("This version of HIP requires a newer version of HCC.");
#endif
@@ -87,7 +87,8 @@ class ihipCtx_t;
#define KCYN "\x1B[36m"
#define KWHT "\x1B[37m"
#define API_COLOR KGRN
extern const char *API_COLOR;
extern const char *API_COLOR_END;
// If set, thread-safety is enforced on all stream functions.
@@ -149,7 +150,7 @@ class ihipCtx_t;
if (HIP_ATP_MARKER || (COMPILE_HIP_DB && HIP_TRACE_API)) {\
std::string s = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\
if (COMPILE_HIP_DB && HIP_TRACE_API) {\
fprintf (stderr, API_COLOR "<<hip-api: %s\n" KNRM, s.c_str());\
fprintf (stderr, "%s<<hip-api: %s\n%s" , API_COLOR, s.c_str(), API_COLOR_END);\
}\
SCOPED_MARKER(s.c_str(), "HIP", NULL);\
}\
@@ -179,7 +180,7 @@ class ihipCtx_t;
tls_lastHipError = localHipStatus;\
\
if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API) {\
fprintf(stderr, " %ship-api: %-30s ret=%2d (%s)>>\n" KNRM, (localHipStatus == 0) ? API_COLOR:KRED, __func__, localHipStatus, ihipErrorString(localHipStatus));\
fprintf(stderr, " %ship-api: %-30s ret=%2d (%s)>>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\
}\
localHipStatus;\
})
@@ -365,8 +366,23 @@ public:
class ihipFunction_t{
public:
hsa_executable_symbol_t kernel_symbol;
uint64_t kernel;
ihipFunction_t(const char *name) {
size_t nameSz = strlen(name);
char *kernelName = (char*)malloc(nameSz);
strncpy(kernelName, name, nameSz);
_kernelName = kernelName;
};
~ihipFunction_t() {
if (_kernelName) {
free((void*)_kernelName);
_kernelName = NULL;
};
};
public:
const char *_kernelName;
hsa_executable_symbol_t _kernelSymbol;
uint64_t _kernel;
};
@@ -415,6 +431,9 @@ public:
SIGSEQNUM _streamSigId; // Monotonically increasing unique signal id.
hc::accelerator_view _av;
std::vector<hc::completion_future*> _cfs;
};
@@ -452,6 +471,9 @@ typedef uint64_t SeqNum_t ;
void locked_waitEvent(hipEvent_t event);
void locked_recordEvent(hipEvent_t event);
void addCFtoStream(LockedAccessor_StreamCrit_t &crit, hc::completion_future* cf);
void waitOnAllCFs(LockedAccessor_StreamCrit_t &crit);
//---
// Use this if we already have the stream critical data mutex:
@@ -460,7 +482,8 @@ typedef uint64_t SeqNum_t ;
void launchModuleKernel(hc::accelerator_view av, hsa_signal_t signal,
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
uint32_t sharedMemBytes, void *kernarg, size_t kernSize, uint64_t kernel);
uint32_t groupSegmentSize, uint32_t sharedMemBytes,
void *kernarg, size_t kernSize, uint64_t kernel);
// Non-threadsafe accessors - must be protected by high-level stream lock with accessor passed to function.
SIGSEQNUM lastCopySeqId (LockedAccessor_StreamCrit_t &crit) const { return crit->_last_copy_signal ? crit->_last_copy_signal->_sigId : 0; };
@@ -498,6 +521,7 @@ private: // Data
// Friends:
friend std::ostream& operator<<(std::ostream& os, const ihipStream_t& s);
friend hipError_t hipStreamQuery(hipStream_t);
};
@@ -718,6 +742,18 @@ inline std::ostream & operator<<(std::ostream& os, const dim3& s)
return os;
}
inline std::ostream & operator<<(std::ostream& os, const gl_dim3& s)
{
os << '{';
os << s.x;
os << ',';
os << s.y;
os << ',';
os << s.z;
os << '}';
return os;
}
// Stream printf functions:
inline std::ostream& operator<<(std::ostream& os, const hipEvent_t& e)
{
+6 -12
Zobrazit soubor
@@ -621,25 +621,19 @@ __device__ static inline void* memset(void* ptr, uint8_t val, size_t size)
#define HIP_KERNEL_NAME(...) __VA_ARGS__
#ifdef __HCC_CPP__
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp);
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm *lp);
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm *lp);
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm *lp);
extern void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, const hipStream_t stream);
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr);
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm *lp, const char *kernelNameStr);
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr);
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm *lp, const char *kernelNameStr);
extern void ihipPostLaunchKernel(hipStream_t stream, grid_launch_parm &lp);
// TODO - move to common header file.
#define KNRM "\x1B[0m"
#define KGRN "\x1B[32m"
// Due to multiple overloaded versions of ihipPreLaunchKernel, the numBlocks3D and blockDim3D can be either size_t or dim3 types
#define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
do {\
grid_launch_parm lp;\
lp.dynamic_group_mem_bytes = _groupMemBytes; \
hipStream_t trueStream = (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp)); \
if (HIP_TRACE_API) {\
ihipPrintKernelLaunch(#_kernelName, &lp, _stream); \
}\
hipStream_t trueStream = (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp, #_kernelName)); \
_kernelName (lp, ##__VA_ARGS__);\
ihipPostLaunchKernel(trueStream, lp);\
} while(0)
+38 -12
Zobrazit soubor
@@ -278,6 +278,11 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int deviceI
* @param [out] prop written with device properties
* @param [in] deviceId which device to query for information
*
* @return #hipSuccess, #hipErrorInvalidDevice
* @bug HCC always returns 0 for maxThreadsPerMultiProcessor
* @bug HCC always returns 0 for regsPerBlock
* @bug HCC always returns 0 for l2CacheSize
*
* Populates hipGetDeviceProperties with information for the specified device.
*/
hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId);
@@ -289,6 +294,7 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId);
/**
* @brief Set L1/Shared cache partition.
*
* @returns #hipSuccess
* Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
*
*/
@@ -298,6 +304,7 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig );
/**
* @brief Set Cache configuration for a specific function
*
* @returns #hipSuccess
* Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
*
*/
@@ -307,6 +314,7 @@ hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig );
/**
* @brief Set Cache configuration for a specific function
*
* @returns #hipSuccess
* Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
*
*/
@@ -318,6 +326,7 @@ hipError_t hipFuncSetCacheConfig ( hipFuncCache config );
/**
* @brief Get Shared memory bank configuration.
*
* @returns #hipSuccess
* Note: AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
*
*/
@@ -327,6 +336,7 @@ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig );
/**
* @brief Set Shared memory bank configuration.
*
* @returns #hipSuccess
* Note: AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
*
*/
@@ -335,6 +345,7 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config );
/**
* @brief Set Device flags
*
* @returns #hipSuccess
* Note: Only hipDeviceScheduleAuto and hipDeviceMapHost are supported
*
*/
@@ -345,6 +356,14 @@ hipError_t hipSetDeviceFlags ( unsigned flags);
* @}
*/
/**
* @brief Select compute-device which best matches criteria.
*
* @param [out] device ID
* @param [in] device properties pointer
*
*/
hipError_t hipChooseDevice(int *device,hipDeviceProp_t* prop);
/**
*-------------------------------------------------------------------------------------------------
@@ -474,6 +493,20 @@ hipError_t hipStreamCreate(hipStream_t *stream);
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags);
/**
* @brief Return #hipSuccess if all of the operations in the specified @p stream have completed, or #hipErrorNotReady if not.
*
* @param[in] stream stream to query
*
* @return #hipSuccess, #hipErrorNotReady
*
* This is thread-safe and returns a snapshot of the current state of the queue. However, if other host threads are sending work to the stream,
* the status may change immediately after the function is called. It is typically used for debug.
*/
hipError_t hipStreamQuery(hipStream_t stream);
/**
* @brief Wait for all commands in stream to complete.
*
@@ -726,7 +759,7 @@ hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) __attribute
hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, unsigned int flags) ;
/**
* @brief Return flags associated with host pointer
* @brief Return flags associated with host pointer
*
* @param[out] flagsPtr Memory location to store flags
* @param[in] hostPtr Host Pointer allocated through hipHostMalloc
@@ -1227,13 +1260,6 @@ hipError_t hipCtxDisablePeerAccess (hipCtx_t peerCtx);
* @}
*/
// TODO-ctx
/**
* @return hipSuccess, hipErrorInvalidDevice
*/
hipError_t hipDeviceGetFromId(hipDevice_t *device, int deviceId);
/**
* @brief Returns a handle to a compute device
* @param [out] device
@@ -1299,10 +1325,10 @@ hipError_t hipDriverGetVersion(int *driverVersion) ;
*
* @param [in] fname
* @param [out] module
*
*
* @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorFileNotFound, hipErrorOutOfMemory, hipErrorSharedObjectInitFailed, hipErrorNotInitialized
*
*
*
*/
hipError_t hipModuleLoad(hipModule_t *module, const char *fname);
@@ -1313,7 +1339,7 @@ hipError_t hipModuleLoad(hipModule_t *module, const char *fname);
*
* @returns hipSuccess, hipInvalidValue
* module is freed and the code objects associated with it are destroyed
*
*
*/
hipError_t hipModuleUnload(hipModule_t module);
@@ -1325,7 +1351,7 @@ hipError_t hipModuleUnload(hipModule_t module);
* @param [in] kname
* @param [out] function
*
* @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorNotInitialized, hipErrorNotFound,
* @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorNotInitialized, hipErrorNotFound,
*/
hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, const char *kname);
+1 -5
Zobrazit soubor
@@ -5,17 +5,13 @@ endif
HIPCC=$(HIP_PATH)/bin/hipcc
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
ifeq (${HIP_PLATFORM}, hcc)
GENCO_FLAGS=--target-isa=fiji
endif
all: vcpy_kernel.code runKernel.hip.out
runKernel.hip.out: runKernel.cpp
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
vcpy_kernel.code: vcpy_kernel.cpp
$(HIPCC) --genco $(GENCO_FLAGS) $< -o $@
$(HIPCC) --genco $(GENCO_FLAGS) $^ -o $@
clean:
rm -f *.code *.out
+39 -82
Zobrazit soubor
@@ -24,10 +24,6 @@ THE SOFTWARE.
//-------------------------------------------------------------------------------------------------
//Devices
//-------------------------------------------------------------------------------------------------
//---
/**
* @return #hipSuccess, hipErrorInvalidDevice
*/
// TODO - does this initialize HIP runtime?
hipError_t hipGetDevice(int *deviceId)
{
@@ -47,11 +43,6 @@ hipError_t hipGetDevice(int *deviceId)
return ihipLogStatus(e);
}
//---
/**
* @return #hipSuccess, #hipErrorNoDevice
*/
// TODO - does this initialize HIP runtime?
hipError_t hipGetDeviceCount(int *count)
{
@@ -66,11 +57,6 @@ hipError_t hipGetDeviceCount(int *count)
}
}
//---
/**
* @returns #hipSuccess
*/
hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig )
{
HIP_INIT_API(cacheConfig);
@@ -80,11 +66,6 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig )
return ihipLogStatus(hipSuccess);
}
//---
/**
* @returns #hipSuccess
*/
hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig )
{
HIP_INIT_API(cacheConfig);
@@ -94,11 +75,6 @@ hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig )
return ihipLogStatus(hipSuccess);
}
//---
/**
* @returns #hipSuccess
*/
hipError_t hipFuncSetCacheConfig ( hipFuncCache cacheConfig )
{
HIP_INIT_API(cacheConfig);
@@ -108,12 +84,6 @@ hipError_t hipFuncSetCacheConfig ( hipFuncCache cacheConfig )
return ihipLogStatus(hipSuccess);
}
//---
/**
* @returns #hipSuccess
*/
hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config )
{
HIP_INIT_API(config);
@@ -123,12 +93,6 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config )
return ihipLogStatus(hipSuccess);
}
//---
/**
* @returns #hipSuccess
*/
hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig )
{
HIP_INIT_API(pConfig);
@@ -138,10 +102,6 @@ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig )
return ihipLogStatus(hipSuccess);
}
//---
/**
* @return #hipSuccess, #hipErrorInvalidDevice
*/
hipError_t hipSetDevice(int deviceId)
{
HIP_INIT_API(deviceId);
@@ -153,22 +113,12 @@ hipError_t hipSetDevice(int deviceId)
}
}
//---
/**
* @return #hipSuccess
*/
hipError_t hipDeviceSynchronize(void)
{
HIP_INIT_API();
return ihipSynchronize();
return ihipLogStatus(ihipSynchronize());
}
//---
/**
* @return @ref hipSuccess
*/
hipError_t hipDeviceReset(void)
{
HIP_INIT_API();
@@ -182,15 +132,12 @@ hipError_t hipDeviceReset(void)
if (ctx) {
// Release ctx resources (streams and memory):
ctx->locked_reset();
ctx->locked_reset();
}
return ihipLogStatus(hipSuccess);
}
/**
*
*/
hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
{
HIP_INIT_API(pi, attr, device);
@@ -260,13 +207,6 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
return ihipLogStatus(e);
}
/**
* @return #hipSuccess, #hipErrorInvalidDevice
* @bug HCC always returns 0 for maxThreadsPerMultiProcessor
* @bug HCC always returns 0 for regsPerBlock
* @bug HCC always returns 0 for l2CacheSize
*/
hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device)
{
HIP_INIT_API(props, device);
@@ -285,7 +225,6 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device)
return ihipLogStatus(e);
}
hipError_t hipSetDeviceFlags( unsigned int flags)
{
HIP_INIT_API(flags);
@@ -306,25 +245,6 @@ hipError_t hipSetDeviceFlags( unsigned int flags)
return ihipLogStatus(e);
};
hipError_t hipDeviceGetFromId(hipDevice_t *device, int deviceId)
{
HIP_INIT_API(device, deviceId);
hipError_t e = hipSuccess;
*device = ihipGetDevice(deviceId);
if (device == nullptr) {
e = hipErrorInvalidDevice;
}
return ihipLogStatus(e);
}
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device)
{
HIP_INIT_API(major,minor, device);
@@ -361,3 +281,40 @@ hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device)
*bytes= device->_props.totalGlobalMem;
return ihipLogStatus(e);
}
hipError_t hipChooseDevice( int* device, const hipDeviceProp_t* prop )
{
hipDeviceProp_t tempProp;
int deviceCount;
int inPropCount=0;
int matchedPropCount=0;
hipError_t e = hipSuccess;
hipGetDeviceCount( &deviceCount );
*device = 0;
for (int i=0; i< deviceCount; i++) {
hipGetDeviceProperties( &tempProp, i );
if(prop->major !=0) {
inPropCount++;
if(tempProp.major >= prop->major) {
matchedPropCount++;
}
if(prop->minor !=0) {
inPropCount++;
if(tempProp.minor >= prop->minor) {
matchedPropCount++;
}
}
}
if(inPropCount == matchedPropCount) {
*device = i;
}
#if 0
else{
e= hipErrorInvalidValue;
}
#endif
}
return ihipLogStatus(e);
}
+111 -14
Zobrazit soubor
@@ -57,10 +57,14 @@ const int release = 1;
#define MEMCPY_H2D_DIRECT_VS_STAGING_COPY_THRESHOLD 65336
#define MEMCPY_H2D_STAGING_VS_PININPLACE_COPY_THRESHOLD 1048576
const char *API_COLOR = KGRN;
const char *API_COLOR_END = KNRM;
int HIP_LAUNCH_BLOCKING = 0;
int HIP_PRINT_ENV = 0;
int HIP_TRACE_API= 0;
std::string HIP_TRACE_API_COLOR("green");
int HIP_ATP_MARKER= 0;
int HIP_DB= 0;
int HIP_STAGING_SIZE = 64; /* size of staging buffers, in KB */
@@ -246,7 +250,8 @@ void ihipStream_t::wait(LockedAccessor_StreamCrit_t &crit, bool assertQueueEmpty
{
if (! assertQueueEmpty) {
tprintf (DB_SYNC, "stream %p wait for queue-empty..\n", this);
crit->_av.wait();
// crit->_av.wait();
waitOnAllCFs(crit);
}
if (crit->_last_copy_signal) {
@@ -262,6 +267,21 @@ void ihipStream_t::wait(LockedAccessor_StreamCrit_t &crit, bool assertQueueEmpty
// crit->_signalCnt = 0;
}
void ihipStream_t::addCFtoStream(LockedAccessor_StreamCrit_t &crit, hc::completion_future *cf)
{
crit->_cfs.push_back(cf);
}
void ihipStream_t::waitOnAllCFs(LockedAccessor_StreamCrit_t &crit)
{
for(uint32_t i=0;i<crit->_cfs.size();i++){
if(crit->_cfs[i] != NULL){
crit->_cfs[i]->wait();
delete crit->_cfs[i];
}
}
crit->_cfs.clear();
}
//---
//Wait for all kernel and data copy commands in this stream to complete.
@@ -522,7 +542,8 @@ void ihipStream_t::launchModuleKernel(
uint32_t gridDimX,
uint32_t gridDimY,
uint32_t gridDimZ,
uint32_t sharedMemBytes,
uint32_t groupSegmentSize,
uint32_t privateSegmentSize,
void *kernarg,
size_t kernSize,
uint64_t kernel){
@@ -545,8 +566,8 @@ void ihipStream_t::launchModuleKernel(
dispatch_packet->grid_size_x = blockDimX * gridDimX;
dispatch_packet->grid_size_y = blockDimY * gridDimY;
dispatch_packet->grid_size_z = blockDimZ * gridDimZ;
dispatch_packet->group_segment_size = 0;
dispatch_packet->private_segment_size = sharedMemBytes;
dispatch_packet->group_segment_size = groupSegmentSize;
dispatch_packet->private_segment_size = privateSegmentSize;
dispatch_packet->kernarg_address = kern;
dispatch_packet->kernel_object = kernel;
uint16_t header = (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
@@ -1122,6 +1143,7 @@ void ihipCtx_t::locked_waitAllStreams()
//---
// Read environment variables.
void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, const char *description)
{
@@ -1132,6 +1154,7 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c
env = getenv(var_name2);
}
// TODO: Refactor this code so it is a separate call rather than being part of ihipReadEnv_I, which should only read integers.
// Check if the environment variable is either HIP_VISIBLE_DEVICES or CUDA_LAUNCH_BLOCKING, which
// contains a sequence of comma-separated device IDs
if (!(strcmp(var_name1,"HIP_VISIBLE_DEVICES") && strcmp(var_name2, "CUDA_VISIBLE_DEVICES")) && env){
@@ -1169,15 +1192,37 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c
printf ("%-30s = %2d : %s\n", var_name1, *var_ptr, description);
}
}
}
void ihipReadEnv_S(std::string *var_ptr, const char *var_name1, const char *var_name2, const char *description)
{
char * env = getenv(var_name1);
// Check second name if first not defined, used to allow HIP_ or CUDA_ env vars.
if ((env == NULL) && strcmp(var_name2, "0")) {
env = getenv(var_name2);
}
if (env) {
*var_ptr = env;
}
if (HIP_PRINT_ENV) {
printf ("%-30s = %s : %s\n", var_name1, var_ptr->c_str(), description);
}
}
#if defined (DEBUG)
#define READ_ENV_I(_build, _ENV_VAR, _ENV_VAR2, _description) \
if ((_build == release) || (_build == debug) {\
ihipReadEnv_I(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);\
};
#define READ_ENV_S(_build, _ENV_VAR, _ENV_VAR2, _description) \
if ((_build == release) || (_build == debug) {\
ihipReadEnv_S(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);\
};
#else
@@ -1186,6 +1231,11 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c
ihipReadEnv_I(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);\
};
#define READ_ENV_S(_build, _ENV_VAR, _ENV_VAR2, _description) \
if (_build == release) {\
ihipReadEnv_S(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);\
};
#endif
@@ -1218,6 +1268,7 @@ void ihipInit()
}
READ_ENV_I(release, HIP_TRACE_API, 0, "Trace each HIP API call. Print function name and return code to stderr as program executes.");
READ_ENV_S(release, HIP_TRACE_API_COLOR, 0, "Color to use for HIP_API. None/Red/Green/Yellow/Blue/Magenta/Cyan/White");
READ_ENV_I(release, HIP_ATP_MARKER, 0, "Add HIP function begin/end to ATP file generated with CodeXL");
READ_ENV_I(release, HIP_STAGING_SIZE, 0, "Size of each staging buffer (in KB)" );
READ_ENV_I(release, HIP_STAGING_BUFFERS, 0, "Number of staging buffers to use in each direction. 0=use hsa_memory_copy.");
@@ -1261,6 +1312,31 @@ void ihipInit()
fprintf (stderr, "warning: env var HIP_ATP_MARKER=0x%x but COMPILE_HIP_ATP_MARKER=0. (perhaps enable COMPILE_HIP_DB in src code before compiling?)", HIP_ATP_MARKER);
}
std::transform(HIP_TRACE_API_COLOR.begin(), HIP_TRACE_API_COLOR.end(), HIP_TRACE_API_COLOR.begin(), ::tolower);
if (HIP_TRACE_API_COLOR == "none") {
API_COLOR = "";
API_COLOR_END = "";
} else if (HIP_TRACE_API_COLOR == "red") {
API_COLOR = KRED;
} else if (HIP_TRACE_API_COLOR == "green") {
API_COLOR = KGRN;
} else if (HIP_TRACE_API_COLOR == "yellow") {
API_COLOR = KYEL;
} else if (HIP_TRACE_API_COLOR == "blue") {
API_COLOR = KBLU;
} else if (HIP_TRACE_API_COLOR == "magenta") {
API_COLOR = KMAG;
} else if (HIP_TRACE_API_COLOR == "cyan") {
API_COLOR = KCYN;
} else if (HIP_TRACE_API_COLOR == "white") {
API_COLOR = KWHT;
} else {
fprintf (stderr, "warning: env var HIP_TRACE_API_COLOR=%s must be None/Red/Green/Yellow/Blue/Magenta/Cyan/White", HIP_TRACE_API_COLOR.c_str());
};
/*
* Build a table of valid compute devices.
@@ -1332,7 +1408,7 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t stream)
#endif
return device->_defaultStream;
} else {
// Have to wait for legacy default stream to be empty:
// ALl streams have to wait for legacy default stream to be empty:
if (!(stream->_flags & hipStreamNonBlocking)) {
tprintf(DB_SYNC, "stream %p wait default stream\n", stream);
stream->getCtx()->_defaultStream->locked_wait();
@@ -1344,16 +1420,26 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t stream)
void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, const hipStream_t stream)
{
std::string streamString = ToString(stream);
fprintf(stderr, KGRN "<<hip-api: hipLaunchKernel '%s' gridDim:(%d,%d,%d) groupDim:(%d,%d,%d) groupMem:+%d %s\n" KNRM, \
kernelName, lp->grid_dim.x, lp->grid_dim.y, lp->grid_dim.z, lp->group_dim.x, lp->group_dim.y, lp->group_dim.z,
lp->dynamic_group_mem_bytes, streamString.c_str());\
if (HIP_ATP_MARKER || (COMPILE_HIP_DB && HIP_TRACE_API)) {
std::stringstream os;
os << "<<hip-api: hipLaunchKernel '" << kernelName << "'"
<< " gridDim:" << lp->grid_dim
<< " groupDim:" << lp->group_dim
<< " sharedMem:+" << lp->dynamic_group_mem_bytes
<< " " << *stream;
if (COMPILE_HIP_DB && HIP_TRACE_API) {
std::cerr << API_COLOR << os.str() << API_COLOR_END << std::endl;
}
SCOPED_MARKER(os.str().c_str(), "HIP", NULL);
}
}
// TODO - data-up to data-down:
// Called just before a kernel is launched from hipLaunchKernel.
// Allows runtime to track some information about the stream.
hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp)
hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr)
{
HIP_INIT();
stream = ihipSyncAndResolveStream(stream);
@@ -1369,11 +1455,14 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_
auto crit = stream->lockopen_preKernelCommand();
lp->av = &(crit->_av);
lp->cf = new hc::completion_future;
stream->addCFtoStream(crit, lp->cf);
ihipPrintKernelLaunch(kernelNameStr, lp, stream);
return (stream);
}
hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm *lp)
hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr)
{
HIP_INIT();
stream = ihipSyncAndResolveStream(stream);
@@ -1389,11 +1478,13 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, gri
auto crit = stream->lockopen_preKernelCommand();
lp->av = &(crit->_av);
lp->cf = new hc::completion_future;
stream->addCFtoStream(crit, lp->cf);
ihipPrintKernelLaunch(kernelNameStr, lp, stream);
return (stream);
}
hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm *lp)
hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm *lp, const char *kernelNameStr)
{
HIP_INIT();
stream = ihipSyncAndResolveStream(stream);
@@ -1409,11 +1500,13 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, gri
auto crit = stream->lockopen_preKernelCommand();
lp->av = &(crit->_av);
lp->cf = new hc::completion_future;
stream->addCFtoStream(crit, lp->cf);
ihipPrintKernelLaunch(kernelNameStr, lp, stream);
return (stream);
}
hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm *lp)
hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm *lp, const char *kernelNameStr)
{
HIP_INIT();
stream = ihipSyncAndResolveStream(stream);
@@ -1429,6 +1522,10 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, g
auto crit = stream->lockopen_preKernelCommand();
lp->av = &(crit->_av);
lp->cf = new hc::completion_future; // TODO, is this necessary?
stream->addCFtoStream(crit, lp->cf);
ihipPrintKernelLaunch(kernelNameStr, lp, stream);
return (stream);
}
+52 -31
Zobrazit soubor
@@ -30,7 +30,7 @@ THE SOFTWARE.
//TODO Use Pool APIs from HCC to get memory regions.
namespace hipdrv{
namespace hipdrv {
hsa_status_t findSystemRegions(hsa_region_t region, void *data){
hsa_region_segment_t segment_id;
@@ -99,7 +99,7 @@ uint64_t ElfSize(const void *emi){
}
hipError_t hipModuleLoad(hipModule_t *module, const char *fname){
HIP_INIT_API(fname);
HIP_INIT_API(module, fname);
hipError_t ret = hipSuccess;
*module = new ihipModule_t;
@@ -187,7 +187,7 @@ hipError_t ihipModuleGetFunction(hipFunction_t *func, hipModule_t hmod, const ch
ret = hipErrorInvalidContext;
}else{
*func = new ihipFunction_t;
*func = new ihipFunction_t(name);
int deviceId = ctx->getDevice()->_deviceId;
ihipDevice_t *currentDevice = ihipGetDevice(deviceId);
hsa_agent_t gpuAgent = (hsa_agent_t)currentDevice->_hsaAgent;
@@ -199,14 +199,14 @@ hipError_t ihipModuleGetFunction(hipFunction_t *func, hipModule_t hmod, const ch
}
status = hsa_executable_freeze(hmod->executable, NULL);
status = hsa_executable_get_symbol(hmod->executable, NULL, name, gpuAgent, 0, &(*func)->kernel_symbol);
status = hsa_executable_get_symbol(hmod->executable, NULL, name, gpuAgent, 0, &(*func)->_kernelSymbol);
if(status != HSA_STATUS_SUCCESS){
return ihipLogStatus(hipErrorNotFound);
}
status = hsa_executable_symbol_get_info((*func)->kernel_symbol,
status = hsa_executable_symbol_get_info((*func)->_kernelSymbol,
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
&(*func)->kernel);
&(*func)->_kernel);
if(status != HSA_STATUS_SUCCESS){
return ihipLogStatus(hipErrorNotFound);
@@ -215,9 +215,10 @@ hipError_t ihipModuleGetFunction(hipFunction_t *func, hipModule_t hmod, const ch
return ihipLogStatus(ret);
}
hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod,
const char *name){
HIP_INIT_API(name);
HIP_INIT_API(hfunc, hmod, name);
return ihipModuleGetFunction(hfunc, hmod, name);
}
@@ -226,8 +227,13 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
uint32_t sharedMemBytes, hipStream_t hStream,
void **kernelParams, void **extra){
HIP_INIT_API(f->kernel);
void **kernelParams, void **extra)
{
HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ,
blockDimX, blockDimY, blockDimZ,
sharedMemBytes, hStream,
kernelParams, extra);
auto ctx = ihipGetTlsDefaultCtx();
hipError_t ret = hipSuccess;
@@ -246,35 +252,47 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
memcpy(config, extra, sizeof(size_t)*5);
if(config[0] == HIP_LAUNCH_PARAM_BUFFER_POINTER && config[2] == HIP_LAUNCH_PARAM_BUFFER_SIZE && config[4] == HIP_LAUNCH_PARAM_END){
kernSize = *(size_t*)(config[3]);
}else{
} else {
return ihipLogStatus(hipErrorNotInitialized);
}
}else{
return ihipLogStatus(hipErrorInvalidValue);
}
/*
Kernel argument preparation.
*/
hsa_status_t status;
grid_launch_parm lp;
hStream = ihipPreLaunchKernel(hStream, 0, 0, &lp);
/*
Create signal
*/
uint32_t groupSegmentSize;
hsa_status_t status = hsa_executable_symbol_get_info(f->_kernelSymbol,
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
&groupSegmentSize);
uint32_t privateSegmentSize;
status = hsa_executable_symbol_get_info(f->_kernelSymbol,
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
&privateSegmentSize);
privateSegmentSize += sharedMemBytes;
/*
Kernel argument preparation.
*/
grid_launch_parm lp;
hStream = ihipPreLaunchKernel(hStream, 0, 0, &lp, f->_kernelName);
/*
Create signal
*/
hsa_signal_t signal;
status = hsa_signal_create(1, 0, NULL, &signal);
/*
Launch AQL packet
*/
/*
Launch AQL packet
*/
hStream->launchModuleKernel(*lp.av, signal, blockDimX, blockDimY, blockDimZ,
gridDimX, gridDimY, gridDimZ, sharedMemBytes, config[1], kernSize, f->kernel);
gridDimX, gridDimY, gridDimZ, groupSegmentSize, privateSegmentSize, config[1], kernSize, f->_kernel);
/*
Wait for signal
*/
/*
Wait for signal
*/
hsa_signal_value_t value = hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
@@ -288,8 +306,9 @@ Kernel argument preparation.
hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
hipModule_t hmod, const char* name){
HIP_INIT_API(name);
hipModule_t hmod, const char* name)
{
HIP_INIT_API(dptr, bytes, hmod, name);
hipError_t ret = hipSuccess;
if(dptr == NULL || bytes == NULL){
return ihipLogStatus(hipErrorInvalidValue);
@@ -301,13 +320,15 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
hipFunction_t func;
ihipModuleGetFunction(&func, hmod, name);
*bytes = PrintSymbolSizes(hmod->ptr, name) + sizeof(amd_kernel_code_t);
*dptr = reinterpret_cast<void*>(func->kernel);
*dptr = reinterpret_cast<void*>(func->_kernel);
return ihipLogStatus(ret);
}
}
hipError_t hipModuleLoadData(hipModule_t *module, const void *image){
HIP_INIT_API(image);
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
{
HIP_INIT_API(module, image);
hipError_t ret = hipSuccess;
if(image == NULL || module == NULL){
return ihipLogStatus(hipErrorNotInitialized);
+21
Zobrazit soubor
@@ -115,6 +115,27 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int
};
//---
hipError_t hipStreamQuery(hipStream_t stream)
{
HIP_INIT_API(stream);
// Use default stream if 0 specified:
if (stream == hipStreamNull) {
ihipCtx_t *device = ihipGetTlsDefaultCtx();
stream = device->_defaultStream;
}
LockedAccessor_StreamCrit_t crit(stream->_criticalData);
int pendingOps = crit->_av.get_pending_async_ops();
hipError_t e = (pendingOps > 0) ? hipErrorNotReady : hipSuccess;
return ihipLogStatus(e);
}
//---
hipError_t hipStreamSynchronize(hipStream_t stream)
{
+74 -73
Zobrazit soubor
@@ -2,74 +2,87 @@ cmake_minimum_required (VERSION 2.6)
# remove CMAKE_CXX_COMPILER entry from cache since it will be pointing to hipcc
unset(CMAKE_CXX_COMPILER CACHE)
message (CMAKE_CXX_COMPILER = ${CMAKE_CXX_COMPILER} )
# remove HIP_PATH entry from cache since we might be running tests with a different configuration
unset(HIP_PATH CACHE)
project (HIP_Unit_Tests)
project(HIP_Unit_Tests)
include(CTest)
set(HIPTEST_SOURCE_DIR ${PROJECT_SOURCE_DIR})
#include_directories( ${PROJECT_SOURCE_DIR}/include )
set (HIPTEST_SOURCE_DIR ${PROJECT_SOURCE_DIR} )
# The version number.
set (HIP_Unit_Test_VERSION_MAJOR 1)
set (HIP_Unit_Test_VERSION_MINOR 0)
# Enable multi-gpu tests
if(NOT DEFINED HIP_MULTI_GPU)
set(HIP_MULTI_GPU 0 CACHE BOOL "Run tests requiring more than one GPU")
endif()
if(NOT DEFINED HIP_BUILD_LOCAL)
if(NOT DEFINED ENV{HIP_BUILD_LOCAL})
set(HIP_BUILD_LOCAL 1 CACHE BOOL "Build HIP in local folder")
# Determine HIP_PATH
if(NOT DEFINED HIP_PATH)
if(NOT DEFINED ENV{HIP_PATH})
# We are going to use HIP source...
get_filename_component(HIP_SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../.. ABSOLUTE)
execute_process(
COMMAND "${CMAKE_COMMAND}" -E remove_directory hip
OUTPUT_QUIET
ERROR_QUIET
)
execute_process(
COMMAND "${CMAKE_COMMAND}" -E make_directory hip
OUTPUT_QUIET
ERROR_QUIET
)
message(STATUS "Configuring HIP")
# ...so need to build HIP locally.
execute_process(
COMMAND "${CMAKE_COMMAND}" -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/hip/localbuild ${HIP_SRC_PATH}
WORKING_DIRECTORY hip
RESULT_VARIABLE hip_build_result
OUTPUT_QUIET
ERROR_QUIET
)
if(hip_build_result)
message(FATAL_ERROR "Error configuring HIP")
else()
message(STATUS "Configuring HIP - done")
message(STATUS "Building HIP")
endif()
execute_process(
COMMAND "${CMAKE_COMMAND}" --build . --target install
WORKING_DIRECTORY hip
RESULT_VARIABLE hip_build_result
OUTPUT_VARIABLE hip_build_log
ERROR_QUIET
)
if(hip_build_result)
message(${hip_build_log})
message(FATAL_ERROR "Error building HIP")
else()
# Building HIP is successful. Point HIP_PATH to this location.
message(STATUS "Building HIP - done")
get_filename_component(HIP_PATH ${CMAKE_CURRENT_BINARY_DIR}/hip/localbuild ABSOLUTE)
endif()
# Add a target to rebuild HIP if HIP source changes.
add_custom_target(
hip ALL
COMMAND "${CMAKE_COMMAND}" --build . --target install
WORKING_DIRECTORY hip
)
else()
set(HIP_BUILD_LOCAL $ENV{HIP_BUILD_LOCAL} CACHE BOOL "Build HIP in local folder")
# We are using HIP_PATH from env. So just create a fake target.
set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to installed HIP")
add_custom_target(hip ALL)
endif()
else()
# We are using HIP_PATH passed to cmake. So just create a fake target.
add_custom_target(hip ALL)
endif()
MESSAGE("HIP_PATH=" ${HIP_PATH})
set(HIP_PATH $ENV{HIP_PATH})
if (NOT DEFINED HIP_PATH)
get_filename_component (HIP_PATH ../.. ABSOLUTE)
endif()
# Determine HIP_PLATFORM
execute_process(COMMAND ${HIP_PATH}/bin/hipconfig --platform OUTPUT_VARIABLE HIP_PLATFORM)
MESSAGE ("HIP_PATH=" ${HIP_PATH})
if (${HIP_PLATFORM} STREQUAL "hcc")
MESSAGE ("HIP_PLATFORM=hcc")
set (HSA_PATH $ENV{HSA_PATH})
if (NOT DEFINED HSA_PATH)
set (HSA_PATH /opt/rocm/hsa)
endif()
set (CODEXL_PATH $ENV{CODEXL_PATH})
if (NOT DEFINED CODEXL_PATH)
set (CODEXL_PATH /opt/AMD/CodeXL)
endif()
set (CODEXL_SDK_ATAL_PATH ${CODEXL_PATH}/SDK/AMDTActivityLogger)
#---
# Add HSA library:
add_library(hsa-runtime64 SHARED IMPORTED)
set_property(TARGET hsa-runtime64 PROPERTY IMPORTED_LOCATION "${HSA_PATH}/lib/libhsa-runtime64.so")
#These includes are used for all files.
#Include HIP and HC since the tests need both of these:
include_directories(${HIP_PATH}/include)
# This will create a subdir "hip_hcc" in the test build directory
# Any changes to hip_hcc source will be detected and force the library and then the tests to be rebuilt.
if (${HIP_BUILD_LOCAL})
add_subdirectory(${HIP_PATH} build.hip_hcc)
#link_directories(${CMAKE_CURRENT_BINARY_DIR}/build.hip_hcc) # search the local hip_hcc for libhip_hcc.a
set (CMAKE_CXX_FLAGS --hipcc_explicit_lib)
endif()
elseif (${HIP_PLATFORM} STREQUAL "nvcc")
MESSAGE ("HIP_PLATFORM=nvcc")
if(${HIP_PLATFORM} STREQUAL "hcc")
MESSAGE("HIP_PLATFORM=hcc")
elseif(${HIP_PLATFORM} STREQUAL "nvcc")
MESSAGE("HIP_PLATFORM=nvcc")
#Need C++11 for threads in some of the tests.
add_definitions(-std=c++11)
@@ -77,33 +90,21 @@ elseif (${HIP_PLATFORM} STREQUAL "nvcc")
# NVCC does not not support -rdynamic option
set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS )
set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS )
else()
MESSAGE (FATAL_ERROR "UNKNOWN HIP_PLATFORM=" ${HIP_PLATFORM})
MESSAGE(FATAL_ERROR "UNKNOWN HIP_PLATFORM=" ${HIP_PLATFORM})
endif()
set (HIPCC ${HIP_PATH}/bin/hipcc)
set (CMAKE_CXX_COMPILER ${HIPCC} CACHE FILEPATH "CXX Compiler" FORCE)
set(HIPCC ${HIP_PATH}/bin/hipcc)
set(CMAKE_CXX_COMPILER ${HIPCC} CACHE FILEPATH "CXX Compiler" FORCE)
add_library(test_common OBJECT test_common.cpp )
# usage : build_hip_executable (exe_name CPP_FILES)
macro (build_hip_executable exe cpp)
if (${HIP_PLATFORM} STREQUAL "hcc")
if (${HIP_BUILD_LOCAL})
#target_link_libraries(${exe} hip_hcc)
add_executable (${exe} ${cpp} ${ARGN} $<TARGET_OBJECTS:test_common> $<TARGET_OBJECTS:hip_hcc> )
else()
add_executable (${exe} ${cpp} ${ARGN} $<TARGET_OBJECTS:test_common> )
endif()
else()
add_executable (${exe} ${cpp} ${ARGN} $<TARGET_OBJECTS:test_common> )
endif()
add_executable (${exe} ${cpp} ${ARGN} $<TARGET_OBJECTS:test_common> )
add_dependencies(${exe} hip)
endmacro()
# Make a hip executable, using libc++
macro (build_hip_executable_libcpp exe cpp)
build_hip_executable( ${exe} ${cpp} ${ARGN} )
@@ -213,11 +214,11 @@ endif()
if (${HIP_PLATFORM} STREQUAL "hcc")
make_test(hipArray " ")
make_test(hipFuncSetDevice " ")
make_test(hipDynamicShared " ")
endif()
make_hipify_test(specialFunc.cu )
make_test(hipDynamicShared " ")
# Add subdirs here:
add_subdirectory(context)
+1 -1
Zobrazit soubor
@@ -34,7 +34,7 @@ int main(int argc, char *argv[])
hipCtx_t ctx;
hipCtx_t ctx1;
HIPCHECK(hipDeviceGetFromId(&device, 0));
HIPCHECK(hipDeviceGet(&device, 0));
HIPCHECK(hipCtxCreate(&ctx, 0, device));
HIPCHECK(hipCtxGetCurrent(&ctx1));
HIPCHECK(hipCtxGetDevice(&device1));
+6
Zobrazit soubor
@@ -86,19 +86,25 @@ __host__ void double_precision_math_functions()
nearbyint(0.0);
//nextafter(0.0);
//fX = 1.0; norm(1, &fX);
#if defined(__HIP_PLATFORM_HCC__)
norm3d(1.0, 0.0, 0.0);
norm4d(1.0, 0.0, 0.0, 0.0);
#endif
normcdf(0.0);
normcdfinv(1.0);
pow(1.0, 0.0);
rcbrt(1.0);
remainder(2.0, 1.0);
remquo(1.0, 2.0, &iX);
#if defined(__HIP_PLATFORM_HCC__)
rhypot(0.0, 1.0);
#endif
rint(1.0);
#if defined(__HIP_PLATFORM_HCC__)
fX = 1.0; rnorm(1, &fX);
rnorm3d(0.0, 0.0, 1.0);
rnorm4d(0.0, 0.0, 0.0, 1.0);
#endif
round(0.0);
rsqrt(1.0);
scalbln(0.0, 1);
+8
Zobrazit soubor
@@ -56,7 +56,9 @@ __host__ void single_precision_math_functions()
expm1f(0.0f);
fabsf(1.0f);
fdimf(1.0f, 0.0f);
#if defined(__HIP_PLATFORM_HCC__)
fdividef(0.0f, 1.0f);
#endif
floorf(0.0f);
fmaf(1.0f, 2.0f, 3.0f);
fmaxf(0.0f, 0.0f);
@@ -86,8 +88,10 @@ __host__ void single_precision_math_functions()
nanf("1");
nearbyintf(0.0f);
//nextafterf(0.0f);
#if defined(__HIP_PLATFORM_HCC__)
norm3df(1.0f, 0.0f, 0.0f);
norm4df(1.0f, 0.0f, 0.0f, 0.0f);
#endif
normcdff(0.0f);
normcdfinvf(1.0f);
//fX = 1.0f; normf(1, &fX);
@@ -95,11 +99,15 @@ __host__ void single_precision_math_functions()
rcbrtf(1.0f);
remainderf(2.0f, 1.0f);
remquof(1.0f, 2.0f, &iX);
#if defined(__HIP_PLATFORM_HCC__)
rhypotf(0.0f, 1.0f);
#endif
rintf(1.0f);
#if defined(__HIP_PLATFORM_HCC__)
rnorm3df(0.0f, 0.0f, 1.0f);
rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
fX = 1.0f; rnormf(1, &fX);
#endif
roundf(0.0f);
rsqrtf(1.0f);
scalblnf(0.0f, 1);
+17
Zobrazit soubor
@@ -0,0 +1,17 @@
#include <stdio.h>
#include <hip_runtime.h>
int main( void ) {
hipDeviceProp_t prop;
int dev;
hipGetDevice( &dev ) ;
printf( "ID of current HIP device: %d\n", dev );
memset( &prop, 0, sizeof( hipDeviceProp_t ) );
prop.major = 1;
prop.minor = 3;
hipChooseDevice( &dev, &prop );
printf( "ID of hip device closest to revision 1.3: %d\n", dev );
hipSetDevice( dev );
}
+5
Zobrazit soubor
@@ -27,7 +27,12 @@ template<typename T>
__global__ void testExternSharedKernel(hipLaunchParm lp, const T* A_d, const T* B_d, T* C_d, size_t numElements, size_t groupElements) {
// declare dynamic shared memory
#if defined(__HIP_PLATFORM_HCC__)
HIP_DYNAMIC_SHARED(T, sdata)
#else
HIP_DYNAMIC_SHARED(__align__(sizeof(T)) unsigned char, my_sdata)
T *sdata = reinterpret_cast<T *>(my_sdata);
#endif
size_t gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
size_t tid = hipThreadIdx_x;
+4 -2
Zobrazit soubor
@@ -38,7 +38,7 @@ __global__ void vAdd(hipLaunchParm lp, float *a){}
cmd;\
hipDeviceSynchronize();\
gettimeofday(&stop, NULL);\
} while(0);
} while(0);
@@ -61,7 +61,9 @@ int main()
{
float *Ad;
hipMalloc((void**)&Ad, 1024);
hipLaunchKernel(vAdd, 1024, 1, 0, 0, Ad);
// Test the different hipLaunchParm options:
hipLaunchKernel(vAdd, size_t(1024), 1, 0, 0, Ad);
hipLaunchKernel(vAdd, 1024, dim3(1), 0, 0, Ad);
hipLaunchKernel(vAdd, dim3(1024), 1, 0, 0, Ad);
hipLaunchKernel(vAdd, dim3(1024), dim3(1), 0, 0, Ad);
+38 -7
Zobrazit soubor
@@ -27,13 +27,14 @@ unsigned p_streams = 6;
//------
// Structure for one stream;
template <typename T>
template <typename T>
class Streamer {
public:
Streamer(size_t numElements);
~Streamer();
void runAsync();
void waitComplete();
void queryUntilComplete();
private:
T *_A_h;
@@ -66,11 +67,24 @@ void Streamer<T>::runAsync()
printf ("testing: %s numElements=%zu size=%6.2fMB\n", __func__, _numElements, _numElements * sizeof(T) / 1024.0/1024.0);
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, _numElements);
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _B_d, _C_d, _numElements);
// Test case where hipStreamWaitEvent waits on same event we just placed into the queue.
HIPCHECK(hipEventRecord(_event, _stream));
HIPCHECK(hipStreamWaitEvent(_stream, _event, 0));
}
template <typename T>
void Streamer<T>::queryUntilComplete()
{
int numQueries = 0;
hipError_t e = hipSuccess;
do {
numQueries++;
e = hipStreamQuery(_stream);
} while (e != hipSuccess) ;
printf ("completed after %d queries\n", numQueries);
};
@@ -99,7 +113,7 @@ void parseMyArguments(int argc, char *argv[])
//---
int main(int argc, char *argv[])
{
HipTest::parseStandardArguments(argc, argv, true);
HipTest::parseStandardArguments(argc, argv, false);
parseMyArguments(argc, argv);
typedef Streamer<float> FloatStreamer;
@@ -113,11 +127,28 @@ int main(int argc, char *argv[])
streamers.push_back(s);
}
for (int i=0; i<p_streams; i++) {
streamers[i]->runAsync();
if (p_tests & 0x1) {
printf ("==> Test 0x1 runAsnc\n");
for (int i=0; i<p_streams; i++) {
streamers[i]->runAsync();
}
HIPCHECK(hipDeviceSynchronize());
}
if (p_tests & 0x2) {
printf ("==> Test 0x2 queryUntilComplete\n");
for (int i=0; i<p_streams; i++) {
streamers[i]->runAsync();
streamers[i]->queryUntilComplete();
}
HIPCHECK(hipDeviceSynchronize());
}
if (p_tests & 0x4) {
hipStreamQuery(0/* try null stream*/);
}
HIPCHECK(hipDeviceSynchronize());
passed();
}