Merge branch 'amd-develop' into amd-master
Change-Id: Ieda710b081404e4805c568081ae8fe623f71d6ad
Tento commit je obsažen v:
@@ -116,7 +116,7 @@ endif()
|
||||
# Set if we need to build shared or static library
|
||||
if(NOT DEFINED HIP_LIB_TYPE)
|
||||
if(NOT DEFINED ENV{HIP_LIB_TYPE})
|
||||
set(HIP_LIB_TYPE 0)
|
||||
set(HIP_LIB_TYPE 1)
|
||||
else()
|
||||
set(HIP_LIB_TYPE $ENV{HIP_LIB_TYPE})
|
||||
endif()
|
||||
|
||||
@@ -31,6 +31,7 @@ HIP releases are typically of two types. The tag naming convention is different
|
||||
- [HIP Kernel Language](docs/markdown/hip_kernel_language.md)
|
||||
- [HIP Runtime API (Doxygen)](http://gpuopen-professionalcompute-tools.github.io/HIP)
|
||||
- [HIP Porting Guide](docs/markdown/hip_porting_guide.md)
|
||||
- [HIP Porting Driver Guide](docs/markdown/hip_porting_driver_api.md)
|
||||
- [HIP Terminology](docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL)
|
||||
- [clang-hipify](clang-hipify/README.md)
|
||||
- [Developer/CONTRIBUTING Info](CONTRIBUTING.md)
|
||||
|
||||
Spustitelný soubor
+62
@@ -0,0 +1,62 @@
|
||||
#!/bin/bash
|
||||
|
||||
function die {
|
||||
echo "${1-Died}." >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
if [ $# = 0 ]; then
|
||||
die "$(basename $0): Invalid number of arguments"
|
||||
fi
|
||||
|
||||
: ${ROCM_PATH:=/opt/rocm}
|
||||
: ${ROCM_TARGET:=fiji}
|
||||
|
||||
INPUT_FILES=""
|
||||
OUTPUT_FILE=""
|
||||
while [[ $# -gt 1 ]]; do
|
||||
key="$1"
|
||||
case $key in
|
||||
-o)
|
||||
OUTPUT_FILE="$2"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
INPUT_FILES="$INPUT_FILES $key"
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
[ INPUT_FILES != "" ] || die "No source files specified"
|
||||
[ OUTPUT_FILE != "" ] || die "Output file not specified"
|
||||
|
||||
SOURCE="${BASH_SOURCE[0]}"
|
||||
HIP_PATH="$( command cd -P "$( dirname "$SOURCE" )/.." && pwd )"
|
||||
|
||||
export KMDUMPISA=1
|
||||
export KMDUMPLLVM=1
|
||||
hipgenisa_dir=`mktemp -d --tmpdir=/tmp hip.XXXXXXXX`
|
||||
hipgenisa_main=`mktemp src.XXXXXXXX.cpp`
|
||||
hipgenisa_files="$hipgenisa_main"
|
||||
|
||||
for inputfile in $INPUT_FILES; do
|
||||
sed 's/extern \+"C" \+//g' $inputfile > $inputfile.kernel.tmp.cpp
|
||||
hipgenisa_files="$hipgenisa_files $inputfile.kernel.tmp.cpp"
|
||||
done
|
||||
printf "\nint main(){}\n" >> $hipgenisa_main
|
||||
|
||||
$HIP_PATH/bin/hipcc $hipgenisa_files -o $hipgenisa_dir/a.out
|
||||
mv dump.* $hipgenisa_dir
|
||||
$ROCM_PATH/hcc-lc/bin/llvm-mc -arch=amdgcn -mcpu=$ROCM_TARGET -filetype=obj $hipgenisa_dir/dump.isa -o $hipgenisa_dir/dump.o
|
||||
$ROCM_PATH/llvm/bin/clang -target amdgcn--amdhsa $hipgenisa_dir/dump.o -o $hipgenisa_dir/dump.co
|
||||
|
||||
map_sym=""
|
||||
kernels=$(objdump -t $hipgenisa_dir/dump.co | grep grid_launch_parm | sed 's/ \+/ /g; s/\t/ /g' | cut -d" " -f6)
|
||||
for mangled_sym in $kernels; do
|
||||
real_sym=$(c++filt $(c++filt _$mangled_sym | cut -d: -f3 | sed 's/_functor//g') | cut -d\( -f1)
|
||||
map_sym="--redefine-sym $mangled_sym=$real_sym $map_sym"
|
||||
done
|
||||
objcopy -F elf64-little $map_sym $hipgenisa_dir/dump.co $OUTPUT_FILE
|
||||
|
||||
rm $hipgenisa_files
|
||||
rm -r $hipgenisa_dir
|
||||
+1
-2
@@ -181,8 +181,7 @@ if ($verbose & 0x4) {
|
||||
# Handle code object generation
|
||||
my $ISACMD="";
|
||||
if($HIP_PLATFORM eq "hcc"){
|
||||
$ISACMD .= "$HIP_PATH/bin/hipgenisa.sh ";
|
||||
$ISACMD .= $ROCM_PATH;
|
||||
$ISACMD .= "set ROCM_PATH=$ROCM_PATH && set ROCM_TARGET=$ROCM_TARGET && $HIP_PATH/bin/hccgenco.sh ";
|
||||
if($ARGV[0] eq "--genco"){
|
||||
foreach $isaarg (@ARGV[1..$#ARGV]){
|
||||
$ISACMD .= " ";
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
if [ $1 = " " ]
|
||||
then
|
||||
exit
|
||||
fi
|
||||
|
||||
ROCM_PATH=$1
|
||||
GEN_ISA=$2
|
||||
FILE_NAMES=$3
|
||||
OUT=$4
|
||||
OUTPUT_FILE=$5
|
||||
TARGET=""
|
||||
if [ ${GEN_ISA:0:12} = "--target-isa" ]
|
||||
then
|
||||
TARGET=${GEN_ISA:13:12}
|
||||
fi
|
||||
|
||||
SOURCE="${BASH_SOURCE[0]}"
|
||||
HIP_PATH="$( command cd -P "$( dirname "$SOURCE" )/.." && pwd )"
|
||||
|
||||
export KMDUMPISA=1
|
||||
export KMDUMPLLVM=1
|
||||
hipgenisa_dir=`mktemp -d --tmpdir=/tmp hip.XXXXXXXX`;
|
||||
sed 's/extern \+"C" \+//g' $FILE_NAMES > $FILE_NAMES.kernel.tmp.cpp
|
||||
echo "
|
||||
int main(){}
|
||||
" >> $FILE_NAMES.kernel.tmp.cpp
|
||||
$HIP_PATH/bin/hipcc $FILE_NAMES.kernel.tmp.cpp -o $hipgenisa_dir/a.out
|
||||
mv dump.* $hipgenisa_dir
|
||||
$ROCM_PATH/hcc-lc/bin/llvm-mc -arch=amdgcn -mcpu=$TARGET -filetype=obj $hipgenisa_dir/dump.isa -o $hipgenisa_dir/dump.o
|
||||
$ROCM_PATH/llvm/bin/clang -target amdgcn--amdhsa $hipgenisa_dir/dump.o -o $hipgenisa_dir/dump.co
|
||||
map_sym=""
|
||||
kernels=$(objdump -t $hipgenisa_dir/dump.co | grep grid_launch_parm | sed 's/ \+/ /g; s/\t/ /g' | cut -d" " -f6)
|
||||
for mangled_sym in $kernels
|
||||
do
|
||||
real_sym=$(c++filt $(c++filt _$mangled_sym | cut -d: -f3 | sed 's/_functor//g') | cut -d\( -f1)
|
||||
map_sym="--redefine-sym $mangled_sym=$real_sym $map_sym"
|
||||
done
|
||||
objcopy -F elf64-little $map_sym $hipgenisa_dir/dump.co $OUTPUT_FILE
|
||||
rm $FILE_NAMES.kernel.tmp.cpp
|
||||
rm -r $hipgenisa_dir
|
||||
+5
-5
@@ -699,12 +699,12 @@ sub warnUnsupportedSpecialFunctions
|
||||
#"__ballot",
|
||||
|
||||
#"__popc",
|
||||
#"__clz",
|
||||
#"__clz",
|
||||
|
||||
"__shfl",
|
||||
"__shfl_up",
|
||||
"__shfl_down",
|
||||
"__shfl_xor",
|
||||
#"__shfl",
|
||||
#"__shfl_up",
|
||||
#"__shfl_down",
|
||||
#"__shfl_xor",
|
||||
|
||||
"__prof_trigger",
|
||||
|
||||
|
||||
@@ -56,7 +56,8 @@ using namespace llvm;
|
||||
#define DEBUG_TYPE "cuda2hip"
|
||||
|
||||
enum ConvTypes {
|
||||
CONV_DEV = 0,
|
||||
CONV_DRIVER = 0,
|
||||
CONV_DEV,
|
||||
CONV_MEM,
|
||||
CONV_KERN,
|
||||
CONV_COORD_FUNC,
|
||||
@@ -64,6 +65,9 @@ enum ConvTypes {
|
||||
CONV_SPECIAL_FUNC,
|
||||
CONV_STREAM,
|
||||
CONV_EVENT,
|
||||
CONV_CONTEXT,
|
||||
CONV_MODULE,
|
||||
CONV_CACHE,
|
||||
CONV_ERR,
|
||||
CONV_DEF,
|
||||
CONV_TEX,
|
||||
@@ -77,11 +81,10 @@ enum ConvTypes {
|
||||
};
|
||||
|
||||
const char *counterNames[CONV_LAST] = {
|
||||
"dev", "mem", "kern", "coord_func", "math_func",
|
||||
"special_func", "stream", "event", "err", "def",
|
||||
"tex", "other", "include", "include_cuda_main_header",
|
||||
"driver", "dev", "mem", "kern", "coord_func", "math_func",
|
||||
"special_func", "stream", "event", "ctx", "module", "cache",
|
||||
"err", "def", "tex", "other", "include", "include_cuda_main_header",
|
||||
"type", "literal", "numeric_literal"};
|
||||
|
||||
enum ApiTypes {
|
||||
API_DRIVER = 0,
|
||||
API_RUNTIME,
|
||||
@@ -90,7 +93,7 @@ enum ApiTypes {
|
||||
};
|
||||
|
||||
const char *apiNames[API_LAST] = {
|
||||
"CUDA API", "CUDA RT API", "CUDA BLAS API"};
|
||||
"CUDA", "CUDA RT", "CUBLAS"};
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -294,10 +297,10 @@ struct cuda2hipMap {
|
||||
|
||||
cuda2hipRename["CUfunc_cache_enum"] = {"hipFuncCache", CONV_TYPE, API_DRIVER};
|
||||
cuda2hipRename["CUfunc_cache"] = {"hipFuncCache", CONV_TYPE, API_DRIVER};
|
||||
cuda2hipRename["CU_FUNC_CACHE_PREFER_NONE"] = {"hipFuncCachePreferNone", CONV_DEV, API_DRIVER};
|
||||
cuda2hipRename["CU_FUNC_CACHE_PREFER_SHARED"] = {"hipFuncCachePreferShared", CONV_DEV, API_DRIVER};
|
||||
cuda2hipRename["CU_FUNC_CACHE_PREFER_L1"] = {"hipFuncCachePreferL1", CONV_DEV, API_DRIVER};
|
||||
cuda2hipRename["CU_FUNC_CACHE_PREFER_EQUAL"] = {"hipFuncCachePreferEqual", CONV_DEV, API_DRIVER};
|
||||
cuda2hipRename["CU_FUNC_CACHE_PREFER_NONE"] = {"hipFuncCachePreferNone", CONV_CACHE, API_DRIVER};
|
||||
cuda2hipRename["CU_FUNC_CACHE_PREFER_SHARED"] = {"hipFuncCachePreferShared", CONV_CACHE, API_DRIVER};
|
||||
cuda2hipRename["CU_FUNC_CACHE_PREFER_L1"] = {"hipFuncCachePreferL1", CONV_CACHE, API_DRIVER};
|
||||
cuda2hipRename["CU_FUNC_CACHE_PREFER_EQUAL"] = {"hipFuncCachePreferEqual", CONV_CACHE, API_DRIVER};
|
||||
|
||||
cuda2hipRename["CUsharedconfig_enum"] = {"hipSharedMemConfig", CONV_TYPE, API_DRIVER};
|
||||
cuda2hipRename["CUsharedconfig"] = {"hipSharedMemConfig", CONV_TYPE, API_DRIVER};
|
||||
@@ -308,7 +311,81 @@ struct cuda2hipMap {
|
||||
cuda2hipRename["CUcontext"] = {"hipCtx_t", CONV_TYPE, API_DRIVER};
|
||||
cuda2hipRename["CUmodule"] = {"hipModule_t", CONV_TYPE, API_DRIVER};
|
||||
cuda2hipRename["CUevent"] = {"hipEvent_t", CONV_TYPE, API_DRIVER};
|
||||
// Event Flags
|
||||
cuda2hipRename["CU_EVENT_DEFAULT"] = {"hipEventDefault", CONV_EVENT, API_DRIVER};
|
||||
cuda2hipRename["CU_EVENT_BLOCKING_SYNC"] = {"hipEventBlockingSync", CONV_EVENT, API_DRIVER};
|
||||
cuda2hipRename["CU_EVENT_DISABLE_TIMING"] = {"hipEventDisableTiming", CONV_EVENT, API_DRIVER};
|
||||
cuda2hipRename["CU_EVENT_INTERPROCESS"] = {"hipEventInterprocess", CONV_EVENT, API_DRIVER};
|
||||
|
||||
cuda2hipRename["CUstream"] = {"hipStream_t", CONV_TYPE, API_DRIVER};
|
||||
// Stream Flags
|
||||
cuda2hipRename["CU_STREAM_DEFAULT"] = {"hipStreamDefault", CONV_STREAM, API_DRIVER};
|
||||
cuda2hipRename["CU_STREAM_NON_BLOCKING"] = {"hipStreamNonBlocking", CONV_STREAM, API_DRIVER};
|
||||
|
||||
// Context
|
||||
cuda2hipRename["cuCtxCreate_v2"] = {"hipCtxCreate", CONV_CONTEXT, API_DRIVER};
|
||||
cuda2hipRename["cuCtxDestroy_v2"] = {"hipCtxDestroy", CONV_CONTEXT, API_DRIVER};
|
||||
cuda2hipRename["cuCtxPopCurrent_v2"] = {"hipCtxPopCurrent", CONV_CONTEXT, API_DRIVER};
|
||||
cuda2hipRename["cuCtxPushCurrent_v2"] = {"hipCtxPushCurrent", CONV_CONTEXT, API_DRIVER};
|
||||
cuda2hipRename["cuCtxSetCurrent"] = {"hipCtxSetCurrent", CONV_CONTEXT, API_DRIVER};
|
||||
cuda2hipRename["cuCtxGetCurrent"] = {"hipCtxGetCurrent", CONV_CONTEXT, API_DRIVER};
|
||||
cuda2hipRename["cuCtxGetDevice"] = {"hipCtxGetDevice", CONV_CONTEXT, API_DRIVER};
|
||||
cuda2hipRename["cuCtxGetApiVersion"] = {"hipCtxGetApiVersion", CONV_CONTEXT, API_DRIVER};
|
||||
cuda2hipRename["cuCtxGetCacheConfig"] = {"hipCtxGetCacheConfig", CONV_CONTEXT, API_DRIVER};
|
||||
cuda2hipRename["cuCtxSetCacheConfig"] = {"hipCtxSetCacheConfig", CONV_CONTEXT, API_DRIVER};
|
||||
cuda2hipRename["cuCtxSetSharedMemConfig"] = {"hipCtxSetSharedMemConfig", CONV_CONTEXT, API_DRIVER};
|
||||
cuda2hipRename["cuCtxGetSharedMemConfig"] = {"hipCtxGetSharedMemConfig", CONV_CONTEXT, API_DRIVER};
|
||||
cuda2hipRename["cuCtxSynchronize"] = {"hipCtxSynchronize", CONV_CONTEXT, API_DRIVER};
|
||||
cuda2hipRename["cuCtxGetFlags"] = {"hipCtxGetFlags", CONV_CONTEXT, API_DRIVER};
|
||||
cuda2hipRename["cuCtxEnablePeerAccess"] = {"hipCtxEnablePeerAccess", CONV_CONTEXT, API_DRIVER};
|
||||
cuda2hipRename["cuCtxDisablePeerAccess"] = {"hipCtxDisablePeerAccess", CONV_CONTEXT, API_DRIVER};
|
||||
// unsupported yet by HIP
|
||||
// cuda2hipRename["cuCtxSetLimit"] = {"hipCtxSetLimit", CONV_CONTEXT, API_DRIVER};
|
||||
// cuda2hipRename["cuCtxGetLimit"] = {"hipCtxGetLimit", CONV_CONTEXT, API_DRIVER};
|
||||
|
||||
// Device
|
||||
cuda2hipRename["cuDeviceGet"] = {"hipGetDevice", CONV_DEV, API_DRIVER};
|
||||
cuda2hipRename["cuDeviceGetName"] = {"hipDeviceGetName", CONV_DEV, API_DRIVER};
|
||||
cuda2hipRename["cuDeviceGetCount"] = {"hipGetDeviceCount", CONV_DEV, API_DRIVER};
|
||||
cuda2hipRename["cuDeviceGetAttribute"] = {"hipDeviceGetAttribute", CONV_DEV, API_DRIVER};
|
||||
cuda2hipRename["cuDeviceGetProperties"] = {"hipGetDeviceProperties", CONV_DEV, API_DRIVER};
|
||||
cuda2hipRename["cuDeviceGetPCIBusId"] = {"hipDeviceGetPCIBusId", CONV_DEV, API_DRIVER};
|
||||
// unsupported yet by HIP
|
||||
// cuda2hipRename["cuDeviceGetByPCIBusId"] = {"hipDeviceGetByPCIBusId", CONV_DEV, API_DRIVER};
|
||||
cuda2hipRename["cuDeviceTotalMem_v2"] = {"hipDeviceTotalMem", CONV_DEV, API_DRIVER};
|
||||
cuda2hipRename["cuDeviceComputeCapability"] = {"hipDeviceComputeCapability", CONV_DEV, API_DRIVER};
|
||||
cuda2hipRename["cuDeviceCanAccessPeer"] = {"hipDeviceCanAccessPeer", CONV_DEV, API_DRIVER};
|
||||
|
||||
// Driver
|
||||
cuda2hipRename["cuDriverGetVersion"] = {"hipDriverGetVersion", CONV_DRIVER, API_DRIVER};
|
||||
|
||||
// Events
|
||||
cuda2hipRename["cuEventCreate"] = {"hipEventCreate", CONV_EVENT, API_DRIVER};
|
||||
cuda2hipRename["cuEventDestroy_v2"] = {"hipEventDestroy", CONV_EVENT, API_DRIVER};
|
||||
cuda2hipRename["cuEventElapsedTime"] = {"hipEventElapsedTime", CONV_EVENT, API_DRIVER};
|
||||
cuda2hipRename["cuEventQuery"] = {"hipEventQuery", CONV_EVENT, API_DRIVER};
|
||||
cuda2hipRename["cuEventRecord"] = {"hipEventRecord", CONV_EVENT, API_DRIVER};
|
||||
cuda2hipRename["cuEventSynchronize"] = {"hipEventSynchronize", CONV_EVENT, API_DRIVER};
|
||||
|
||||
// Module
|
||||
cuda2hipRename["cuModuleGetFunction"] = {"hipModuleGetFunction", CONV_MODULE, API_DRIVER};
|
||||
cuda2hipRename["cuModuleGetGlobal_v2"] = {"hipModuleGetGlobal", CONV_MODULE, API_DRIVER};
|
||||
cuda2hipRename["cuModuleLoad"] = {"hipModuleLoad", CONV_MODULE, API_DRIVER};
|
||||
cuda2hipRename["cuModuleLoadData"] = {"hipModuleLoadData", CONV_MODULE, API_DRIVER};
|
||||
// unsupported yet by HIP
|
||||
// cuda2hipRename["cuModuleLoadDataEx"] = {"hipModuleLoadDataEx", CONV_MODULE, API_DRIVER};
|
||||
// cuda2hipRename["cuModuleLoadFatBinary"] = {"hipModuleLoadFatBinary", CONV_MODULE, API_DRIVER};
|
||||
cuda2hipRename["cuModuleUnload"] = {"hipModuleUnload", CONV_MODULE, API_DRIVER};
|
||||
cuda2hipRename["cuLaunchKernel"] = {"hipModuleLaunchKernel", CONV_MODULE, API_DRIVER};
|
||||
|
||||
// Streams
|
||||
// unsupported yet by HIP
|
||||
// cuda2hipRename["cuStreamAddCallback"] = {"hipStreamAddCallback", CONV_STREAM, API_DRIVER};
|
||||
cuda2hipRename["cuStreamCreate"] = {"hipStreamCreate", CONV_STREAM, API_DRIVER};
|
||||
cuda2hipRename["cuStreamDestroy_v2"] = {"hipStreamDestroy", CONV_STREAM, API_DRIVER};
|
||||
cuda2hipRename["cuStreamQuery"] = {"hipStreamQuery", CONV_STREAM, API_DRIVER};
|
||||
cuda2hipRename["cuStreamSynchronize"] = {"hipStreamSynchronize", CONV_STREAM, API_DRIVER};
|
||||
cuda2hipRename["cuStreamWaitEvent"] = {"hipStreamWaitEvent", CONV_STREAM, API_DRIVER};
|
||||
|
||||
/////////////////////////////// CUDA RT API ///////////////////////////////
|
||||
// Error API
|
||||
@@ -411,8 +488,8 @@ struct cuda2hipMap {
|
||||
cuda2hipRename["cudaStreamDestroy"] = {"hipStreamDestroy", CONV_STREAM, API_RUNTIME};
|
||||
cuda2hipRename["cudaStreamWaitEvent"] = {"hipStreamWaitEvent", CONV_STREAM, API_RUNTIME};
|
||||
cuda2hipRename["cudaStreamSynchronize"] = {"hipStreamSynchronize", CONV_STREAM, API_RUNTIME};
|
||||
// Stream Flags
|
||||
cuda2hipRename["cudaStreamGetFlags"] = {"hipStreamGetFlags", CONV_STREAM, API_RUNTIME};
|
||||
// Stream Flags
|
||||
cuda2hipRename["cudaStreamDefault"] = {"hipStreamDefault", CONV_STREAM, API_RUNTIME};
|
||||
cuda2hipRename["cudaStreamNonBlocking"] = {"hipStreamNonBlocking", CONV_STREAM, API_RUNTIME};
|
||||
|
||||
@@ -425,6 +502,7 @@ struct cuda2hipMap {
|
||||
cuda2hipRename["cudaThreadExit"] = {"hipDeviceReset", CONV_DEV, API_RUNTIME};
|
||||
cuda2hipRename["cudaSetDevice"] = {"hipSetDevice", CONV_DEV, API_RUNTIME};
|
||||
cuda2hipRename["cudaGetDevice"] = {"hipGetDevice", CONV_DEV, API_RUNTIME};
|
||||
cuda2hipRename["cudaGetDeviceCount"] = {"hipGetDeviceCount", CONV_DEV, API_RUNTIME};
|
||||
|
||||
// Attributes
|
||||
cuda2hipRename["cudaDeviceAttr"] = {"hipDeviceAttribute_t", CONV_TYPE, API_RUNTIME};
|
||||
@@ -482,22 +560,21 @@ struct cuda2hipMap {
|
||||
//cuda2hipRename["cudaDeviceMask"] = {"hipDeviceMask", CONV_DEV, API_RUNTIME};
|
||||
|
||||
// Cache config
|
||||
cuda2hipRename["cudaDeviceSetCacheConfig"] = {"hipDeviceSetCacheConfig", CONV_DEV, API_RUNTIME};
|
||||
cuda2hipRename["cudaDeviceSetCacheConfig"] = {"hipDeviceSetCacheConfig", CONV_CACHE, API_RUNTIME};
|
||||
// translate deprecated
|
||||
cuda2hipRename["cudaThreadSetCacheConfig"] = {"hipDeviceSetCacheConfig", CONV_DEV, API_RUNTIME};
|
||||
cuda2hipRename["cudaDeviceGetCacheConfig"] = {"hipDeviceGetCacheConfig", CONV_DEV, API_RUNTIME};
|
||||
cuda2hipRename["cudaThreadSetCacheConfig"] = {"hipDeviceSetCacheConfig", CONV_CACHE, API_RUNTIME};
|
||||
cuda2hipRename["cudaDeviceGetCacheConfig"] = {"hipDeviceGetCacheConfig", CONV_CACHE, API_RUNTIME};
|
||||
// translate deprecated
|
||||
cuda2hipRename["cudaThreadGetCacheConfig"] = {"hipDeviceGetCacheConfig", CONV_DEV, API_RUNTIME};
|
||||
cuda2hipRename["cudaFuncCache"] = {"hipFuncCache", CONV_DEV, API_RUNTIME};
|
||||
cuda2hipRename["cudaFuncCachePreferNone"] = {"hipFuncCachePreferNone", CONV_DEV, API_RUNTIME};
|
||||
cuda2hipRename["cudaFuncCachePreferShared"] = {"hipFuncCachePreferShared", CONV_DEV, API_RUNTIME};
|
||||
cuda2hipRename["cudaFuncCachePreferL1"] = {"hipFuncCachePreferL1", CONV_DEV, API_RUNTIME};
|
||||
cuda2hipRename["cudaFuncCachePreferEqual"] = {"hipFuncCachePreferEqual", CONV_DEV, API_RUNTIME};
|
||||
cuda2hipRename["cudaFuncSetCacheConfig"] = {"hipFuncSetCacheConfig", CONV_DEV, API_RUNTIME};
|
||||
cuda2hipRename["cudaThreadGetCacheConfig"] = {"hipDeviceGetCacheConfig", CONV_CACHE, API_RUNTIME};
|
||||
cuda2hipRename["cudaFuncCache"] = {"hipFuncCache", CONV_CACHE, API_RUNTIME};
|
||||
cuda2hipRename["cudaFuncCachePreferNone"] = {"hipFuncCachePreferNone", CONV_CACHE, API_RUNTIME};
|
||||
cuda2hipRename["cudaFuncCachePreferShared"] = {"hipFuncCachePreferShared", CONV_CACHE, API_RUNTIME};
|
||||
cuda2hipRename["cudaFuncCachePreferL1"] = {"hipFuncCachePreferL1", CONV_CACHE, API_RUNTIME};
|
||||
cuda2hipRename["cudaFuncCachePreferEqual"] = {"hipFuncCachePreferEqual", CONV_CACHE, API_RUNTIME};
|
||||
cuda2hipRename["cudaFuncSetCacheConfig"] = {"hipFuncSetCacheConfig", CONV_CACHE, API_RUNTIME};
|
||||
|
||||
// Driver/Runtime
|
||||
cuda2hipRename["cudaDriverGetVersion"] = {"hipDriverGetVersion", CONV_DEV, API_RUNTIME};
|
||||
cuda2hipRename["cudaGetDeviceCount"] = {"hipGetDeviceCount", CONV_DEV, API_RUNTIME};
|
||||
cuda2hipRename["cudaDriverGetVersion"] = {"hipDriverGetVersion", CONV_DRIVER, API_RUNTIME};
|
||||
// unsupported yet
|
||||
//cuda2hipRename["cudaRuntimeGetVersion"] = {"hipRuntimeGetVersion", CONV_DEV, API_RUNTIME};
|
||||
|
||||
|
||||
@@ -179,7 +179,7 @@ hip_find_helper_file(run_hipcc cmake)
|
||||
###############################################################################
|
||||
|
||||
###############################################################################
|
||||
# MACRO: Seperate the options from the sources
|
||||
# MACRO: Separate the options from the sources
|
||||
###############################################################################
|
||||
macro(HIP_GET_SOURCES_AND_OPTIONS _sources _cmake_options _hipcc_options _hcc_options _nvcc_options)
|
||||
set(${_sources})
|
||||
@@ -303,7 +303,7 @@ macro(HIP_INCLUDE_HIPCC_DEPENDENCIES dependency_file)
|
||||
endforeach()
|
||||
else()
|
||||
# No dependencies, so regenerate the file
|
||||
set(CUDA_NVCC_DEPEND_REGENERATE TRUE)
|
||||
set(HIP_HIPCC_DEPEND_REGENERATE TRUE)
|
||||
endif()
|
||||
|
||||
# Regenerate the dependency file if needed
|
||||
@@ -416,7 +416,7 @@ macro(HIP_PREPARE_TARGET_COMMANDS _target _format _generated_files)
|
||||
|
||||
# Create up the comment string
|
||||
file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}")
|
||||
set(hip_build_comment_string "Building HIPCC (${cuda_build_type}) object ${generated_file_relative_path}")
|
||||
set(hip_build_comment_string "Building HIPCC (using ${HIP_PLATFORM}) object ${generated_file_relative_path}")
|
||||
|
||||
# Build the generated file and dependency file
|
||||
add_custom_command(
|
||||
@@ -453,11 +453,9 @@ macro(HIP_ADD_EXECUTABLE hip_target)
|
||||
# Separate the sources from the options
|
||||
HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN})
|
||||
HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options})
|
||||
set(HIP_CMAKE_CXX_LINK_EXECUTABLE ${CMAKE_CXX_LINK_EXECUTABLE})
|
||||
set(CMAKE_CXX_LINK_EXECUTABLE "${HIP_HIPCC_EXECUTABLE} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET>")
|
||||
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_EXECUTABLE} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET>")
|
||||
add_executable(${hip_target} ${_cmake_options} ${_generated_files} ${_sources})
|
||||
set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE ${HIP_C_OR_CXX})
|
||||
#set(CMAKE_CXX_COMPILER ${ORIGINAL_CMAKE_CXX_COMPILER})
|
||||
set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE HIP)
|
||||
endmacro()
|
||||
|
||||
# vim: ts=4:sw=4:expandtab:smartindent
|
||||
|
||||
@@ -22,7 +22,7 @@ endif()
|
||||
|
||||
# Set these up as variables to make reading the generated file easier
|
||||
set(HIP_HIPCC_EXECUTABLE "@HIP_HIPCC_EXECUTABLE@") # path
|
||||
set(HIP_HOST_COMPILER "@CUDA_HOST_COMPILER@") # path
|
||||
set(HIP_HOST_COMPILER "@HIP_HOST_COMPILER@") # path
|
||||
set(HIP_PLATFORM "@HIP_PLATFORM@") #string
|
||||
set(CMAKE_COMMAND "@CMAKE_COMMAND@") # path
|
||||
set(HIP_run_make2cmake "@HIP_run_make2cmake@") # path
|
||||
|
||||
@@ -15,3 +15,5 @@ The default device can be set with hipSetDevice.
|
||||
- hipify - tool to convert CUDA(R) code to portable C++ code.
|
||||
- hipconfig - tool to report various confoguration properties of the target platform.
|
||||
|
||||
- nvcc = nvcc compiler, do not capitalize.
|
||||
- hcc = heterogeneous compute compiler, do not capitalize.
|
||||
|
||||
@@ -25,7 +25,7 @@ THE SOFTWARE.
|
||||
#include "hip/hcc_detail/unpinned_copy_engine.h"
|
||||
|
||||
|
||||
#if defined(__HCC__) && (__hcc_workweek__ < 16186)
|
||||
#if defined(__HCC__) && (__hcc_workweek__ < 16354)
|
||||
#error("This version of HIP requires a newer version of HCC.");
|
||||
#endif
|
||||
|
||||
@@ -87,7 +87,8 @@ class ihipCtx_t;
|
||||
#define KCYN "\x1B[36m"
|
||||
#define KWHT "\x1B[37m"
|
||||
|
||||
#define API_COLOR KGRN
|
||||
extern const char *API_COLOR;
|
||||
extern const char *API_COLOR_END;
|
||||
|
||||
|
||||
// If set, thread-safety is enforced on all stream functions.
|
||||
@@ -149,7 +150,7 @@ class ihipCtx_t;
|
||||
if (HIP_ATP_MARKER || (COMPILE_HIP_DB && HIP_TRACE_API)) {\
|
||||
std::string s = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\
|
||||
if (COMPILE_HIP_DB && HIP_TRACE_API) {\
|
||||
fprintf (stderr, API_COLOR "<<hip-api: %s\n" KNRM, s.c_str());\
|
||||
fprintf (stderr, "%s<<hip-api: %s\n%s" , API_COLOR, s.c_str(), API_COLOR_END);\
|
||||
}\
|
||||
SCOPED_MARKER(s.c_str(), "HIP", NULL);\
|
||||
}\
|
||||
@@ -179,7 +180,7 @@ class ihipCtx_t;
|
||||
tls_lastHipError = localHipStatus;\
|
||||
\
|
||||
if ((COMPILE_HIP_TRACE_API & 0x2) && HIP_TRACE_API) {\
|
||||
fprintf(stderr, " %ship-api: %-30s ret=%2d (%s)>>\n" KNRM, (localHipStatus == 0) ? API_COLOR:KRED, __func__, localHipStatus, ihipErrorString(localHipStatus));\
|
||||
fprintf(stderr, " %ship-api: %-30s ret=%2d (%s)>>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\
|
||||
}\
|
||||
localHipStatus;\
|
||||
})
|
||||
@@ -365,8 +366,23 @@ public:
|
||||
|
||||
class ihipFunction_t{
|
||||
public:
|
||||
hsa_executable_symbol_t kernel_symbol;
|
||||
uint64_t kernel;
|
||||
ihipFunction_t(const char *name) {
|
||||
size_t nameSz = strlen(name);
|
||||
char *kernelName = (char*)malloc(nameSz);
|
||||
strncpy(kernelName, name, nameSz);
|
||||
_kernelName = kernelName;
|
||||
};
|
||||
|
||||
~ihipFunction_t() {
|
||||
if (_kernelName) {
|
||||
free((void*)_kernelName);
|
||||
_kernelName = NULL;
|
||||
};
|
||||
};
|
||||
public:
|
||||
const char *_kernelName;
|
||||
hsa_executable_symbol_t _kernelSymbol;
|
||||
uint64_t _kernel;
|
||||
};
|
||||
|
||||
|
||||
@@ -415,6 +431,9 @@ public:
|
||||
SIGSEQNUM _streamSigId; // Monotonically increasing unique signal id.
|
||||
|
||||
hc::accelerator_view _av;
|
||||
|
||||
std::vector<hc::completion_future*> _cfs;
|
||||
|
||||
};
|
||||
|
||||
|
||||
@@ -452,6 +471,9 @@ typedef uint64_t SeqNum_t ;
|
||||
void locked_waitEvent(hipEvent_t event);
|
||||
void locked_recordEvent(hipEvent_t event);
|
||||
|
||||
void addCFtoStream(LockedAccessor_StreamCrit_t &crit, hc::completion_future* cf);
|
||||
void waitOnAllCFs(LockedAccessor_StreamCrit_t &crit);
|
||||
|
||||
//---
|
||||
|
||||
// Use this if we already have the stream critical data mutex:
|
||||
@@ -460,7 +482,8 @@ typedef uint64_t SeqNum_t ;
|
||||
void launchModuleKernel(hc::accelerator_view av, hsa_signal_t signal,
|
||||
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
|
||||
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
|
||||
uint32_t sharedMemBytes, void *kernarg, size_t kernSize, uint64_t kernel);
|
||||
uint32_t groupSegmentSize, uint32_t sharedMemBytes,
|
||||
void *kernarg, size_t kernSize, uint64_t kernel);
|
||||
|
||||
// Non-threadsafe accessors - must be protected by high-level stream lock with accessor passed to function.
|
||||
SIGSEQNUM lastCopySeqId (LockedAccessor_StreamCrit_t &crit) const { return crit->_last_copy_signal ? crit->_last_copy_signal->_sigId : 0; };
|
||||
@@ -498,6 +521,7 @@ private: // Data
|
||||
|
||||
// Friends:
|
||||
friend std::ostream& operator<<(std::ostream& os, const ihipStream_t& s);
|
||||
friend hipError_t hipStreamQuery(hipStream_t);
|
||||
};
|
||||
|
||||
|
||||
@@ -718,6 +742,18 @@ inline std::ostream & operator<<(std::ostream& os, const dim3& s)
|
||||
return os;
|
||||
}
|
||||
|
||||
inline std::ostream & operator<<(std::ostream& os, const gl_dim3& s)
|
||||
{
|
||||
os << '{';
|
||||
os << s.x;
|
||||
os << ',';
|
||||
os << s.y;
|
||||
os << ',';
|
||||
os << s.z;
|
||||
os << '}';
|
||||
return os;
|
||||
}
|
||||
|
||||
// Stream printf functions:
|
||||
inline std::ostream& operator<<(std::ostream& os, const hipEvent_t& e)
|
||||
{
|
||||
|
||||
@@ -621,25 +621,19 @@ __device__ static inline void* memset(void* ptr, uint8_t val, size_t size)
|
||||
#define HIP_KERNEL_NAME(...) __VA_ARGS__
|
||||
|
||||
#ifdef __HCC_CPP__
|
||||
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp);
|
||||
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm *lp);
|
||||
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm *lp);
|
||||
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm *lp);
|
||||
extern void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, const hipStream_t stream);
|
||||
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr);
|
||||
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm *lp, const char *kernelNameStr);
|
||||
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr);
|
||||
extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm *lp, const char *kernelNameStr);
|
||||
extern void ihipPostLaunchKernel(hipStream_t stream, grid_launch_parm &lp);
|
||||
|
||||
// TODO - move to common header file.
|
||||
#define KNRM "\x1B[0m"
|
||||
#define KGRN "\x1B[32m"
|
||||
|
||||
// Due to multiple overloaded versions of ihipPreLaunchKernel, the numBlocks3D and blockDim3D can be either size_t or dim3 types
|
||||
#define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \
|
||||
do {\
|
||||
grid_launch_parm lp;\
|
||||
lp.dynamic_group_mem_bytes = _groupMemBytes; \
|
||||
hipStream_t trueStream = (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp)); \
|
||||
if (HIP_TRACE_API) {\
|
||||
ihipPrintKernelLaunch(#_kernelName, &lp, _stream); \
|
||||
}\
|
||||
hipStream_t trueStream = (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp, #_kernelName)); \
|
||||
_kernelName (lp, ##__VA_ARGS__);\
|
||||
ihipPostLaunchKernel(trueStream, lp);\
|
||||
} while(0)
|
||||
|
||||
@@ -278,6 +278,11 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int deviceI
|
||||
* @param [out] prop written with device properties
|
||||
* @param [in] deviceId which device to query for information
|
||||
*
|
||||
* @return #hipSuccess, #hipErrorInvalidDevice
|
||||
* @bug HCC always returns 0 for maxThreadsPerMultiProcessor
|
||||
* @bug HCC always returns 0 for regsPerBlock
|
||||
* @bug HCC always returns 0 for l2CacheSize
|
||||
*
|
||||
* Populates hipGetDeviceProperties with information for the specified device.
|
||||
*/
|
||||
hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId);
|
||||
@@ -289,6 +294,7 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId);
|
||||
/**
|
||||
* @brief Set L1/Shared cache partition.
|
||||
*
|
||||
* @returns #hipSuccess
|
||||
* Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
||||
*
|
||||
*/
|
||||
@@ -298,6 +304,7 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig );
|
||||
/**
|
||||
* @brief Set Cache configuration for a specific function
|
||||
*
|
||||
* @returns #hipSuccess
|
||||
* Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
||||
*
|
||||
*/
|
||||
@@ -307,6 +314,7 @@ hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig );
|
||||
/**
|
||||
* @brief Set Cache configuration for a specific function
|
||||
*
|
||||
* @returns #hipSuccess
|
||||
* Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures.
|
||||
*
|
||||
*/
|
||||
@@ -318,6 +326,7 @@ hipError_t hipFuncSetCacheConfig ( hipFuncCache config );
|
||||
/**
|
||||
* @brief Get Shared memory bank configuration.
|
||||
*
|
||||
* @returns #hipSuccess
|
||||
* Note: AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
|
||||
*
|
||||
*/
|
||||
@@ -327,6 +336,7 @@ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig );
|
||||
/**
|
||||
* @brief Set Shared memory bank configuration.
|
||||
*
|
||||
* @returns #hipSuccess
|
||||
* Note: AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures.
|
||||
*
|
||||
*/
|
||||
@@ -335,6 +345,7 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config );
|
||||
/**
|
||||
* @brief Set Device flags
|
||||
*
|
||||
* @returns #hipSuccess
|
||||
* Note: Only hipDeviceScheduleAuto and hipDeviceMapHost are supported
|
||||
*
|
||||
*/
|
||||
@@ -345,6 +356,14 @@ hipError_t hipSetDeviceFlags ( unsigned flags);
|
||||
* @}
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Select compute-device which best matches criteria.
|
||||
*
|
||||
* @param [out] device ID
|
||||
* @param [in] device properties pointer
|
||||
*
|
||||
*/
|
||||
hipError_t hipChooseDevice(int *device,hipDeviceProp_t* prop);
|
||||
|
||||
/**
|
||||
*-------------------------------------------------------------------------------------------------
|
||||
@@ -474,6 +493,20 @@ hipError_t hipStreamCreate(hipStream_t *stream);
|
||||
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Return #hipSuccess if all of the operations in the specified @p stream have completed, or #hipErrorNotReady if not.
|
||||
*
|
||||
* @param[in] stream stream to query
|
||||
*
|
||||
* @return #hipSuccess, #hipErrorNotReady
|
||||
*
|
||||
* This is thread-safe and returns a snapshot of the current state of the queue. However, if other host threads are sending work to the stream,
|
||||
* the status may change immediately after the function is called. It is typically used for debug.
|
||||
*/
|
||||
hipError_t hipStreamQuery(hipStream_t stream);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Wait for all commands in stream to complete.
|
||||
*
|
||||
@@ -726,7 +759,7 @@ hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) __attribute
|
||||
hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, unsigned int flags) ;
|
||||
|
||||
/**
|
||||
* @brief Return flags associated with host pointer
|
||||
* @brief Return flags associated with host pointer
|
||||
*
|
||||
* @param[out] flagsPtr Memory location to store flags
|
||||
* @param[in] hostPtr Host Pointer allocated through hipHostMalloc
|
||||
@@ -1227,13 +1260,6 @@ hipError_t hipCtxDisablePeerAccess (hipCtx_t peerCtx);
|
||||
* @}
|
||||
*/
|
||||
|
||||
|
||||
// TODO-ctx
|
||||
/**
|
||||
* @return hipSuccess, hipErrorInvalidDevice
|
||||
*/
|
||||
hipError_t hipDeviceGetFromId(hipDevice_t *device, int deviceId);
|
||||
|
||||
/**
|
||||
* @brief Returns a handle to a compute device
|
||||
* @param [out] device
|
||||
@@ -1299,10 +1325,10 @@ hipError_t hipDriverGetVersion(int *driverVersion) ;
|
||||
*
|
||||
* @param [in] fname
|
||||
* @param [out] module
|
||||
*
|
||||
*
|
||||
* @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorFileNotFound, hipErrorOutOfMemory, hipErrorSharedObjectInitFailed, hipErrorNotInitialized
|
||||
*
|
||||
*
|
||||
*
|
||||
*/
|
||||
hipError_t hipModuleLoad(hipModule_t *module, const char *fname);
|
||||
|
||||
@@ -1313,7 +1339,7 @@ hipError_t hipModuleLoad(hipModule_t *module, const char *fname);
|
||||
*
|
||||
* @returns hipSuccess, hipInvalidValue
|
||||
* module is freed and the code objects associated with it are destroyed
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
hipError_t hipModuleUnload(hipModule_t module);
|
||||
@@ -1325,7 +1351,7 @@ hipError_t hipModuleUnload(hipModule_t module);
|
||||
* @param [in] kname
|
||||
* @param [out] function
|
||||
*
|
||||
* @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorNotInitialized, hipErrorNotFound,
|
||||
* @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorNotInitialized, hipErrorNotFound,
|
||||
*/
|
||||
hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, const char *kname);
|
||||
|
||||
|
||||
@@ -5,17 +5,13 @@ endif
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler)
|
||||
|
||||
ifeq (${HIP_PLATFORM}, hcc)
|
||||
GENCO_FLAGS=--target-isa=fiji
|
||||
endif
|
||||
|
||||
all: vcpy_kernel.code runKernel.hip.out
|
||||
|
||||
runKernel.hip.out: runKernel.cpp
|
||||
$(HIPCC) $(HIPCC_FLAGS) $< -o $@
|
||||
|
||||
vcpy_kernel.code: vcpy_kernel.cpp
|
||||
$(HIPCC) --genco $(GENCO_FLAGS) $< -o $@
|
||||
$(HIPCC) --genco $(GENCO_FLAGS) $^ -o $@
|
||||
|
||||
clean:
|
||||
rm -f *.code *.out
|
||||
|
||||
+39
-82
@@ -24,10 +24,6 @@ THE SOFTWARE.
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
//Devices
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
//---
|
||||
/**
|
||||
* @return #hipSuccess, hipErrorInvalidDevice
|
||||
*/
|
||||
// TODO - does this initialize HIP runtime?
|
||||
hipError_t hipGetDevice(int *deviceId)
|
||||
{
|
||||
@@ -47,11 +43,6 @@ hipError_t hipGetDevice(int *deviceId)
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
/**
|
||||
* @return #hipSuccess, #hipErrorNoDevice
|
||||
*/
|
||||
// TODO - does this initialize HIP runtime?
|
||||
hipError_t hipGetDeviceCount(int *count)
|
||||
{
|
||||
@@ -66,11 +57,6 @@ hipError_t hipGetDeviceCount(int *count)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
/**
|
||||
* @returns #hipSuccess
|
||||
*/
|
||||
hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig )
|
||||
{
|
||||
HIP_INIT_API(cacheConfig);
|
||||
@@ -80,11 +66,6 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig )
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
/**
|
||||
* @returns #hipSuccess
|
||||
*/
|
||||
hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig )
|
||||
{
|
||||
HIP_INIT_API(cacheConfig);
|
||||
@@ -94,11 +75,6 @@ hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig )
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
/**
|
||||
* @returns #hipSuccess
|
||||
*/
|
||||
hipError_t hipFuncSetCacheConfig ( hipFuncCache cacheConfig )
|
||||
{
|
||||
HIP_INIT_API(cacheConfig);
|
||||
@@ -108,12 +84,6 @@ hipError_t hipFuncSetCacheConfig ( hipFuncCache cacheConfig )
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
|
||||
//---
|
||||
/**
|
||||
* @returns #hipSuccess
|
||||
*/
|
||||
hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config )
|
||||
{
|
||||
HIP_INIT_API(config);
|
||||
@@ -123,12 +93,6 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config )
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
|
||||
//---
|
||||
/**
|
||||
* @returns #hipSuccess
|
||||
*/
|
||||
hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig )
|
||||
{
|
||||
HIP_INIT_API(pConfig);
|
||||
@@ -138,10 +102,6 @@ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig )
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
//---
|
||||
/**
|
||||
* @return #hipSuccess, #hipErrorInvalidDevice
|
||||
*/
|
||||
hipError_t hipSetDevice(int deviceId)
|
||||
{
|
||||
HIP_INIT_API(deviceId);
|
||||
@@ -153,22 +113,12 @@ hipError_t hipSetDevice(int deviceId)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
/**
|
||||
* @return #hipSuccess
|
||||
*/
|
||||
hipError_t hipDeviceSynchronize(void)
|
||||
{
|
||||
HIP_INIT_API();
|
||||
return ihipSynchronize();
|
||||
return ihipLogStatus(ihipSynchronize());
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
/**
|
||||
* @return @ref hipSuccess
|
||||
*/
|
||||
hipError_t hipDeviceReset(void)
|
||||
{
|
||||
HIP_INIT_API();
|
||||
@@ -182,15 +132,12 @@ hipError_t hipDeviceReset(void)
|
||||
|
||||
if (ctx) {
|
||||
// Release ctx resources (streams and memory):
|
||||
ctx->locked_reset();
|
||||
ctx->locked_reset();
|
||||
}
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
|
||||
{
|
||||
HIP_INIT_API(pi, attr, device);
|
||||
@@ -260,13 +207,6 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device)
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return #hipSuccess, #hipErrorInvalidDevice
|
||||
* @bug HCC always returns 0 for maxThreadsPerMultiProcessor
|
||||
* @bug HCC always returns 0 for regsPerBlock
|
||||
* @bug HCC always returns 0 for l2CacheSize
|
||||
*/
|
||||
hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device)
|
||||
{
|
||||
HIP_INIT_API(props, device);
|
||||
@@ -285,7 +225,6 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device)
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipSetDeviceFlags( unsigned int flags)
|
||||
{
|
||||
HIP_INIT_API(flags);
|
||||
@@ -306,25 +245,6 @@ hipError_t hipSetDeviceFlags( unsigned int flags)
|
||||
return ihipLogStatus(e);
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
hipError_t hipDeviceGetFromId(hipDevice_t *device, int deviceId)
|
||||
{
|
||||
HIP_INIT_API(device, deviceId);
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
*device = ihipGetDevice(deviceId);
|
||||
|
||||
if (device == nullptr) {
|
||||
e = hipErrorInvalidDevice;
|
||||
}
|
||||
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device)
|
||||
{
|
||||
HIP_INIT_API(major,minor, device);
|
||||
@@ -361,3 +281,40 @@ hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device)
|
||||
*bytes= device->_props.totalGlobalMem;
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipChooseDevice( int* device, const hipDeviceProp_t* prop )
|
||||
{
|
||||
hipDeviceProp_t tempProp;
|
||||
int deviceCount;
|
||||
int inPropCount=0;
|
||||
int matchedPropCount=0;
|
||||
hipError_t e = hipSuccess;
|
||||
hipGetDeviceCount( &deviceCount );
|
||||
*device = 0;
|
||||
for (int i=0; i< deviceCount; i++) {
|
||||
hipGetDeviceProperties( &tempProp, i );
|
||||
if(prop->major !=0) {
|
||||
inPropCount++;
|
||||
if(tempProp.major >= prop->major) {
|
||||
matchedPropCount++;
|
||||
}
|
||||
if(prop->minor !=0) {
|
||||
inPropCount++;
|
||||
if(tempProp.minor >= prop->minor) {
|
||||
matchedPropCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(inPropCount == matchedPropCount) {
|
||||
*device = i;
|
||||
}
|
||||
#if 0
|
||||
else{
|
||||
e= hipErrorInvalidValue;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
+111
-14
@@ -57,10 +57,14 @@ const int release = 1;
|
||||
#define MEMCPY_H2D_DIRECT_VS_STAGING_COPY_THRESHOLD 65336
|
||||
#define MEMCPY_H2D_STAGING_VS_PININPLACE_COPY_THRESHOLD 1048576
|
||||
|
||||
const char *API_COLOR = KGRN;
|
||||
const char *API_COLOR_END = KNRM;
|
||||
|
||||
int HIP_LAUNCH_BLOCKING = 0;
|
||||
|
||||
int HIP_PRINT_ENV = 0;
|
||||
int HIP_TRACE_API= 0;
|
||||
std::string HIP_TRACE_API_COLOR("green");
|
||||
int HIP_ATP_MARKER= 0;
|
||||
int HIP_DB= 0;
|
||||
int HIP_STAGING_SIZE = 64; /* size of staging buffers, in KB */
|
||||
@@ -246,7 +250,8 @@ void ihipStream_t::wait(LockedAccessor_StreamCrit_t &crit, bool assertQueueEmpty
|
||||
{
|
||||
if (! assertQueueEmpty) {
|
||||
tprintf (DB_SYNC, "stream %p wait for queue-empty..\n", this);
|
||||
crit->_av.wait();
|
||||
// crit->_av.wait();
|
||||
waitOnAllCFs(crit);
|
||||
}
|
||||
|
||||
if (crit->_last_copy_signal) {
|
||||
@@ -262,6 +267,21 @@ void ihipStream_t::wait(LockedAccessor_StreamCrit_t &crit, bool assertQueueEmpty
|
||||
// crit->_signalCnt = 0;
|
||||
}
|
||||
|
||||
void ihipStream_t::addCFtoStream(LockedAccessor_StreamCrit_t &crit, hc::completion_future *cf)
|
||||
{
|
||||
crit->_cfs.push_back(cf);
|
||||
}
|
||||
|
||||
void ihipStream_t::waitOnAllCFs(LockedAccessor_StreamCrit_t &crit)
|
||||
{
|
||||
for(uint32_t i=0;i<crit->_cfs.size();i++){
|
||||
if(crit->_cfs[i] != NULL){
|
||||
crit->_cfs[i]->wait();
|
||||
delete crit->_cfs[i];
|
||||
}
|
||||
}
|
||||
crit->_cfs.clear();
|
||||
}
|
||||
|
||||
//---
|
||||
//Wait for all kernel and data copy commands in this stream to complete.
|
||||
@@ -522,7 +542,8 @@ void ihipStream_t::launchModuleKernel(
|
||||
uint32_t gridDimX,
|
||||
uint32_t gridDimY,
|
||||
uint32_t gridDimZ,
|
||||
uint32_t sharedMemBytes,
|
||||
uint32_t groupSegmentSize,
|
||||
uint32_t privateSegmentSize,
|
||||
void *kernarg,
|
||||
size_t kernSize,
|
||||
uint64_t kernel){
|
||||
@@ -545,8 +566,8 @@ void ihipStream_t::launchModuleKernel(
|
||||
dispatch_packet->grid_size_x = blockDimX * gridDimX;
|
||||
dispatch_packet->grid_size_y = blockDimY * gridDimY;
|
||||
dispatch_packet->grid_size_z = blockDimZ * gridDimZ;
|
||||
dispatch_packet->group_segment_size = 0;
|
||||
dispatch_packet->private_segment_size = sharedMemBytes;
|
||||
dispatch_packet->group_segment_size = groupSegmentSize;
|
||||
dispatch_packet->private_segment_size = privateSegmentSize;
|
||||
dispatch_packet->kernarg_address = kern;
|
||||
dispatch_packet->kernel_object = kernel;
|
||||
uint16_t header = (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
|
||||
@@ -1122,6 +1143,7 @@ void ihipCtx_t::locked_waitAllStreams()
|
||||
|
||||
|
||||
|
||||
//---
|
||||
// Read environment variables.
|
||||
void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, const char *description)
|
||||
{
|
||||
@@ -1132,6 +1154,7 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c
|
||||
env = getenv(var_name2);
|
||||
}
|
||||
|
||||
// TODO: Refactor this code so it is a separate call rather than being part of ihipReadEnv_I, which should only read integers.
|
||||
// Check if the environment variable is either HIP_VISIBLE_DEVICES or CUDA_LAUNCH_BLOCKING, which
|
||||
// contains a sequence of comma-separated device IDs
|
||||
if (!(strcmp(var_name1,"HIP_VISIBLE_DEVICES") && strcmp(var_name2, "CUDA_VISIBLE_DEVICES")) && env){
|
||||
@@ -1169,15 +1192,37 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c
|
||||
printf ("%-30s = %2d : %s\n", var_name1, *var_ptr, description);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void ihipReadEnv_S(std::string *var_ptr, const char *var_name1, const char *var_name2, const char *description)
|
||||
{
|
||||
char * env = getenv(var_name1);
|
||||
|
||||
// Check second name if first not defined, used to allow HIP_ or CUDA_ env vars.
|
||||
if ((env == NULL) && strcmp(var_name2, "0")) {
|
||||
env = getenv(var_name2);
|
||||
}
|
||||
|
||||
if (env) {
|
||||
*var_ptr = env;
|
||||
}
|
||||
if (HIP_PRINT_ENV) {
|
||||
printf ("%-30s = %s : %s\n", var_name1, var_ptr->c_str(), description);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#if defined (DEBUG)
|
||||
|
||||
#define READ_ENV_I(_build, _ENV_VAR, _ENV_VAR2, _description) \
|
||||
if ((_build == release) || (_build == debug) {\
|
||||
ihipReadEnv_I(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);\
|
||||
};
|
||||
#define READ_ENV_S(_build, _ENV_VAR, _ENV_VAR2, _description) \
|
||||
if ((_build == release) || (_build == debug) {\
|
||||
ihipReadEnv_S(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);\
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
@@ -1186,6 +1231,11 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c
|
||||
ihipReadEnv_I(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);\
|
||||
};
|
||||
|
||||
#define READ_ENV_S(_build, _ENV_VAR, _ENV_VAR2, _description) \
|
||||
if (_build == release) {\
|
||||
ihipReadEnv_S(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);\
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1218,6 +1268,7 @@ void ihipInit()
|
||||
}
|
||||
|
||||
READ_ENV_I(release, HIP_TRACE_API, 0, "Trace each HIP API call. Print function name and return code to stderr as program executes.");
|
||||
READ_ENV_S(release, HIP_TRACE_API_COLOR, 0, "Color to use for HIP_API. None/Red/Green/Yellow/Blue/Magenta/Cyan/White");
|
||||
READ_ENV_I(release, HIP_ATP_MARKER, 0, "Add HIP function begin/end to ATP file generated with CodeXL");
|
||||
READ_ENV_I(release, HIP_STAGING_SIZE, 0, "Size of each staging buffer (in KB)" );
|
||||
READ_ENV_I(release, HIP_STAGING_BUFFERS, 0, "Number of staging buffers to use in each direction. 0=use hsa_memory_copy.");
|
||||
@@ -1261,6 +1312,31 @@ void ihipInit()
|
||||
fprintf (stderr, "warning: env var HIP_ATP_MARKER=0x%x but COMPILE_HIP_ATP_MARKER=0. (perhaps enable COMPILE_HIP_DB in src code before compiling?)", HIP_ATP_MARKER);
|
||||
}
|
||||
|
||||
std::transform(HIP_TRACE_API_COLOR.begin(), HIP_TRACE_API_COLOR.end(), HIP_TRACE_API_COLOR.begin(), ::tolower);
|
||||
|
||||
if (HIP_TRACE_API_COLOR == "none") {
|
||||
API_COLOR = "";
|
||||
API_COLOR_END = "";
|
||||
} else if (HIP_TRACE_API_COLOR == "red") {
|
||||
API_COLOR = KRED;
|
||||
} else if (HIP_TRACE_API_COLOR == "green") {
|
||||
API_COLOR = KGRN;
|
||||
} else if (HIP_TRACE_API_COLOR == "yellow") {
|
||||
API_COLOR = KYEL;
|
||||
} else if (HIP_TRACE_API_COLOR == "blue") {
|
||||
API_COLOR = KBLU;
|
||||
} else if (HIP_TRACE_API_COLOR == "magenta") {
|
||||
API_COLOR = KMAG;
|
||||
} else if (HIP_TRACE_API_COLOR == "cyan") {
|
||||
API_COLOR = KCYN;
|
||||
} else if (HIP_TRACE_API_COLOR == "white") {
|
||||
API_COLOR = KWHT;
|
||||
} else {
|
||||
fprintf (stderr, "warning: env var HIP_TRACE_API_COLOR=%s must be None/Red/Green/Yellow/Blue/Magenta/Cyan/White", HIP_TRACE_API_COLOR.c_str());
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Build a table of valid compute devices.
|
||||
@@ -1332,7 +1408,7 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t stream)
|
||||
#endif
|
||||
return device->_defaultStream;
|
||||
} else {
|
||||
// Have to wait for legacy default stream to be empty:
|
||||
// ALl streams have to wait for legacy default stream to be empty:
|
||||
if (!(stream->_flags & hipStreamNonBlocking)) {
|
||||
tprintf(DB_SYNC, "stream %p wait default stream\n", stream);
|
||||
stream->getCtx()->_defaultStream->locked_wait();
|
||||
@@ -1344,16 +1420,26 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t stream)
|
||||
|
||||
void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, const hipStream_t stream)
|
||||
{
|
||||
std::string streamString = ToString(stream);
|
||||
fprintf(stderr, KGRN "<<hip-api: hipLaunchKernel '%s' gridDim:(%d,%d,%d) groupDim:(%d,%d,%d) groupMem:+%d %s\n" KNRM, \
|
||||
kernelName, lp->grid_dim.x, lp->grid_dim.y, lp->grid_dim.z, lp->group_dim.x, lp->group_dim.y, lp->group_dim.z,
|
||||
lp->dynamic_group_mem_bytes, streamString.c_str());\
|
||||
if (HIP_ATP_MARKER || (COMPILE_HIP_DB && HIP_TRACE_API)) {
|
||||
std::stringstream os;
|
||||
os << "<<hip-api: hipLaunchKernel '" << kernelName << "'"
|
||||
<< " gridDim:" << lp->grid_dim
|
||||
<< " groupDim:" << lp->group_dim
|
||||
<< " sharedMem:+" << lp->dynamic_group_mem_bytes
|
||||
<< " " << *stream;
|
||||
|
||||
|
||||
if (COMPILE_HIP_DB && HIP_TRACE_API) {
|
||||
std::cerr << API_COLOR << os.str() << API_COLOR_END << std::endl;
|
||||
}
|
||||
SCOPED_MARKER(os.str().c_str(), "HIP", NULL);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO - data-up to data-down:
|
||||
// Called just before a kernel is launched from hipLaunchKernel.
|
||||
// Allows runtime to track some information about the stream.
|
||||
hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp)
|
||||
hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr)
|
||||
{
|
||||
HIP_INIT();
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
@@ -1369,11 +1455,14 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_
|
||||
auto crit = stream->lockopen_preKernelCommand();
|
||||
lp->av = &(crit->_av);
|
||||
lp->cf = new hc::completion_future;
|
||||
stream->addCFtoStream(crit, lp->cf);
|
||||
ihipPrintKernelLaunch(kernelNameStr, lp, stream);
|
||||
|
||||
return (stream);
|
||||
}
|
||||
|
||||
|
||||
hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm *lp)
|
||||
hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr)
|
||||
{
|
||||
HIP_INIT();
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
@@ -1389,11 +1478,13 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, gri
|
||||
auto crit = stream->lockopen_preKernelCommand();
|
||||
lp->av = &(crit->_av);
|
||||
lp->cf = new hc::completion_future;
|
||||
stream->addCFtoStream(crit, lp->cf);
|
||||
ihipPrintKernelLaunch(kernelNameStr, lp, stream);
|
||||
return (stream);
|
||||
}
|
||||
|
||||
|
||||
hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm *lp)
|
||||
hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm *lp, const char *kernelNameStr)
|
||||
{
|
||||
HIP_INIT();
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
@@ -1409,11 +1500,13 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, gri
|
||||
auto crit = stream->lockopen_preKernelCommand();
|
||||
lp->av = &(crit->_av);
|
||||
lp->cf = new hc::completion_future;
|
||||
stream->addCFtoStream(crit, lp->cf);
|
||||
ihipPrintKernelLaunch(kernelNameStr, lp, stream);
|
||||
return (stream);
|
||||
}
|
||||
|
||||
|
||||
hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm *lp)
|
||||
hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm *lp, const char *kernelNameStr)
|
||||
{
|
||||
HIP_INIT();
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
@@ -1429,6 +1522,10 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, g
|
||||
auto crit = stream->lockopen_preKernelCommand();
|
||||
lp->av = &(crit->_av);
|
||||
lp->cf = new hc::completion_future; // TODO, is this necessary?
|
||||
|
||||
stream->addCFtoStream(crit, lp->cf);
|
||||
|
||||
ihipPrintKernelLaunch(kernelNameStr, lp, stream);
|
||||
return (stream);
|
||||
}
|
||||
|
||||
|
||||
+52
-31
@@ -30,7 +30,7 @@ THE SOFTWARE.
|
||||
|
||||
//TODO Use Pool APIs from HCC to get memory regions.
|
||||
|
||||
namespace hipdrv{
|
||||
namespace hipdrv {
|
||||
|
||||
hsa_status_t findSystemRegions(hsa_region_t region, void *data){
|
||||
hsa_region_segment_t segment_id;
|
||||
@@ -99,7 +99,7 @@ uint64_t ElfSize(const void *emi){
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoad(hipModule_t *module, const char *fname){
|
||||
HIP_INIT_API(fname);
|
||||
HIP_INIT_API(module, fname);
|
||||
hipError_t ret = hipSuccess;
|
||||
*module = new ihipModule_t;
|
||||
|
||||
@@ -187,7 +187,7 @@ hipError_t ihipModuleGetFunction(hipFunction_t *func, hipModule_t hmod, const ch
|
||||
ret = hipErrorInvalidContext;
|
||||
|
||||
}else{
|
||||
*func = new ihipFunction_t;
|
||||
*func = new ihipFunction_t(name);
|
||||
int deviceId = ctx->getDevice()->_deviceId;
|
||||
ihipDevice_t *currentDevice = ihipGetDevice(deviceId);
|
||||
hsa_agent_t gpuAgent = (hsa_agent_t)currentDevice->_hsaAgent;
|
||||
@@ -199,14 +199,14 @@ hipError_t ihipModuleGetFunction(hipFunction_t *func, hipModule_t hmod, const ch
|
||||
}
|
||||
|
||||
status = hsa_executable_freeze(hmod->executable, NULL);
|
||||
status = hsa_executable_get_symbol(hmod->executable, NULL, name, gpuAgent, 0, &(*func)->kernel_symbol);
|
||||
status = hsa_executable_get_symbol(hmod->executable, NULL, name, gpuAgent, 0, &(*func)->_kernelSymbol);
|
||||
if(status != HSA_STATUS_SUCCESS){
|
||||
return ihipLogStatus(hipErrorNotFound);
|
||||
}
|
||||
|
||||
status = hsa_executable_symbol_get_info((*func)->kernel_symbol,
|
||||
status = hsa_executable_symbol_get_info((*func)->_kernelSymbol,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
|
||||
&(*func)->kernel);
|
||||
&(*func)->_kernel);
|
||||
|
||||
if(status != HSA_STATUS_SUCCESS){
|
||||
return ihipLogStatus(hipErrorNotFound);
|
||||
@@ -215,9 +215,10 @@ hipError_t ihipModuleGetFunction(hipFunction_t *func, hipModule_t hmod, const ch
|
||||
return ihipLogStatus(ret);
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod,
|
||||
const char *name){
|
||||
HIP_INIT_API(name);
|
||||
HIP_INIT_API(hfunc, hmod, name);
|
||||
return ihipModuleGetFunction(hfunc, hmod, name);
|
||||
}
|
||||
|
||||
@@ -226,8 +227,13 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
|
||||
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
|
||||
uint32_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra){
|
||||
HIP_INIT_API(f->kernel);
|
||||
void **kernelParams, void **extra)
|
||||
{
|
||||
HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra);
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
hipError_t ret = hipSuccess;
|
||||
|
||||
@@ -246,35 +252,47 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
memcpy(config, extra, sizeof(size_t)*5);
|
||||
if(config[0] == HIP_LAUNCH_PARAM_BUFFER_POINTER && config[2] == HIP_LAUNCH_PARAM_BUFFER_SIZE && config[4] == HIP_LAUNCH_PARAM_END){
|
||||
kernSize = *(size_t*)(config[3]);
|
||||
}else{
|
||||
} else {
|
||||
return ihipLogStatus(hipErrorNotInitialized);
|
||||
}
|
||||
}else{
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
/*
|
||||
Kernel argument preparation.
|
||||
*/
|
||||
hsa_status_t status;
|
||||
grid_launch_parm lp;
|
||||
hStream = ihipPreLaunchKernel(hStream, 0, 0, &lp);
|
||||
|
||||
/*
|
||||
Create signal
|
||||
*/
|
||||
uint32_t groupSegmentSize;
|
||||
hsa_status_t status = hsa_executable_symbol_get_info(f->_kernelSymbol,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
|
||||
&groupSegmentSize);
|
||||
|
||||
uint32_t privateSegmentSize;
|
||||
status = hsa_executable_symbol_get_info(f->_kernelSymbol,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
|
||||
&privateSegmentSize);
|
||||
privateSegmentSize += sharedMemBytes;
|
||||
|
||||
|
||||
/*
|
||||
Kernel argument preparation.
|
||||
*/
|
||||
grid_launch_parm lp;
|
||||
hStream = ihipPreLaunchKernel(hStream, 0, 0, &lp, f->_kernelName);
|
||||
|
||||
/*
|
||||
Create signal
|
||||
*/
|
||||
|
||||
hsa_signal_t signal;
|
||||
status = hsa_signal_create(1, 0, NULL, &signal);
|
||||
|
||||
/*
|
||||
Launch AQL packet
|
||||
*/
|
||||
/*
|
||||
Launch AQL packet
|
||||
*/
|
||||
hStream->launchModuleKernel(*lp.av, signal, blockDimX, blockDimY, blockDimZ,
|
||||
gridDimX, gridDimY, gridDimZ, sharedMemBytes, config[1], kernSize, f->kernel);
|
||||
gridDimX, gridDimY, gridDimZ, groupSegmentSize, privateSegmentSize, config[1], kernSize, f->_kernel);
|
||||
|
||||
/*
|
||||
Wait for signal
|
||||
*/
|
||||
/*
|
||||
Wait for signal
|
||||
*/
|
||||
|
||||
hsa_signal_value_t value = hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
|
||||
|
||||
@@ -288,8 +306,9 @@ Kernel argument preparation.
|
||||
|
||||
|
||||
hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
|
||||
hipModule_t hmod, const char* name){
|
||||
HIP_INIT_API(name);
|
||||
hipModule_t hmod, const char* name)
|
||||
{
|
||||
HIP_INIT_API(dptr, bytes, hmod, name);
|
||||
hipError_t ret = hipSuccess;
|
||||
if(dptr == NULL || bytes == NULL){
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
@@ -301,13 +320,15 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
|
||||
hipFunction_t func;
|
||||
ihipModuleGetFunction(&func, hmod, name);
|
||||
*bytes = PrintSymbolSizes(hmod->ptr, name) + sizeof(amd_kernel_code_t);
|
||||
*dptr = reinterpret_cast<void*>(func->kernel);
|
||||
*dptr = reinterpret_cast<void*>(func->_kernel);
|
||||
return ihipLogStatus(ret);
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hipModuleLoadData(hipModule_t *module, const void *image){
|
||||
HIP_INIT_API(image);
|
||||
|
||||
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
|
||||
{
|
||||
HIP_INIT_API(module, image);
|
||||
hipError_t ret = hipSuccess;
|
||||
if(image == NULL || module == NULL){
|
||||
return ihipLogStatus(hipErrorNotInitialized);
|
||||
|
||||
@@ -115,6 +115,27 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int
|
||||
};
|
||||
|
||||
|
||||
//---
|
||||
hipError_t hipStreamQuery(hipStream_t stream)
|
||||
{
|
||||
HIP_INIT_API(stream);
|
||||
|
||||
// Use default stream if 0 specified:
|
||||
if (stream == hipStreamNull) {
|
||||
ihipCtx_t *device = ihipGetTlsDefaultCtx();
|
||||
stream = device->_defaultStream;
|
||||
}
|
||||
|
||||
LockedAccessor_StreamCrit_t crit(stream->_criticalData);
|
||||
int pendingOps = crit->_av.get_pending_async_ops();
|
||||
|
||||
|
||||
hipError_t e = (pendingOps > 0) ? hipErrorNotReady : hipSuccess;
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
hipError_t hipStreamSynchronize(hipStream_t stream)
|
||||
{
|
||||
|
||||
@@ -2,74 +2,87 @@ cmake_minimum_required (VERSION 2.6)
|
||||
|
||||
# remove CMAKE_CXX_COMPILER entry from cache since it will be pointing to hipcc
|
||||
unset(CMAKE_CXX_COMPILER CACHE)
|
||||
message (CMAKE_CXX_COMPILER = ${CMAKE_CXX_COMPILER} )
|
||||
# remove HIP_PATH entry from cache since we might be running tests with a different configuration
|
||||
unset(HIP_PATH CACHE)
|
||||
|
||||
project (HIP_Unit_Tests)
|
||||
project(HIP_Unit_Tests)
|
||||
include(CTest)
|
||||
set(HIPTEST_SOURCE_DIR ${PROJECT_SOURCE_DIR})
|
||||
|
||||
|
||||
#include_directories( ${PROJECT_SOURCE_DIR}/include )
|
||||
set (HIPTEST_SOURCE_DIR ${PROJECT_SOURCE_DIR} )
|
||||
|
||||
# The version number.
|
||||
set (HIP_Unit_Test_VERSION_MAJOR 1)
|
||||
set (HIP_Unit_Test_VERSION_MINOR 0)
|
||||
|
||||
# Enable multi-gpu tests
|
||||
if(NOT DEFINED HIP_MULTI_GPU)
|
||||
set(HIP_MULTI_GPU 0 CACHE BOOL "Run tests requiring more than one GPU")
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED HIP_BUILD_LOCAL)
|
||||
if(NOT DEFINED ENV{HIP_BUILD_LOCAL})
|
||||
set(HIP_BUILD_LOCAL 1 CACHE BOOL "Build HIP in local folder")
|
||||
# Determine HIP_PATH
|
||||
if(NOT DEFINED HIP_PATH)
|
||||
if(NOT DEFINED ENV{HIP_PATH})
|
||||
# We are going to use HIP source...
|
||||
get_filename_component(HIP_SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../.. ABSOLUTE)
|
||||
execute_process(
|
||||
COMMAND "${CMAKE_COMMAND}" -E remove_directory hip
|
||||
OUTPUT_QUIET
|
||||
ERROR_QUIET
|
||||
)
|
||||
execute_process(
|
||||
COMMAND "${CMAKE_COMMAND}" -E make_directory hip
|
||||
OUTPUT_QUIET
|
||||
ERROR_QUIET
|
||||
)
|
||||
message(STATUS "Configuring HIP")
|
||||
# ...so need to build HIP locally.
|
||||
execute_process(
|
||||
COMMAND "${CMAKE_COMMAND}" -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/hip/localbuild ${HIP_SRC_PATH}
|
||||
WORKING_DIRECTORY hip
|
||||
RESULT_VARIABLE hip_build_result
|
||||
OUTPUT_QUIET
|
||||
ERROR_QUIET
|
||||
)
|
||||
if(hip_build_result)
|
||||
message(FATAL_ERROR "Error configuring HIP")
|
||||
else()
|
||||
message(STATUS "Configuring HIP - done")
|
||||
message(STATUS "Building HIP")
|
||||
endif()
|
||||
execute_process(
|
||||
COMMAND "${CMAKE_COMMAND}" --build . --target install
|
||||
WORKING_DIRECTORY hip
|
||||
RESULT_VARIABLE hip_build_result
|
||||
OUTPUT_VARIABLE hip_build_log
|
||||
ERROR_QUIET
|
||||
)
|
||||
if(hip_build_result)
|
||||
message(${hip_build_log})
|
||||
message(FATAL_ERROR "Error building HIP")
|
||||
else()
|
||||
# Building HIP is successful. Point HIP_PATH to this location.
|
||||
message(STATUS "Building HIP - done")
|
||||
get_filename_component(HIP_PATH ${CMAKE_CURRENT_BINARY_DIR}/hip/localbuild ABSOLUTE)
|
||||
endif()
|
||||
# Add a target to rebuild HIP if HIP source changes.
|
||||
add_custom_target(
|
||||
hip ALL
|
||||
COMMAND "${CMAKE_COMMAND}" --build . --target install
|
||||
WORKING_DIRECTORY hip
|
||||
)
|
||||
else()
|
||||
set(HIP_BUILD_LOCAL $ENV{HIP_BUILD_LOCAL} CACHE BOOL "Build HIP in local folder")
|
||||
# We are using HIP_PATH from env. So just create a fake target.
|
||||
set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to installed HIP")
|
||||
add_custom_target(hip ALL)
|
||||
endif()
|
||||
else()
|
||||
# We are using HIP_PATH passed to cmake. So just create a fake target.
|
||||
add_custom_target(hip ALL)
|
||||
endif()
|
||||
MESSAGE("HIP_PATH=" ${HIP_PATH})
|
||||
|
||||
set(HIP_PATH $ENV{HIP_PATH})
|
||||
if (NOT DEFINED HIP_PATH)
|
||||
get_filename_component (HIP_PATH ../.. ABSOLUTE)
|
||||
endif()
|
||||
|
||||
# Determine HIP_PLATFORM
|
||||
execute_process(COMMAND ${HIP_PATH}/bin/hipconfig --platform OUTPUT_VARIABLE HIP_PLATFORM)
|
||||
|
||||
MESSAGE ("HIP_PATH=" ${HIP_PATH})
|
||||
|
||||
if (${HIP_PLATFORM} STREQUAL "hcc")
|
||||
MESSAGE ("HIP_PLATFORM=hcc")
|
||||
|
||||
set (HSA_PATH $ENV{HSA_PATH})
|
||||
if (NOT DEFINED HSA_PATH)
|
||||
set (HSA_PATH /opt/rocm/hsa)
|
||||
endif()
|
||||
|
||||
set (CODEXL_PATH $ENV{CODEXL_PATH})
|
||||
if (NOT DEFINED CODEXL_PATH)
|
||||
set (CODEXL_PATH /opt/AMD/CodeXL)
|
||||
endif()
|
||||
set (CODEXL_SDK_ATAL_PATH ${CODEXL_PATH}/SDK/AMDTActivityLogger)
|
||||
|
||||
#---
|
||||
# Add HSA library:
|
||||
add_library(hsa-runtime64 SHARED IMPORTED)
|
||||
set_property(TARGET hsa-runtime64 PROPERTY IMPORTED_LOCATION "${HSA_PATH}/lib/libhsa-runtime64.so")
|
||||
|
||||
#These includes are used for all files.
|
||||
#Include HIP and HC since the tests need both of these:
|
||||
include_directories(${HIP_PATH}/include)
|
||||
|
||||
# This will create a subdir "hip_hcc" in the test build directory
|
||||
# Any changes to hip_hcc source will be detected and force the library and then the tests to be rebuilt.
|
||||
if (${HIP_BUILD_LOCAL})
|
||||
add_subdirectory(${HIP_PATH} build.hip_hcc)
|
||||
#link_directories(${CMAKE_CURRENT_BINARY_DIR}/build.hip_hcc) # search the local hip_hcc for libhip_hcc.a
|
||||
set (CMAKE_CXX_FLAGS --hipcc_explicit_lib)
|
||||
endif()
|
||||
|
||||
|
||||
elseif (${HIP_PLATFORM} STREQUAL "nvcc")
|
||||
MESSAGE ("HIP_PLATFORM=nvcc")
|
||||
if(${HIP_PLATFORM} STREQUAL "hcc")
|
||||
MESSAGE("HIP_PLATFORM=hcc")
|
||||
elseif(${HIP_PLATFORM} STREQUAL "nvcc")
|
||||
MESSAGE("HIP_PLATFORM=nvcc")
|
||||
|
||||
#Need C++11 for threads in some of the tests.
|
||||
add_definitions(-std=c++11)
|
||||
@@ -77,33 +90,21 @@ elseif (${HIP_PLATFORM} STREQUAL "nvcc")
|
||||
# NVCC does not not support -rdynamic option
|
||||
set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS )
|
||||
set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS )
|
||||
|
||||
else()
|
||||
MESSAGE (FATAL_ERROR "UNKNOWN HIP_PLATFORM=" ${HIP_PLATFORM})
|
||||
MESSAGE(FATAL_ERROR "UNKNOWN HIP_PLATFORM=" ${HIP_PLATFORM})
|
||||
endif()
|
||||
|
||||
|
||||
set (HIPCC ${HIP_PATH}/bin/hipcc)
|
||||
set (CMAKE_CXX_COMPILER ${HIPCC} CACHE FILEPATH "CXX Compiler" FORCE)
|
||||
set(HIPCC ${HIP_PATH}/bin/hipcc)
|
||||
set(CMAKE_CXX_COMPILER ${HIPCC} CACHE FILEPATH "CXX Compiler" FORCE)
|
||||
|
||||
add_library(test_common OBJECT test_common.cpp )
|
||||
|
||||
|
||||
# usage : build_hip_executable (exe_name CPP_FILES)
|
||||
macro (build_hip_executable exe cpp)
|
||||
if (${HIP_PLATFORM} STREQUAL "hcc")
|
||||
if (${HIP_BUILD_LOCAL})
|
||||
#target_link_libraries(${exe} hip_hcc)
|
||||
add_executable (${exe} ${cpp} ${ARGN} $<TARGET_OBJECTS:test_common> $<TARGET_OBJECTS:hip_hcc> )
|
||||
else()
|
||||
add_executable (${exe} ${cpp} ${ARGN} $<TARGET_OBJECTS:test_common> )
|
||||
endif()
|
||||
else()
|
||||
add_executable (${exe} ${cpp} ${ARGN} $<TARGET_OBJECTS:test_common> )
|
||||
endif()
|
||||
add_executable (${exe} ${cpp} ${ARGN} $<TARGET_OBJECTS:test_common> )
|
||||
add_dependencies(${exe} hip)
|
||||
endmacro()
|
||||
|
||||
|
||||
# Make a hip executable, using libc++
|
||||
macro (build_hip_executable_libcpp exe cpp)
|
||||
build_hip_executable( ${exe} ${cpp} ${ARGN} )
|
||||
@@ -213,11 +214,11 @@ endif()
|
||||
if (${HIP_PLATFORM} STREQUAL "hcc")
|
||||
make_test(hipArray " ")
|
||||
make_test(hipFuncSetDevice " ")
|
||||
make_test(hipDynamicShared " ")
|
||||
endif()
|
||||
|
||||
make_hipify_test(specialFunc.cu )
|
||||
|
||||
make_test(hipDynamicShared " ")
|
||||
|
||||
# Add subdirs here:
|
||||
add_subdirectory(context)
|
||||
|
||||
@@ -34,7 +34,7 @@ int main(int argc, char *argv[])
|
||||
hipCtx_t ctx;
|
||||
hipCtx_t ctx1;
|
||||
|
||||
HIPCHECK(hipDeviceGetFromId(&device, 0));
|
||||
HIPCHECK(hipDeviceGet(&device, 0));
|
||||
HIPCHECK(hipCtxCreate(&ctx, 0, device));
|
||||
HIPCHECK(hipCtxGetCurrent(&ctx1));
|
||||
HIPCHECK(hipCtxGetDevice(&device1));
|
||||
|
||||
@@ -86,19 +86,25 @@ __host__ void double_precision_math_functions()
|
||||
nearbyint(0.0);
|
||||
//nextafter(0.0);
|
||||
//fX = 1.0; norm(1, &fX);
|
||||
#if defined(__HIP_PLATFORM_HCC__)
|
||||
norm3d(1.0, 0.0, 0.0);
|
||||
norm4d(1.0, 0.0, 0.0, 0.0);
|
||||
#endif
|
||||
normcdf(0.0);
|
||||
normcdfinv(1.0);
|
||||
pow(1.0, 0.0);
|
||||
rcbrt(1.0);
|
||||
remainder(2.0, 1.0);
|
||||
remquo(1.0, 2.0, &iX);
|
||||
#if defined(__HIP_PLATFORM_HCC__)
|
||||
rhypot(0.0, 1.0);
|
||||
#endif
|
||||
rint(1.0);
|
||||
#if defined(__HIP_PLATFORM_HCC__)
|
||||
fX = 1.0; rnorm(1, &fX);
|
||||
rnorm3d(0.0, 0.0, 1.0);
|
||||
rnorm4d(0.0, 0.0, 0.0, 1.0);
|
||||
#endif
|
||||
round(0.0);
|
||||
rsqrt(1.0);
|
||||
scalbln(0.0, 1);
|
||||
|
||||
@@ -56,7 +56,9 @@ __host__ void single_precision_math_functions()
|
||||
expm1f(0.0f);
|
||||
fabsf(1.0f);
|
||||
fdimf(1.0f, 0.0f);
|
||||
#if defined(__HIP_PLATFORM_HCC__)
|
||||
fdividef(0.0f, 1.0f);
|
||||
#endif
|
||||
floorf(0.0f);
|
||||
fmaf(1.0f, 2.0f, 3.0f);
|
||||
fmaxf(0.0f, 0.0f);
|
||||
@@ -86,8 +88,10 @@ __host__ void single_precision_math_functions()
|
||||
nanf("1");
|
||||
nearbyintf(0.0f);
|
||||
//nextafterf(0.0f);
|
||||
#if defined(__HIP_PLATFORM_HCC__)
|
||||
norm3df(1.0f, 0.0f, 0.0f);
|
||||
norm4df(1.0f, 0.0f, 0.0f, 0.0f);
|
||||
#endif
|
||||
normcdff(0.0f);
|
||||
normcdfinvf(1.0f);
|
||||
//fX = 1.0f; normf(1, &fX);
|
||||
@@ -95,11 +99,15 @@ __host__ void single_precision_math_functions()
|
||||
rcbrtf(1.0f);
|
||||
remainderf(2.0f, 1.0f);
|
||||
remquof(1.0f, 2.0f, &iX);
|
||||
#if defined(__HIP_PLATFORM_HCC__)
|
||||
rhypotf(0.0f, 1.0f);
|
||||
#endif
|
||||
rintf(1.0f);
|
||||
#if defined(__HIP_PLATFORM_HCC__)
|
||||
rnorm3df(0.0f, 0.0f, 1.0f);
|
||||
rnorm4df(0.0f, 0.0f, 0.0f, 1.0f);
|
||||
fX = 1.0f; rnormf(1, &fX);
|
||||
#endif
|
||||
roundf(0.0f);
|
||||
rsqrtf(1.0f);
|
||||
scalblnf(0.0f, 1);
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
#include <stdio.h>
|
||||
#include <hip_runtime.h>
|
||||
int main( void ) {
|
||||
hipDeviceProp_t prop;
|
||||
int dev;
|
||||
|
||||
hipGetDevice( &dev ) ;
|
||||
printf( "ID of current HIP device: %d\n", dev );
|
||||
|
||||
memset( &prop, 0, sizeof( hipDeviceProp_t ) );
|
||||
prop.major = 1;
|
||||
prop.minor = 3;
|
||||
hipChooseDevice( &dev, &prop );
|
||||
printf( "ID of hip device closest to revision 1.3: %d\n", dev );
|
||||
|
||||
hipSetDevice( dev );
|
||||
}
|
||||
@@ -27,7 +27,12 @@ template<typename T>
|
||||
__global__ void testExternSharedKernel(hipLaunchParm lp, const T* A_d, const T* B_d, T* C_d, size_t numElements, size_t groupElements) {
|
||||
|
||||
// declare dynamic shared memory
|
||||
#if defined(__HIP_PLATFORM_HCC__)
|
||||
HIP_DYNAMIC_SHARED(T, sdata)
|
||||
#else
|
||||
HIP_DYNAMIC_SHARED(__align__(sizeof(T)) unsigned char, my_sdata)
|
||||
T *sdata = reinterpret_cast<T *>(my_sdata);
|
||||
#endif
|
||||
|
||||
size_t gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
|
||||
size_t tid = hipThreadIdx_x;
|
||||
|
||||
@@ -38,7 +38,7 @@ __global__ void vAdd(hipLaunchParm lp, float *a){}
|
||||
cmd;\
|
||||
hipDeviceSynchronize();\
|
||||
gettimeofday(&stop, NULL);\
|
||||
} while(0);
|
||||
} while(0);
|
||||
|
||||
|
||||
|
||||
@@ -61,7 +61,9 @@ int main()
|
||||
{
|
||||
float *Ad;
|
||||
hipMalloc((void**)&Ad, 1024);
|
||||
hipLaunchKernel(vAdd, 1024, 1, 0, 0, Ad);
|
||||
|
||||
// Test the different hipLaunchParm options:
|
||||
hipLaunchKernel(vAdd, size_t(1024), 1, 0, 0, Ad);
|
||||
hipLaunchKernel(vAdd, 1024, dim3(1), 0, 0, Ad);
|
||||
hipLaunchKernel(vAdd, dim3(1024), 1, 0, 0, Ad);
|
||||
hipLaunchKernel(vAdd, dim3(1024), dim3(1), 0, 0, Ad);
|
||||
|
||||
@@ -27,13 +27,14 @@ unsigned p_streams = 6;
|
||||
|
||||
//------
|
||||
// Structure for one stream;
|
||||
template <typename T>
|
||||
template <typename T>
|
||||
class Streamer {
|
||||
public:
|
||||
Streamer(size_t numElements);
|
||||
~Streamer();
|
||||
void runAsync();
|
||||
void waitComplete();
|
||||
void queryUntilComplete();
|
||||
|
||||
|
||||
private:
|
||||
T *_A_h;
|
||||
@@ -66,11 +67,24 @@ void Streamer<T>::runAsync()
|
||||
printf ("testing: %s numElements=%zu size=%6.2fMB\n", __func__, _numElements, _numElements * sizeof(T) / 1024.0/1024.0);
|
||||
unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, _numElements);
|
||||
hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _B_d, _C_d, _numElements);
|
||||
|
||||
// Test case where hipStreamWaitEvent waits on same event we just placed into the queue.
|
||||
HIPCHECK(hipEventRecord(_event, _stream));
|
||||
HIPCHECK(hipStreamWaitEvent(_stream, _event, 0));
|
||||
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Streamer<T>::queryUntilComplete()
|
||||
{
|
||||
int numQueries = 0;
|
||||
hipError_t e = hipSuccess;
|
||||
do {
|
||||
numQueries++;
|
||||
e = hipStreamQuery(_stream);
|
||||
} while (e != hipSuccess) ;
|
||||
|
||||
printf ("completed after %d queries\n", numQueries);
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -99,7 +113,7 @@ void parseMyArguments(int argc, char *argv[])
|
||||
//---
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
HipTest::parseStandardArguments(argc, argv, true);
|
||||
HipTest::parseStandardArguments(argc, argv, false);
|
||||
parseMyArguments(argc, argv);
|
||||
|
||||
typedef Streamer<float> FloatStreamer;
|
||||
@@ -113,11 +127,28 @@ int main(int argc, char *argv[])
|
||||
streamers.push_back(s);
|
||||
}
|
||||
|
||||
for (int i=0; i<p_streams; i++) {
|
||||
streamers[i]->runAsync();
|
||||
if (p_tests & 0x1) {
|
||||
printf ("==> Test 0x1 runAsnc\n");
|
||||
for (int i=0; i<p_streams; i++) {
|
||||
streamers[i]->runAsync();
|
||||
}
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
if (p_tests & 0x2) {
|
||||
printf ("==> Test 0x2 queryUntilComplete\n");
|
||||
for (int i=0; i<p_streams; i++) {
|
||||
streamers[i]->runAsync();
|
||||
streamers[i]->queryUntilComplete();
|
||||
}
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
}
|
||||
|
||||
if (p_tests & 0x4) {
|
||||
hipStreamQuery(0/* try null stream*/);
|
||||
|
||||
}
|
||||
|
||||
HIPCHECK(hipDeviceSynchronize());
|
||||
|
||||
passed();
|
||||
}
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele