Remove hip-hcc codes: Part one
Remove hip-hcc codes from hip code base Simplify hip CMakeLists.txt to exclude hip-hcc Simplify cmake cmd for hip-rocclr building Some minor fixes Change-Id: I1ae357ecfd638d6c25bca293c1724b026be21ecd
Этот коммит содержится в:
коммит произвёл
Aaron En Ye Shi
родитель
8a5b8a36f2
Коммит
186f95ea30
+27
-238
@@ -1,12 +1,18 @@
|
||||
cmake_minimum_required(VERSION 3.4.3)
|
||||
project(hip)
|
||||
# sample command for hip-hcc
|
||||
# cmake -DHIP_RUNTIME=hcc ..
|
||||
# sample command for hip-rocclr, you'll need to have rocclr installed
|
||||
# cmake ..
|
||||
# cmake -DHIP_COMPILER=clang ..
|
||||
# cmake -DHIP_COMPILER=clang -DHIP_RUNTIME=rocclr ..
|
||||
# cmake -DHIP_COMPILER=clang -DHIP_RUNTIME=rocclr -DOPENCL_DIR=/path/to/opencl/api/opencl -DCMAKE_PREFIX_PATH=/path/to/rocclr/build/or/install/directory ..
|
||||
|
||||
# sample command for hip-rocclr runtime, you'll need to have rocclr built
|
||||
# For shared lib of hip-rocclr runtime
|
||||
# For release version
|
||||
# cmake -DCMAKE_PREFIX_PATH="$ROCclr_DIR/build;/opt/rocm/" -DCMAKE_INSTALL_PREFIX=</where/to/install/hip> ..
|
||||
# For debug version
|
||||
# cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_PREFIX_PATH="$ROCclr_DIR/build;/opt/rocm/" -DCMAKE_INSTALL_PREFIX=</where/to/install/hip> ..
|
||||
# For static lib of hip-rocclr runtime
|
||||
# For release version
|
||||
# cmake -DBUILD_SHARED_LIBS=OFF -DCMAKE_PREFIX_PATH="$ROCclr_DIR/build;/opt/rocm/" -DCMAKE_INSTALL_PREFIX=</where/to/install/hip> ..
|
||||
# For debug version
|
||||
# cmake -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Debug -DCMAKE_PREFIX_PATH="$ROCclr_DIR/build;/opt/rocm/" -DCMAKE_INSTALL_PREFIX=</where/to/install/hip> ..
|
||||
# If you don't specify CMAKE_INSTALL_PREFIX, hip-rocclr runtime will be installed to "/opt/rocm/hip".
|
||||
|
||||
set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared library (.so) or static lib (.a) ")
|
||||
|
||||
@@ -134,7 +140,7 @@ add_to_config(_versionInfo HIP_VERSION_PATCH)
|
||||
set (HIP_LIB_VERSION_MAJOR ${HIP_VERSION_MAJOR})
|
||||
set (HIP_LIB_VERSION_MINOR ${HIP_VERSION_MINOR})
|
||||
if (${ROCM_PATCH_VERSION} )
|
||||
set (HIP_LIB_VERSION_PATCH ${ROCM_PATCH_VERSION})
|
||||
set (HIP_LIB_VERSION_PATCH ${ROCM_PATCH_VERSION})
|
||||
else ()
|
||||
set (HIP_LIB_VERSION_PATCH ${HIP_VERSION_PATCH})
|
||||
endif ()
|
||||
@@ -145,15 +151,6 @@ if (DEFINED ENV{ROCM_RPATH})
|
||||
set (CMAKE_SKIP_BUILD_RPATH TRUE)
|
||||
endif ()
|
||||
|
||||
if(CMAKE_CXX_COMPILER MATCHES ".*hcc")
|
||||
set(HIP_COMPILER "hcc" CACHE STRING "HIP Compiler")
|
||||
set(HIP_PLATFORM "amd" CACHE STRING "HIP Platform")
|
||||
set(HIP_RUNTIME "hcc" CACHE STRING "HIP Runtime")
|
||||
get_filename_component(CXX_PATH ${CMAKE_CXX_COMPILER} DIRECTORY)
|
||||
get_filename_component(CXX_PATH ${CXX_PATH} DIRECTORY)
|
||||
set(HCC_HOME "${CXX_PATH}" CACHE PATH "Path to which hcc has been installed")
|
||||
endif()
|
||||
|
||||
# overwrite HIP_VERSION_PATCH for packaging
|
||||
set(HIP_VERSION ${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_PACKAGING_VERSION_PATCH})
|
||||
|
||||
@@ -177,68 +174,21 @@ endif()
|
||||
message(STATUS "HIP Platform: " ${HIP_PLATFORM})
|
||||
|
||||
if(HIP_PLATFORM STREQUAL "nvidia")
|
||||
set(HIP_COMPILER "nvcc" CACHE STRING "HIP Compiler")
|
||||
set(HIP_RUNTIME "cuda" CACHE STRING "HIP Runtime")
|
||||
endif()
|
||||
|
||||
# default runtime is rocclr
|
||||
if(NOT DEFINED HIP_RUNTIME)
|
||||
if(NOT DEFINED ENV{HIP_RUNTIME})
|
||||
set(HIP_RUNTIME "rocclr" CACHE STRING "HIP Runtime")
|
||||
else()
|
||||
set(HIP_RUNTIME $ENV{HIP_RUNTIME} CACHE STRING "HIP Compiler")
|
||||
endif()
|
||||
set(HIP_COMPILER "nvcc" CACHE STRING "HIP Compiler")
|
||||
elseif(HIP_PLATFORM STREQUAL "amd")
|
||||
set(HIP_RUNTIME "rocclr" CACHE STRING "HIP Runtime")
|
||||
set(HIP_COMPILER "clang" CACHE STRING "HIP Compiler")
|
||||
else()
|
||||
message(FATAL_ERROR "Unexpected HIP_PLATFORM: " ${HIP_PLATFORM})
|
||||
endif()
|
||||
|
||||
message(STATUS "HIP Runtime: " ${HIP_RUNTIME})
|
||||
add_to_config(_buildInfo HIP_RUNTIME)
|
||||
|
||||
# Determine HIP_COMPILER
|
||||
# Either hcc or clang; default is clang
|
||||
if(NOT DEFINED HIP_COMPILER)
|
||||
if(NOT DEFINED ENV{HIP_COMPILER})
|
||||
if(HIP_RUNTIME STREQUAL "hcc")
|
||||
set(HIP_COMPILER "hcc" CACHE STRING "HIP Compiler")
|
||||
else()
|
||||
set(HIP_COMPILER "clang" CACHE STRING "HIP Compiler")
|
||||
endif()
|
||||
else()
|
||||
set(HIP_COMPILER $ENV{HIP_COMPILER} CACHE STRING "HIP Compiler")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
message(STATUS "HIP Compiler: " ${HIP_COMPILER})
|
||||
|
||||
add_to_config(_buildInfo HIP_RUNTIME)
|
||||
add_to_config(_buildInfo HIP_COMPILER)
|
||||
|
||||
########### Determine HCC_HOME If compiler is hcc ##################
|
||||
|
||||
if(HIP_COMPILER STREQUAL "hcc")
|
||||
# Determine HCC_HOME
|
||||
if(NOT DEFINED HCC_HOME)
|
||||
if(NOT DEFINED ENV{HCC_HOME})
|
||||
set(HCC_HOME "/opt/rocm/hcc" CACHE PATH "Path to which HCC has been installed")
|
||||
else()
|
||||
set(HCC_HOME $ENV{HCC_HOME} CACHE PATH "Path to which HCC has been installed")
|
||||
endif()
|
||||
endif()
|
||||
if(IS_ABSOLUTE ${HCC_HOME} AND EXISTS ${HCC_HOME} AND IS_DIRECTORY ${HCC_HOME})
|
||||
execute_process(COMMAND ${HCC_HOME}/bin/hcc --version
|
||||
OUTPUT_VARIABLE HCC_VERSION
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
string(REGEX REPLACE ".*based on HCC " "" HCC_VERSION ${HCC_VERSION})
|
||||
string(REGEX REPLACE " .*" "" HCC_VERSION ${HCC_VERSION})
|
||||
message(STATUS "Looking for HCC in: " ${HCC_HOME} ". Found version: " ${HCC_VERSION})
|
||||
else()
|
||||
message(FATAL_ERROR "Don't know where to find HCC. Please specify abolute path using -DHCC_HOME")
|
||||
endif()
|
||||
add_to_config(_buildInfo HCC_VERSION)
|
||||
string(REPLACE "-" ";" HCC_VERSION_LIST ${HCC_VERSION})
|
||||
list(GET HCC_VERSION_LIST 0 HCC_PACKAGE_VERSION)
|
||||
string(REPLACE "." ";" HCC_VERSION_LIST ${HCC_PACKAGE_VERSION})
|
||||
list(GET HCC_VERSION_LIST 0 HCC_VERSION_MAJOR)
|
||||
list(GET HCC_VERSION_LIST 1 HCC_VERSION_MINOR)
|
||||
endif()
|
||||
|
||||
############ If HIP_PLATFORM is amd, HSA_PATH has to be defined ##################
|
||||
|
||||
if(HIP_PLATFORM STREQUAL "amd")
|
||||
@@ -256,7 +206,7 @@ if(HIP_PLATFORM STREQUAL "amd")
|
||||
message(FATAL_ERROR "Don't know where to find HSA runtime. Please specify absolute path using -DHSA_PATH")
|
||||
endif()
|
||||
endif()
|
||||
message(STATUS "\nHSA runtime in: " ${HSA_PATH})
|
||||
message(STATUS "HSA runtime in: " ${HSA_PATH})
|
||||
|
||||
# Set default build type
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
@@ -268,13 +218,7 @@ if (UNIX)
|
||||
set(HIP_DEFAULT_INSTALL_PREFIX "/opt/rocm/hip")
|
||||
endif()
|
||||
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
|
||||
if(CMAKE_BUILD_TYPE MATCHES Debug)
|
||||
set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR} CACHE PATH "Installation path for HIP" FORCE)
|
||||
elseif(CMAKE_BUILD_TYPE MATCHES Release)
|
||||
set(CMAKE_INSTALL_PREFIX ${HIP_DEFAULT_INSTALL_PREFIX} CACHE PATH "Installation path for HIP" FORCE)
|
||||
else()
|
||||
message(FATAL_ERROR "Invalid CMAKE_BUILD_TYPE specified. Valid values are Debug and Release")
|
||||
endif()
|
||||
set(CMAKE_INSTALL_PREFIX ${HIP_DEFAULT_INSTALL_PREFIX} CACHE PATH "Installation path for HIP" FORCE)
|
||||
endif()
|
||||
|
||||
if(DEV_LOG_ENABLE MATCHES "yes")
|
||||
@@ -341,129 +285,6 @@ if(HIP_RUNTIME STREQUAL "rocclr")
|
||||
set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -fPIC ${HCC_CXX_FLAGS} -I${HSA_PATH}/include")
|
||||
endif()
|
||||
|
||||
# Build hip_hcc if runtime is hcc
|
||||
if(HIP_RUNTIME STREQUAL "hcc")
|
||||
#############################
|
||||
# Profiling API support
|
||||
#############################
|
||||
# Generate profiling API macros/structures header
|
||||
if(USE_PROF_API EQUAL 1)
|
||||
set(PROF_API_STR "${PROJECT_BINARY_DIR}/include/hip/hcc_detail/hip_prof_str.h")
|
||||
set(PROF_API_HDR "${CMAKE_CURRENT_SOURCE_DIR}/include/hip/hcc_detail/hip_runtime_api.h")
|
||||
set(PROF_API_SRC "${CMAKE_CURRENT_SOURCE_DIR}/src")
|
||||
set(PROF_API_GEN "${CMAKE_CURRENT_SOURCE_DIR}/hip_prof_gen.py")
|
||||
set(PROF_API_LOG "${PROJECT_BINARY_DIR}/hip_prof_gen.log.txt")
|
||||
set(PROF_API_CMD "${PROF_API_GEN} -v ${OPT_PROF_API} ${PROF_API_HDR} ${PROF_API_SRC} ${PROF_API_STR} >${PROF_API_LOG}")
|
||||
MESSAGE(STATUS "Generating profiling promitives: ${PROF_API_STR}")
|
||||
execute_process(COMMAND sh -c "rm -f ${PROF_API_STR}; ${PROF_API_CMD}")
|
||||
set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${PROF_API_GEN} ${PROF_API_HDR} ${PROF_API_STR})
|
||||
|
||||
# Enable profiling API
|
||||
find_path(PROF_API_HEADER_DIR prof_protocol.h
|
||||
HINTS
|
||||
${PROF_API_HEADER_PATH}
|
||||
PATHS
|
||||
/opt/rocm/roctracer
|
||||
PATH_SUFFIXES
|
||||
include/ext
|
||||
)
|
||||
if(NOT PROF_API_HEADER_DIR)
|
||||
MESSAGE(WARNING "Profiling API header not found. Disabling roctracer integration. Use -DPROF_API_HEADER_PATH=<path to prof_protocol.h header>")
|
||||
else()
|
||||
add_definitions(-DUSE_PROF_API=1)
|
||||
include_directories(${PROF_API_HEADER_DIR})
|
||||
MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
include_directories(${PROJECT_BINARY_DIR}/include)
|
||||
include_directories(${PROJECT_SOURCE_DIR}/include)
|
||||
set(HIP_HCC_BUILD_FLAGS)
|
||||
|
||||
# Add HIP_VERSION to CMAKE_<LANG>_FLAGS
|
||||
set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -DHIP_VERSION_MAJOR=${HIP_VERSION_MAJOR} -DHIP_VERSION_MINOR=${HIP_VERSION_MINOR} -DHIP_VERSION_PATCH=${HIP_VERSION_GITDATE}")
|
||||
|
||||
# Add remaining flags
|
||||
set(HCC_CXX_FLAGS "-Xlinker --enable-new-dtags -hc -fno-gpu-rdc --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906 --amdgpu-target=gfx908 ")
|
||||
set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -fPIC ${HCC_CXX_FLAGS} -I${HSA_PATH}/include")
|
||||
|
||||
# Set compiler and compiler flags
|
||||
set(CMAKE_CXX_COMPILER "${HCC_HOME}/bin/hcc")
|
||||
set(CMAKE_C_COMPILER "${HCC_HOME}/bin/hcc")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${HIP_HCC_BUILD_FLAGS}")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${HIP_HCC_BUILD_FLAGS}")
|
||||
|
||||
set(SOURCE_FILES_RUNTIME
|
||||
src/program_state.cpp
|
||||
src/hip_clang.cpp
|
||||
src/hip_hcc.cpp
|
||||
src/hip_context.cpp
|
||||
src/hip_device.cpp
|
||||
src/hip_error.cpp
|
||||
src/hip_event.cpp
|
||||
src/hip_fatbin.cpp
|
||||
src/hip_memory.cpp
|
||||
src/hip_peer.cpp
|
||||
src/hip_stream.cpp
|
||||
src/hip_module.cpp
|
||||
src/hip_db.cpp
|
||||
src/grid_launch.cpp
|
||||
src/hip_texture.cpp
|
||||
src/hip_surface.cpp
|
||||
src/hip_intercept.cpp
|
||||
src/env.cpp
|
||||
src/h2f.cpp)
|
||||
|
||||
add_library(hip_hcc SHARED ${SOURCE_FILES_RUNTIME})
|
||||
add_library(hip_hcc_static STATIC ${SOURCE_FILES_RUNTIME})
|
||||
|
||||
## Set the VERSION and SOVERSION values
|
||||
set_property ( TARGET hip_hcc PROPERTY VERSION "${HIP_LIB_VERSION_STRING}" )
|
||||
set_property ( TARGET hip_hcc PROPERTY SOVERSION "${HIP_LIB_VERSION_MAJOR}" )
|
||||
|
||||
target_link_libraries(hip_hcc PRIVATE hc_am)
|
||||
target_link_libraries(hip_hcc_static PRIVATE hc_am)
|
||||
|
||||
add_library(hiprtc SHARED src/hiprtc.cpp)
|
||||
target_compile_options(hiprtc PRIVATE -DDISABLE_REDUCED_GPU_BLOB_COPY)
|
||||
set_property ( TARGET hiprtc PROPERTY VERSION "${HIP_LIB_VERSION_STRING}" )
|
||||
set_property ( TARGET hiprtc PROPERTY SOVERSION "${HIP_LIB_VERSION_MAJOR}" )
|
||||
|
||||
target_include_directories(
|
||||
hiprtc SYSTEM
|
||||
PRIVATE ${PROJECT_SOURCE_DIR}/include ${HSA_PATH}/include)
|
||||
|
||||
set_target_properties(hip_hcc PROPERTIES CXX_VISIBILITY_PRESET hidden)
|
||||
set_target_properties(hip_hcc PROPERTIES VISIBILITY_INLINES_HIDDEN 1)
|
||||
set_target_properties(hiprtc PROPERTIES CXX_VISIBILITY_PRESET hidden)
|
||||
set_target_properties(hiprtc PROPERTIES VISIBILITY_INLINES_HIDDEN 1)
|
||||
|
||||
find_package(amd_comgr REQUIRED CONFIG
|
||||
PATHS
|
||||
/opt/rocm/
|
||||
PATH_SUFFIXES
|
||||
cmake/amd_comgr
|
||||
lib/cmake/amd_comgr
|
||||
)
|
||||
MESSAGE(STATUS "Code Object Manager found at ${amd_comgr_DIR}.")
|
||||
|
||||
target_link_libraries(hip_hcc PRIVATE amd_comgr)
|
||||
target_link_libraries(hip_hcc_static PRIVATE amd_comgr)
|
||||
|
||||
string(REPLACE " " ";" HCC_CXX_FLAGS_LIST ${HCC_CXX_FLAGS})
|
||||
foreach(TARGET hip_hcc hip_hcc_static)
|
||||
target_include_directories(${TARGET} SYSTEM INTERFACE $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>;${HSA_PATH}/include)
|
||||
endforeach()
|
||||
add_library(host INTERFACE)
|
||||
target_link_libraries(host INTERFACE hip_hcc)
|
||||
add_library(device INTERFACE)
|
||||
if(HIP_COMPILER STREQUAL "hcc")
|
||||
target_link_libraries(device INTERFACE host hcc::hccrt hcc::hc_am)
|
||||
else()
|
||||
target_link_libraries(device INTERFACE host)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(HIP_PLATFORM STREQUAL "amd")
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/lpl_ca)
|
||||
endif()
|
||||
@@ -488,10 +309,6 @@ endif()
|
||||
#############################
|
||||
# Install steps
|
||||
#############################
|
||||
# Install hip_hcc if runtime is hcc
|
||||
if(HIP_RUNTIME STREQUAL "hcc")
|
||||
install(TARGETS hip_hcc_static hip_hcc hiprtc DESTINATION lib)
|
||||
endif()
|
||||
|
||||
# Install .hipInfo
|
||||
install(FILES ${PROJECT_BINARY_DIR}/.hipInfo DESTINATION lib)
|
||||
@@ -503,9 +320,7 @@ install(FILES ${PROJECT_BINARY_DIR}/.hipVersion DESTINATION bin)
|
||||
execute_process(COMMAND test ${CMAKE_INSTALL_PREFIX} -ef ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
RESULT_VARIABLE INSTALL_SOURCE)
|
||||
if(NOT ${INSTALL_SOURCE} EQUAL 0)
|
||||
if(HIP_RUNTIME STREQUAL "hcc")
|
||||
install(DIRECTORY src DESTINATION .)
|
||||
elseif(HIP_RUNTIME STREQUAL "rocclr")
|
||||
if(HIP_RUNTIME STREQUAL "rocclr")
|
||||
install(DIRECTORY rocclr DESTINATION .)
|
||||
endif()
|
||||
install(DIRECTORY bin DESTINATION . USE_SOURCE_PERMISSIONS)
|
||||
@@ -522,13 +337,6 @@ install(DIRECTORY ${PROJECT_BINARY_DIR}/include/hip
|
||||
DESTINATION include
|
||||
FILES_MATCHING PATTERN "*.h*")
|
||||
|
||||
if(HIP_RUNTIME STREQUAL "hcc")
|
||||
install(TARGETS hip_hcc_static hip_hcc host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR})
|
||||
install(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::)
|
||||
elseif(HIP_RUNTIME STREQUAL "rocclr")
|
||||
# install(TARGETS hip_on_rocclr host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR})
|
||||
endif()
|
||||
|
||||
#############################
|
||||
# hip-config
|
||||
#############################
|
||||
@@ -577,22 +385,7 @@ if (BUILD_HIPIFY_CLANG)
|
||||
add_dependencies(pkg_hip_base hipify-clang)
|
||||
endif()
|
||||
|
||||
if(HIP_RUNTIME STREQUAL "hcc")
|
||||
message("HCC Package\n")
|
||||
# Package: hip_hcc
|
||||
set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_hcc)
|
||||
configure_file(packaging/hip-hcc.txt ${BUILD_DIR}/CMakeLists.txt @ONLY)
|
||||
configure_file(packaging/hip-hcc.postinst ${BUILD_DIR}/postinst @ONLY)
|
||||
configure_file(packaging/hip-hcc.prerm ${BUILD_DIR}/prerm @ONLY)
|
||||
add_custom_target(pkg_hip_hcc COMMAND ${CMAKE_COMMAND} .
|
||||
COMMAND rm -rf *.deb *.rpm *.tar.gz
|
||||
COMMAND make package
|
||||
COMMAND cp *.deb ${PROJECT_BINARY_DIR}
|
||||
COMMAND cp *.rpm ${PROJECT_BINARY_DIR}
|
||||
COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR}
|
||||
WORKING_DIRECTORY ${BUILD_DIR}
|
||||
DEPENDS hip_hcc hip_hcc_static hiprtc)
|
||||
elseif(HIP_RUNTIME STREQUAL "rocclr")
|
||||
if(HIP_RUNTIME STREQUAL "rocclr")
|
||||
set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/rocclr)
|
||||
configure_file(packaging/hip-rocclr.txt ${BUILD_DIR}/CMakeLists.txt @ONLY)
|
||||
configure_file(packaging/hip-rocclr.postinst ${BUILD_DIR}/postinst @ONLY)
|
||||
@@ -645,11 +438,7 @@ if(POLICY CMP0037)
|
||||
cmake_policy(SET CMP0037 OLD)
|
||||
endif()
|
||||
|
||||
if(HIP_RUNTIME STREQUAL "hcc")
|
||||
add_custom_target(package
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
DEPENDS pkg_hip_base pkg_hip_hcc pkg_hip_nvcc pkg_hip_doc pkg_hip_samples)
|
||||
elseif(HIP_RUNTIME STREQUAL "rocclr")
|
||||
if(HIP_RUNTIME STREQUAL "rocclr")
|
||||
add_custom_target(package
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
DEPENDS pkg_hip_base hip_on_rocclr pkg_hip_nvcc pkg_hip_doc pkg_hip_samples)
|
||||
|
||||
+2
-1
@@ -95,9 +95,10 @@ git clone -b rocm-3.10.x https://github.com/ROCm-Developer-Tools/HIP.git
|
||||
export HIP_DIR="$(readlink -f HIP)"
|
||||
cd "$HIP_DIR"
|
||||
mkdir -p build; cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DHIP_COMPILER=clang -DHIP_PLATFORM=rocclr -DCMAKE_PREFIX_PATH="$ROCclr_DIR/build;/opt/rocm/" -DCMAKE_INSTALL_PREFIX=</where/to/install/hip> ..
|
||||
cmake -DCMAKE_PREFIX_PATH="$ROCclr_DIR/build;/opt/rocm/" -DCMAKE_INSTALL_PREFIX=</where/to/install/hip> ..
|
||||
make -j
|
||||
sudo make install
|
||||
Note: If you don't specify CMAKE_INSTALL_PREFIX, hip-rocclr runtime will be installed to "/opt/rocm/hip".
|
||||
```
|
||||
|
||||
## Default paths and environment variables
|
||||
|
||||
+6
-24
@@ -8,7 +8,7 @@ use 5.006; use v5.10.1;
|
||||
use Getopt::Long;
|
||||
use Cwd;
|
||||
|
||||
# Return name of HIP compiler - either 'nvcc' or 'hcc'
|
||||
# Return name of HIP compiler - either 'clang' or 'nvcc'
|
||||
#
|
||||
use Getopt::Long;
|
||||
use File::Basename;
|
||||
@@ -34,9 +34,9 @@ if ($p_help) {
|
||||
print " --path, -p : print HIP_PATH (use env var if set, else determine from hipconfig path)\n";
|
||||
print " --rocmpath, -R : print ROCM_PATH (use env var if set, else determine from hip path or /opt/rocm)\n";
|
||||
print " --cpp_config, -C : print C++ compiler options\n";
|
||||
print " --compiler, -c : print compiler (hcc or clang or nvcc)\n";
|
||||
print " --compiler, -c : print compiler (clang or nvcc)\n";
|
||||
print " --platform, -P : print platform (amd or nvidia)\n";
|
||||
print " --runtime, -r : print runtime (hcc or rocclr)\n";
|
||||
print " --runtime, -r : print runtime (rocclr or cuda)\n";
|
||||
print " --hipclangpath, -l : print HIP_CLANG_PATH\n";
|
||||
print " --full, -f : print full config\n";
|
||||
print " --version, -v : print hip version\n";
|
||||
@@ -86,7 +86,6 @@ if (-e "$HIP_PATH/../bin/rocm_agent_enumerator") {
|
||||
$ROCM_PATH=$ENV{'ROCM_PATH'} // "/opt/rocm";
|
||||
}
|
||||
$CUDA_PATH=$ENV{'CUDA_PATH'} // '/usr/local/cuda';
|
||||
$HCC_HOME=$ENV{'HCC_HOME'} // "$ROCM_PATH/hcc";
|
||||
$HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa";
|
||||
$HIP_CLANG_PATH=$ENV{'HIP_CLANG_PATH'} // "$ROCM_PATH/llvm/bin";
|
||||
# HIP_ROCCLR_HOME is used by Windows builds
|
||||
@@ -120,8 +119,6 @@ if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "rocclr" and !defined $HIP_ROCCLR_H
|
||||
if (not defined $HIP_PLATFORM) {
|
||||
if (can_run("$HIP_CLANG_PATH/clang++") or can_run("clang++")) {
|
||||
$HIP_PLATFORM = "amd";
|
||||
} elsif (can_run("$HCC_HOME/bin/hcc") or can_run("hcc")) {
|
||||
$HIP_PLATFORM = "amd";
|
||||
} elsif (can_run("$CUDA_PATH/bin/nvcc") or can_run("nvcc")) {
|
||||
$HIP_PLATFORM = "nvidia";
|
||||
$HIP_COMPILER = "nvcc";
|
||||
@@ -132,9 +129,6 @@ if (not defined $HIP_PLATFORM) {
|
||||
}
|
||||
}
|
||||
|
||||
if ($HIP_COMPILER eq "hcc") {
|
||||
$CPP_CONFIG = " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HCC_HOME/include -I$HSA_PATH/include";
|
||||
}
|
||||
if ($HIP_COMPILER eq "clang") {
|
||||
# Windows does not have clang at linux default path
|
||||
if (defined $HIP_ROCCLR_HOME and (-e "$HIP_ROCCLR_HOME/bin/clang" or -e "$HIP_ROCCLR_HOME/bin/clang.exe")) {
|
||||
@@ -216,20 +210,6 @@ if (!$printed or $p_full) {
|
||||
if ($HIP_PLATFORM eq "amd")
|
||||
{
|
||||
print "\n" ;
|
||||
if ($HIP_COMPILER eq "hcc")
|
||||
{
|
||||
print "== hcc\n";
|
||||
print ("HSA_PATH : $HSA_PATH\n");
|
||||
print ("HCC_HOME : $HCC_HOME\n");
|
||||
system("$HCC_HOME/bin/hcc --version");
|
||||
system("$HCC_HOME/bin/llc --version");
|
||||
print ("HCC-cxxflags : ");
|
||||
system("$HCC_HOME/bin/hcc-config --cxxflags");
|
||||
printf("\n");
|
||||
print ("HCC-ldflags : ");
|
||||
system("$HCC_HOME/bin/hcc-config --ldflags");
|
||||
printf("\n");
|
||||
}
|
||||
if ($HIP_COMPILER eq "clang")
|
||||
{
|
||||
print "== hip-clang\n";
|
||||
@@ -243,6 +223,8 @@ if (!$printed or $p_full) {
|
||||
print ("hip-clang-ldflags : ");
|
||||
system("$HIP_PATH/bin/hipcc --ldflags");
|
||||
printf("\n");
|
||||
} else {
|
||||
print ("Unexpected HIP_COMPILER: $HIP_COMPILER\n");
|
||||
}
|
||||
}
|
||||
if ($HIP_PLATFORM eq "nvidia") {
|
||||
@@ -256,7 +238,7 @@ if (!$printed or $p_full) {
|
||||
|
||||
print "=== Environment Variables\n";
|
||||
system("echo PATH=\$PATH");
|
||||
system("env | egrep '^HIP|^HSA|^HCC|^CUDA|^LD_LIBRARY_PATH'");
|
||||
system("env | egrep '^HIP|^HSA|^CUDA|^LD_LIBRARY_PATH'");
|
||||
|
||||
|
||||
print "\n" ;
|
||||
|
||||
Исполняемый файл → Обычный файл
@@ -13,8 +13,6 @@ set_target_properties(
|
||||
CXX_EXTENSIONS OFF
|
||||
RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
target_include_directories(lpl
|
||||
PUBLIC
|
||||
${PROJECT_SOURCE_DIR}/src
|
||||
PRIVATE
|
||||
$<TARGET_PROPERTY:amdrocclr_static,INTERFACE_INCLUDE_DIRECTORIES>)
|
||||
|
||||
@@ -34,7 +32,6 @@ set_target_properties(
|
||||
CXX_EXTENSIONS OFF
|
||||
RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
target_include_directories(ca SYSTEM PUBLIC ${HSA_PATH}/include)
|
||||
target_include_directories(ca PUBLIC ${PROJECT_SOURCE_DIR}/src)
|
||||
|
||||
find_package(hsa-runtime64 REQUIRED CONFIG
|
||||
PATHS
|
||||
|
||||
@@ -2,8 +2,7 @@
|
||||
|
||||
#include "common.hpp"
|
||||
|
||||
#include "../src/code_object_bundle.inl"
|
||||
|
||||
#include "code_object_bundle.inl"
|
||||
#include "clara/clara.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
@@ -637,7 +637,7 @@ struct BoundFlagRefBase : BoundRefBase {
|
||||
auto isFlag() const -> bool override { return true; }
|
||||
|
||||
auto setValue(std::string const& arg) -> ParserResult override {
|
||||
bool flag;
|
||||
bool flag = 0;
|
||||
auto result = convertInto(arg, flag);
|
||||
if (result) setFlag(flag);
|
||||
return result;
|
||||
|
||||
@@ -1,45 +0,0 @@
|
||||
//===-- AMDGPUNoteType.h - AMDGPU ELF PT_NOTE section info-------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
///
|
||||
/// Enums and constants for AMDGPU PT_NOTE sections.
|
||||
///
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPTNOTE_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPTNOTE_H
|
||||
|
||||
namespace AMDGPU {
|
||||
|
||||
namespace ElfNote {
|
||||
|
||||
const char SectionName[] = ".note";
|
||||
|
||||
const char NoteName[] = "AMD";
|
||||
|
||||
// TODO: Move this enum to include/llvm/Support so it can be used in tools?
|
||||
enum NoteType {
|
||||
NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1,
|
||||
NT_AMDGPU_HSA_HSAIL = 2,
|
||||
NT_AMDGPU_HSA_ISA = 3,
|
||||
NT_AMDGPU_HSA_PRODUCER = 4,
|
||||
NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5,
|
||||
NT_AMDGPU_HSA_EXTENSION = 6,
|
||||
NT_AMDGPU_HSA_RUNTIME_METADATA_V_1 = 7, // deprecated since 12/14/16.
|
||||
NT_AMDGPU_HSA_RUNTIME_METADATA_V_2 = 8,
|
||||
NT_AMDGPU_HSA_RUNTIME_METADATA = NT_AMDGPU_HSA_RUNTIME_METADATA_V_2,
|
||||
NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101,
|
||||
NT_AMDGPU_HSA_HLDEBUG_TARGET = 102
|
||||
};
|
||||
} // namespace ElfNote
|
||||
} // namespace AMDGPU
|
||||
|
||||
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUNOTETYPE_H
|
||||
@@ -1,290 +0,0 @@
|
||||
//===-- AMDGPURuntimeMetadata.h - AMDGPU Runtime Metadata -------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file
|
||||
///
|
||||
/// Enums and structure types used by runtime metadata.
|
||||
///
|
||||
/// Runtime requests certain information (metadata) about kernels to be able
|
||||
/// to execute the kernels and answer the queries about the kernels.
|
||||
/// The metadata is represented as a note element in the .note ELF section of a
|
||||
/// binary (code object). The desc field of the note element is a YAML string
|
||||
/// consisting of key-value pairs. Each key is a string. Each value can be
|
||||
/// an integer, a string, or an YAML sequence. There are 3 levels of YAML maps.
|
||||
/// At the beginning of the YAML string is the module level YAML map. A
|
||||
/// kernel-level YAML map is in the amd.Kernels sequence. A
|
||||
/// kernel-argument-level map is in the amd.Args sequence.
|
||||
///
|
||||
/// The format should be kept backward compatible. New enum values and bit
|
||||
/// fields should be appended at the end. It is suggested to bump up the
|
||||
/// revision number whenever the format changes and document the change
|
||||
/// in the revision in this header.
|
||||
///
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
|
||||
#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
namespace AMDGPU {
|
||||
namespace RuntimeMD {
|
||||
|
||||
// Version and revision of runtime metadata
|
||||
const unsigned char MDVersion = 2;
|
||||
const unsigned char MDRevision = 1;
|
||||
|
||||
// Name of keys for runtime metadata.
|
||||
namespace KeyName {
|
||||
|
||||
// Runtime metadata version
|
||||
const char MDVersion[] = "amd.MDVersion";
|
||||
|
||||
// Instruction set architecture information
|
||||
const char IsaInfo[] = "amd.IsaInfo";
|
||||
// Wavefront size
|
||||
const char IsaInfoWavefrontSize[] = "amd.IsaInfoWavefrontSize";
|
||||
// Local memory size in bytes
|
||||
const char IsaInfoLocalMemorySize[] = "amd.IsaInfoLocalMemorySize";
|
||||
// Number of execution units per compute unit
|
||||
const char IsaInfoEUsPerCU[] = "amd.IsaInfoEUsPerCU";
|
||||
// Maximum number of waves per execution unit
|
||||
const char IsaInfoMaxWavesPerEU[] = "amd.IsaInfoMaxWavesPerEU";
|
||||
// Maximum flat work group size
|
||||
const char IsaInfoMaxFlatWorkGroupSize[] = "amd.IsaInfoMaxFlatWorkGroupSize";
|
||||
// SGPR allocation granularity
|
||||
const char IsaInfoSGPRAllocGranule[] = "amd.IsaInfoSGPRAllocGranule";
|
||||
// Total number of SGPRs
|
||||
const char IsaInfoTotalNumSGPRs[] = "amd.IsaInfoTotalNumSGPRs";
|
||||
// Addressable number of SGPRs
|
||||
const char IsaInfoAddressableNumSGPRs[] = "amd.IsaInfoAddressableNumSGPRs";
|
||||
// VGPR allocation granularity
|
||||
const char IsaInfoVGPRAllocGranule[] = "amd.IsaInfoVGPRAllocGranule";
|
||||
// Total number of VGPRs
|
||||
const char IsaInfoTotalNumVGPRs[] = "amd.IsaInfoTotalNumVGPRs";
|
||||
// Addressable number of VGPRs
|
||||
const char IsaInfoAddressableNumVGPRs[] = "amd.IsaInfoAddressableNumVGPRs";
|
||||
|
||||
// Language
|
||||
const char Language[] = "amd.Language";
|
||||
// Language version
|
||||
const char LanguageVersion[] = "amd.LanguageVersion";
|
||||
|
||||
// Kernels
|
||||
const char Kernels[] = "amd.Kernels";
|
||||
// Kernel name
|
||||
const char KernelName[] = "amd.KernelName";
|
||||
// Kernel arguments
|
||||
const char Args[] = "amd.Args";
|
||||
// Kernel argument size in bytes
|
||||
const char ArgSize[] = "amd.ArgSize";
|
||||
// Kernel argument alignment
|
||||
const char ArgAlign[] = "amd.ArgAlign";
|
||||
// Kernel argument type name
|
||||
const char ArgTypeName[] = "amd.ArgTypeName";
|
||||
// Kernel argument name
|
||||
const char ArgName[] = "amd.ArgName";
|
||||
// Kernel argument kind
|
||||
const char ArgKind[] = "amd.ArgKind";
|
||||
// Kernel argument value type
|
||||
const char ArgValueType[] = "amd.ArgValueType";
|
||||
// Kernel argument address qualifier
|
||||
const char ArgAddrQual[] = "amd.ArgAddrQual";
|
||||
// Kernel argument access qualifier
|
||||
const char ArgAccQual[] = "amd.ArgAccQual";
|
||||
// Kernel argument is const qualified
|
||||
const char ArgIsConst[] = "amd.ArgIsConst";
|
||||
// Kernel argument is restrict qualified
|
||||
const char ArgIsRestrict[] = "amd.ArgIsRestrict";
|
||||
// Kernel argument is volatile qualified
|
||||
const char ArgIsVolatile[] = "amd.ArgIsVolatile";
|
||||
// Kernel argument is pipe qualified
|
||||
const char ArgIsPipe[] = "amd.ArgIsPipe";
|
||||
// Required work group size
|
||||
const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize";
|
||||
// Work group size hint
|
||||
const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint";
|
||||
// Vector type hint
|
||||
const char VecTypeHint[] = "amd.VecTypeHint";
|
||||
// Kernel index for device enqueue
|
||||
const char KernelIndex[] = "amd.KernelIndex";
|
||||
// No partial work groups
|
||||
const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups";
|
||||
// Prinf function call information
|
||||
const char PrintfInfo[] = "amd.PrintfInfo";
|
||||
// The actual kernel argument access qualifier
|
||||
const char ArgActualAcc[] = "amd.ArgActualAcc";
|
||||
// Alignment of pointee type
|
||||
const char ArgPointeeAlign[] = "amd.ArgPointeeAlign";
|
||||
|
||||
} // end namespace KeyName
|
||||
|
||||
namespace KernelArg {
|
||||
|
||||
enum Kind : uint8_t {
|
||||
ByValue = 0,
|
||||
GlobalBuffer = 1,
|
||||
DynamicSharedPointer = 2,
|
||||
Sampler = 3,
|
||||
Image = 4,
|
||||
Pipe = 5,
|
||||
Queue = 6,
|
||||
HiddenGlobalOffsetX = 7,
|
||||
HiddenGlobalOffsetY = 8,
|
||||
HiddenGlobalOffsetZ = 9,
|
||||
HiddenNone = 10,
|
||||
HiddenPrintfBuffer = 11,
|
||||
HiddenDefaultQueue = 12,
|
||||
HiddenCompletionAction = 13,
|
||||
};
|
||||
|
||||
enum ValueType : uint16_t {
|
||||
Struct = 0,
|
||||
I8 = 1,
|
||||
U8 = 2,
|
||||
I16 = 3,
|
||||
U16 = 4,
|
||||
F16 = 5,
|
||||
I32 = 6,
|
||||
U32 = 7,
|
||||
F32 = 8,
|
||||
I64 = 9,
|
||||
U64 = 10,
|
||||
F64 = 11,
|
||||
};
|
||||
|
||||
// Avoid using 'None' since it conflicts with a macro in X11 header file.
|
||||
enum AccessQualifer : uint8_t {
|
||||
AccNone = 0,
|
||||
ReadOnly = 1,
|
||||
WriteOnly = 2,
|
||||
ReadWrite = 3,
|
||||
};
|
||||
|
||||
enum AddressSpaceQualifer : uint8_t {
|
||||
Private = 0,
|
||||
Global = 1,
|
||||
Constant = 2,
|
||||
Local = 3,
|
||||
Generic = 4,
|
||||
Region = 5,
|
||||
};
|
||||
|
||||
} // end namespace KernelArg
|
||||
|
||||
// Invalid values are used to indicate an optional key should not be emitted.
|
||||
const uint8_t INVALID_ADDR_QUAL = 0xff;
|
||||
const uint8_t INVALID_ACC_QUAL = 0xff;
|
||||
const uint32_t INVALID_KERNEL_INDEX = ~0U;
|
||||
|
||||
namespace KernelArg {
|
||||
|
||||
// In-memory representation of kernel argument information.
|
||||
struct Metadata {
|
||||
uint32_t Size = 0;
|
||||
uint32_t Align = 0;
|
||||
uint32_t PointeeAlign = 0;
|
||||
uint8_t Kind = 0;
|
||||
uint16_t ValueType = 0;
|
||||
std::string TypeName;
|
||||
std::string Name;
|
||||
uint8_t AddrQual = INVALID_ADDR_QUAL;
|
||||
uint8_t AccQual = INVALID_ACC_QUAL;
|
||||
uint8_t IsVolatile = 0;
|
||||
uint8_t IsConst = 0;
|
||||
uint8_t IsRestrict = 0;
|
||||
uint8_t IsPipe = 0;
|
||||
|
||||
Metadata() = default;
|
||||
};
|
||||
|
||||
} // end namespace KernelArg
|
||||
|
||||
namespace Kernel {
|
||||
|
||||
// In-memory representation of kernel information.
|
||||
struct Metadata {
|
||||
std::string Name;
|
||||
std::string Language;
|
||||
std::vector<uint8_t> LanguageVersion;
|
||||
std::vector<uint32_t> ReqdWorkGroupSize;
|
||||
std::vector<uint32_t> WorkGroupSizeHint;
|
||||
std::string VecTypeHint;
|
||||
uint32_t KernelIndex = INVALID_KERNEL_INDEX;
|
||||
uint8_t NoPartialWorkGroups = 0;
|
||||
std::vector<KernelArg::Metadata> Args;
|
||||
|
||||
Metadata() = default;
|
||||
};
|
||||
|
||||
} // end namespace Kernel
|
||||
|
||||
namespace IsaInfo {
|
||||
|
||||
/// \brief In-memory representation of instruction set architecture
|
||||
/// information.
|
||||
struct Metadata {
|
||||
/// \brief Wavefront size.
|
||||
unsigned WavefrontSize = 0;
|
||||
/// \brief Local memory size in bytes.
|
||||
unsigned LocalMemorySize = 0;
|
||||
/// \brief Number of execution units per compute unit.
|
||||
unsigned EUsPerCU = 0;
|
||||
/// \brief Maximum number of waves per execution unit.
|
||||
unsigned MaxWavesPerEU = 0;
|
||||
/// \brief Maximum flat work group size.
|
||||
unsigned MaxFlatWorkGroupSize = 0;
|
||||
/// \brief SGPR allocation granularity.
|
||||
unsigned SGPRAllocGranule = 0;
|
||||
/// \brief Total number of SGPRs.
|
||||
unsigned TotalNumSGPRs = 0;
|
||||
/// \brief Addressable number of SGPRs.
|
||||
unsigned AddressableNumSGPRs = 0;
|
||||
/// \brief VGPR allocation granularity.
|
||||
unsigned VGPRAllocGranule = 0;
|
||||
/// \brief Total number of VGPRs.
|
||||
unsigned TotalNumVGPRs = 0;
|
||||
/// \brief Addressable number of VGPRs.
|
||||
unsigned AddressableNumVGPRs = 0;
|
||||
|
||||
Metadata() = default;
|
||||
};
|
||||
|
||||
} // end namespace IsaInfo
|
||||
|
||||
namespace Program {
|
||||
|
||||
// In-memory representation of program information.
|
||||
struct Metadata {
|
||||
std::vector<uint8_t> MDVersionSeq;
|
||||
IsaInfo::Metadata IsaInfo;
|
||||
std::vector<std::string> PrintfInfo;
|
||||
std::vector<Kernel::Metadata> Kernels;
|
||||
|
||||
explicit Metadata() = default;
|
||||
|
||||
// Construct from an YAML string.
|
||||
explicit Metadata(const std::string& YAML);
|
||||
|
||||
// Convert to YAML string.
|
||||
std::string toYAML();
|
||||
|
||||
// Convert from YAML string.
|
||||
static Metadata fromYAML(const std::string& S);
|
||||
};
|
||||
|
||||
} // end namespace Program
|
||||
|
||||
} // end namespace RuntimeMD
|
||||
} // end namespace AMDGPU
|
||||
|
||||
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
|
||||
@@ -1,136 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef DEVICE_UTIL_H
|
||||
#define DEVICE_UTIL_H
|
||||
|
||||
#include <hip/hcc_detail/hip_runtime.h>
|
||||
|
||||
/*
|
||||
Heap size computation for malloc and free device functions.
|
||||
*/
|
||||
|
||||
#define HIP_SQRT_2 1.41421356237
|
||||
#define HIP_SQRT_PI 1.77245385091
|
||||
|
||||
#define __hip_erfinva3 -0.140543331
|
||||
#define __hip_erfinva2 0.914624893
|
||||
#define __hip_erfinva1 -1.645349621
|
||||
#define __hip_erfinva0 0.886226899
|
||||
|
||||
#define __hip_erfinvb4 0.012229801
|
||||
#define __hip_erfinvb3 -0.329097515
|
||||
#define __hip_erfinvb2 1.442710462
|
||||
#define __hip_erfinvb1 -2.118377725
|
||||
#define __hip_erfinvb0 1
|
||||
|
||||
#define __hip_erfinvc3 1.641345311
|
||||
#define __hip_erfinvc2 3.429567803
|
||||
#define __hip_erfinvc1 -1.62490649
|
||||
#define __hip_erfinvc0 -1.970840454
|
||||
|
||||
#define __hip_erfinvd2 1.637067800
|
||||
#define __hip_erfinvd1 3.543889200
|
||||
#define __hip_erfinvd0 1
|
||||
|
||||
#define HIP_PI 3.14159265358979323846
|
||||
|
||||
__device__ float __hip_erfinvf(float x);
|
||||
__device__ double __hip_erfinv(double x);
|
||||
|
||||
__device__ float __hip_j0f(float x);
|
||||
__device__ double __hip_j0(double x);
|
||||
|
||||
__device__ float __hip_j1f(float x);
|
||||
__device__ double __hip_j1(double x);
|
||||
|
||||
__device__ float __hip_y0f(float x);
|
||||
__device__ double __hip_y0(double x);
|
||||
|
||||
__device__ float __hip_y1f(float x);
|
||||
__device__ double __hip_y1(double x);
|
||||
|
||||
__device__ float __hip_jnf(int n, float x);
|
||||
__device__ double __hip_jn(int n, double x);
|
||||
|
||||
__device__ float __hip_ynf(int n, float x);
|
||||
__device__ double __hip_yn(int n, double x);
|
||||
|
||||
__device__ float __hip_precise_cosf(float x);
|
||||
__device__ float __hip_precise_exp10f(float x);
|
||||
__device__ float __hip_precise_expf(float x);
|
||||
__device__ float __hip_precise_frsqrt_rn(float x);
|
||||
__device__ float __hip_precise_fsqrt_rd(float x);
|
||||
__device__ float __hip_precise_fsqrt_rn(float x);
|
||||
__device__ float __hip_precise_fsqrt_ru(float x);
|
||||
__device__ float __hip_precise_fsqrt_rz(float x);
|
||||
__device__ float __hip_precise_log10f(float x);
|
||||
__device__ float __hip_precise_log2f(float x);
|
||||
__device__ float __hip_precise_logf(float x);
|
||||
__device__ float __hip_precise_powf(float base, float exponent);
|
||||
__device__ void __hip_precise_sincosf(float x, float* s, float* c);
|
||||
__device__ float __hip_precise_sinf(float x);
|
||||
__device__ float __hip_precise_tanf(float x);
|
||||
// Double Precision Math
|
||||
__device__ double __hip_precise_dsqrt_rd(double x);
|
||||
__device__ double __hip_precise_dsqrt_rn(double x);
|
||||
__device__ double __hip_precise_dsqrt_ru(double x);
|
||||
__device__ double __hip_precise_dsqrt_rz(double x);
|
||||
|
||||
|
||||
// Float Fast Math
|
||||
__device__ float __hip_fast_exp10f(float x);
|
||||
__device__ float __hip_fast_expf(float x);
|
||||
__device__ float __hip_fast_frsqrt_rn(float x);
|
||||
__device__ float __hip_fast_fsqrt_rn(float x);
|
||||
__device__ float __hip_fast_fsqrt_ru(float x);
|
||||
__device__ float __hip_fast_fsqrt_rz(float x);
|
||||
__device__ float __hip_fast_log10f(float x);
|
||||
__device__ float __hip_fast_logf(float x);
|
||||
__device__ float __hip_fast_powf(float base, float exponent);
|
||||
__device__ void __hip_fast_sincosf(float x, float* s, float* c);
|
||||
__device__ float __hip_fast_tanf(float x);
|
||||
// Double Precision Math
|
||||
__device__ double __hip_fast_dsqrt_rd(double x);
|
||||
__device__ double __hip_fast_dsqrt_rn(double x);
|
||||
__device__ double __hip_fast_dsqrt_ru(double x);
|
||||
__device__ double __hip_fast_dsqrt_rz(double x);
|
||||
|
||||
float __hip_host_j0f(float x);
|
||||
double __hip_host_j0(double x);
|
||||
|
||||
float __hip_host_j1f(float x);
|
||||
double __hip_host_j1(double x);
|
||||
|
||||
float __hip_host_y0f(float x);
|
||||
double __hip_host_y1(double x);
|
||||
|
||||
float __hip_host_y1f(float x);
|
||||
double __hip_host_y1(double x);
|
||||
|
||||
float __hip_host_jnf(int n, float x);
|
||||
double __hip_host_jn(int n, double x);
|
||||
|
||||
float __hip_host_ynf(int n, float x);
|
||||
double __hip_host_yn(int n, double x);
|
||||
|
||||
#endif
|
||||
@@ -1,109 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
#include "env.h"
|
||||
|
||||
//---
|
||||
// Read environment variables.
|
||||
void ihipReadEnv_I(int* var_ptr, const char* var_name1, const char* var_name2,
|
||||
const char* description) {
|
||||
char* env = getenv(var_name1);
|
||||
|
||||
// Check second name if first not defined, used to allow HIP_ or CUDA_ env vars.
|
||||
if ((env == NULL) && strcmp(var_name2, "0")) {
|
||||
env = getenv(var_name2);
|
||||
}
|
||||
|
||||
// Default is set when variable is initialized (at top of this file), so only override if we
|
||||
// find an environment variable.
|
||||
if (env) {
|
||||
long int v = strtol(env, NULL, 0);
|
||||
*var_ptr = (int)(v);
|
||||
}
|
||||
if (HIP_PRINT_ENV) {
|
||||
printf("%-30s = %2d : %s\n", var_name1, *var_ptr, description);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ihipReadEnv_S(std::string* var_ptr, const char* var_name1, const char* var_name2,
|
||||
const char* description) {
|
||||
char* env = getenv(var_name1);
|
||||
|
||||
// Check second name if first not defined, used to allow HIP_ or CUDA_ env vars.
|
||||
if ((env == NULL) && strcmp(var_name2, "0")) {
|
||||
env = getenv(var_name2);
|
||||
}
|
||||
|
||||
if (env) {
|
||||
*static_cast<std::string*>(var_ptr) = env;
|
||||
}
|
||||
if (HIP_PRINT_ENV) {
|
||||
printf("%-30s = %s : %s\n", var_name1, var_ptr->c_str(), description);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ihipReadEnv_Callback(void* var_ptr, const char* var_name1, const char* var_name2,
|
||||
const char* description,
|
||||
std::string (*setterCallback)(void* var_ptr, const char* env)) {
|
||||
char* env = getenv(var_name1);
|
||||
|
||||
// Check second name if first not defined, used to allow HIP_ or CUDA_ env vars.
|
||||
if ((env == NULL) && strcmp(var_name2, "0")) {
|
||||
env = getenv(var_name2);
|
||||
}
|
||||
|
||||
std::string var_string = "0";
|
||||
if (env) {
|
||||
var_string = setterCallback(var_ptr, env);
|
||||
}
|
||||
if (HIP_PRINT_ENV) {
|
||||
printf("%-30s = %s : %s\n", var_name1, var_string.c_str(), description);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void tokenize(const std::string& s, char delim, std::vector<std::string>* tokens) {
|
||||
std::stringstream ss;
|
||||
ss.str(s);
|
||||
std::string item;
|
||||
while (getline(ss, item, delim)) {
|
||||
item.erase(std::remove(item.begin(), item.end(), ' '), item.end()); // remove whitespace.
|
||||
tokens->push_back(item);
|
||||
}
|
||||
}
|
||||
|
||||
void trim(std::string* s) {
|
||||
// trim whitespace from beginning and end:
|
||||
const char* t = "\t\n\r\f\v";
|
||||
s->erase(0, s->find_first_not_of(t));
|
||||
s->erase(s->find_last_not_of(t) + 1);
|
||||
}
|
||||
|
||||
static void ltrim(std::string* s) {
|
||||
// trim whitespace from beginning
|
||||
const char* t = "\t\n\r\f\v";
|
||||
s->erase(0, s->find_first_not_of(t));
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
extern void HipReadEnv();
|
||||
|
||||
|
||||
#define READ_ENV_I(_build, _ENV_VAR, _ENV_VAR2, _description) \
|
||||
ihipReadEnv_I(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);
|
||||
|
||||
#define READ_ENV_S(_build, _ENV_VAR, _ENV_VAR2, _description) \
|
||||
ihipReadEnv_S(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);
|
||||
|
||||
#define READ_ENV_C(_build, _ENV_VAR, _ENV_VAR2, _description, _callback) \
|
||||
ihipReadEnv_Callback(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description, _callback);
|
||||
|
||||
|
||||
extern void ihipReadEnv_I(int* var_ptr, const char* var_name1, const char* var_name2,
|
||||
const char* description);
|
||||
extern void ihipReadEnv_S(std::string* var_ptr, const char* var_name1, const char* var_name2,
|
||||
const char* description);
|
||||
extern void ihipReadEnv_Callback(void* var_ptr, const char* var_name1, const char* var_name2,
|
||||
const char* description,
|
||||
std::string (*setterCallback)(void* var_ptr, const char* env));
|
||||
|
||||
|
||||
// String functions:
|
||||
extern void trim(std::string* s);
|
||||
extern void tokenize(const std::string& s, char delim, std::vector<std::string>* tokens);
|
||||
@@ -1,60 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hcc_detail/program_state.hpp"
|
||||
|
||||
#include "hip/hip_runtime_api.h"
|
||||
|
||||
// Internal header, do not percolate upwards.
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "hc.hpp"
|
||||
#include "trace_helper.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
using namespace hc;
|
||||
using namespace std;
|
||||
|
||||
namespace hip_impl
|
||||
{
|
||||
HIP_INTERNAL_EXPORTED_API hsa_agent_t target_agent(hipStream_t stream)
|
||||
{
|
||||
if (stream) {
|
||||
return *static_cast<hsa_agent_t*>(
|
||||
stream->locked_getAv()->get_hsa_agent());
|
||||
}
|
||||
GET_TLS();
|
||||
if (ihipGetTlsDefaultCtx() && ihipGetTlsDefaultCtx()->getDevice()) {
|
||||
return ihipGetDevice(
|
||||
ihipGetTlsDefaultCtx()->getDevice()->_deviceId)->_hsaAgent;
|
||||
}
|
||||
else {
|
||||
return *static_cast<hsa_agent_t*>(
|
||||
accelerator{}.get_default_view().get_hsa_agent());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,29 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hcc_detail/grid_launch_GGL.hpp"
|
||||
|
||||
#if __hcc_workweek__ >= 17481
|
||||
#include "functional_grid_launch.inl"
|
||||
#else
|
||||
#include "macro_based_grid_launch.inl"
|
||||
#endif
|
||||
@@ -1,70 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <cstdint>
|
||||
#include <algorithm>
|
||||
|
||||
// conversion routines between float and half precision
|
||||
static inline std::uint32_t f32_as_u32(float f) { union { float f; std::uint32_t u; } v; v.f = f; return v.u; }
|
||||
static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; }
|
||||
static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); }
|
||||
|
||||
// half to float, the f16 is in the low 16 bits of the input argument a
|
||||
static inline float __convert_half_to_float(std::uint32_t a) noexcept {
|
||||
std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U;
|
||||
std::uint32_t v = f32_as_u32(u32_as_f32(u) * 0x1.0p+112f) + 0x38000000U;
|
||||
u = (a & 0x7fff) != 0 ? v : u;
|
||||
return u32_as_f32(u) * 0x1.0p-112f;
|
||||
}
|
||||
|
||||
// float to half with nearest even rounding
|
||||
// The lower 16 bits of the result is the bit pattern for the f16
|
||||
static inline std::uint32_t __convert_float_to_half(float a) noexcept {
|
||||
std::uint32_t u = f32_as_u32(a);
|
||||
int e = static_cast<int>((u >> 23) & 0xff) - 127 + 15;
|
||||
std::uint32_t m = ((u >> 11) & 0xffe) | ((u & 0xfff) != 0);
|
||||
std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0);
|
||||
std::uint32_t n = ((std::uint32_t)e << 12) | m;
|
||||
std::uint32_t s = (u >> 16) & 0x8000;
|
||||
int b = clamp_int(1-e, 0, 13);
|
||||
std::uint32_t d = (0x1000 | m) >> b;
|
||||
d |= (d << b) != (0x1000 | m);
|
||||
std::uint32_t v = e < 1 ? d : n;
|
||||
v = (v >> 2) + (((v & 0x7) == 3) | ((v & 0x7) > 5));
|
||||
v = e > 30 ? 0x7c00 : v;
|
||||
v = e == 143 ? i : v;
|
||||
return s | v;
|
||||
}
|
||||
|
||||
// On machines without fp16 instructions, clang lowers llvm.convert.from.fp16
|
||||
// to call of this function.
|
||||
extern "C" __attribute__((visibility("default")))
|
||||
float __gnu_h2f_ieee(unsigned short h){
|
||||
return __convert_half_to_float((std::uint32_t) h);
|
||||
}
|
||||
|
||||
// On machines without fp16 instructions, clang lowers llvm.convert.to.fp16
|
||||
// to call of this function.
|
||||
extern "C" __attribute__((visibility("default")))
|
||||
unsigned short __gnu_f2h_ieee(float f){
|
||||
return (unsigned short)__convert_float_to_half(f);
|
||||
}
|
||||
@@ -1,533 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <unordered_map>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "hip_fatbin.h"
|
||||
#include "trace_helper.h"
|
||||
#include "program_state.inl"
|
||||
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC visibility push (default)
|
||||
#endif
|
||||
|
||||
extern "C" std::vector<hipModule_t>*
|
||||
__hipRegisterFatBinary(const void* data)
|
||||
{
|
||||
hip_impl::hip_init();
|
||||
|
||||
tprintf(DB_FB, "Enter __hipRegisterFatBinary(%p)\n", data);
|
||||
const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast<const __CudaFatBinaryWrapper*>(data);
|
||||
if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const __ClangOffloadBundleHeader* header = fbwrapper->binary;
|
||||
std::string magic(reinterpret_cast<const char*>(header), sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC) - 1);
|
||||
if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto modules = new std::vector<hipModule_t>(g_deviceCnt);
|
||||
if (!modules) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const __ClangOffloadBundleDesc* desc = &header->desc[0];
|
||||
for (uint64_t i = 0; i < header->numBundles; ++i,
|
||||
desc = reinterpret_cast<const __ClangOffloadBundleDesc*>(
|
||||
reinterpret_cast<uintptr_t>(&desc->triple[0]) + desc->tripleSize)) {
|
||||
|
||||
std::string triple{&desc->triple[0], sizeof(AMDGCN_AMDHSA_TRIPLE) - 1};
|
||||
if (triple.compare(AMDGCN_AMDHSA_TRIPLE))
|
||||
continue;
|
||||
|
||||
std::string target{&desc->triple[sizeof(AMDGCN_AMDHSA_TRIPLE)],
|
||||
desc->tripleSize - sizeof(AMDGCN_AMDHSA_TRIPLE)};
|
||||
tprintf(DB_FB, "Found bundle for %s\n", target.c_str());
|
||||
|
||||
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
|
||||
hsa_agent_t agent = g_allAgents[deviceId + 1];
|
||||
|
||||
char name[64] = {};
|
||||
hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name);
|
||||
if (target.compare(name)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ihipModule_t* module = new ihipModule_t;
|
||||
if (!module) {
|
||||
continue;
|
||||
}
|
||||
|
||||
hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, nullptr,
|
||||
&module->executable);
|
||||
|
||||
std::string image{reinterpret_cast<const char*>(
|
||||
reinterpret_cast<uintptr_t>(header) + desc->offset), desc->size};
|
||||
if (HIP_DUMP_CODE_OBJECT)
|
||||
__hipDumpCodeObject(image);
|
||||
module->executable = hip_impl::get_program_state().load_executable_no_copy(
|
||||
reinterpret_cast<const char*>(header) + desc->offset, desc->size,
|
||||
module->executable, agent);
|
||||
|
||||
if (module->executable.handle) {
|
||||
hip_impl::program_state_impl::read_kernarg_metadata(image, module->kernargs);
|
||||
modules->at(deviceId) = module;
|
||||
|
||||
tprintf(DB_FB, "Loaded code object for %s, args size=%ld\n", name, module->kernargs.size());
|
||||
} else {
|
||||
fprintf(stderr, "Failed to load code object for %s\n", name);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
|
||||
hsa_agent_t agent = g_allAgents[deviceId + 1];
|
||||
|
||||
char name[64] = {};
|
||||
hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name);
|
||||
if (!(*modules)[deviceId]) {
|
||||
fprintf(stderr, "No device code bundle for %s\n", name);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
tprintf(DB_FB, "__hipRegisterFatBinary succeeds and returns %p\n", modules);
|
||||
return modules;
|
||||
}
|
||||
|
||||
std::map<const void*, std::vector<hipFunction_t>> g_functions;
|
||||
|
||||
extern "C" void __hipRegisterFunction(
|
||||
std::vector<hipModule_t>* modules,
|
||||
const void* hostFunction,
|
||||
char* deviceFunction,
|
||||
const char* deviceName,
|
||||
unsigned int threadLimit,
|
||||
uint3* tid,
|
||||
uint3* bid,
|
||||
dim3* blockDim,
|
||||
dim3* gridDim,
|
||||
int* wSize)
|
||||
{
|
||||
HIP_INIT_API(NONE, modules, hostFunction, deviceFunction, deviceName);
|
||||
std::vector<hipFunction_t> functions(g_deviceCnt);
|
||||
|
||||
assert(modules && modules->size() >= g_deviceCnt);
|
||||
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
|
||||
hipFunction_t function;
|
||||
hsa_agent_t agent = g_allAgents[deviceId + 1];
|
||||
if ((hipSuccess == hipModuleGetFunctionEx(&function, modules->at(deviceId), deviceName, &agent) ||
|
||||
// With code-object-v3, we need to match the kernel descriptor symbol name
|
||||
(hipSuccess == hipModuleGetFunctionEx(
|
||||
&function, modules->at(deviceId),
|
||||
(std::string(deviceName) + std::string(".kd")).c_str(),
|
||||
&agent
|
||||
))) && function != nullptr) {
|
||||
functions[deviceId] = function;
|
||||
}
|
||||
else {
|
||||
tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for"
|
||||
" device %d\n", deviceName, deviceId);
|
||||
}
|
||||
}
|
||||
|
||||
g_functions.insert(std::make_pair(hostFunction, std::move(functions)));
|
||||
}
|
||||
|
||||
static inline const char* hsa_strerror(hsa_status_t status) {
|
||||
const char* str = nullptr;
|
||||
if (hsa_status_string(status, &str) == HSA_STATUS_SUCCESS) {
|
||||
return str;
|
||||
}
|
||||
return "Unknown error";
|
||||
}
|
||||
|
||||
struct RegisteredVar {
|
||||
public:
|
||||
RegisteredVar(): size_(0), devicePtr_(nullptr) {}
|
||||
~RegisteredVar() {}
|
||||
|
||||
static inline const char* hsa_strerror(hsa_status_t status) {
|
||||
const char* str = nullptr;
|
||||
if (hsa_status_string(status, &str) == HSA_STATUS_SUCCESS) {
|
||||
return str;
|
||||
}
|
||||
return "Unknown error";
|
||||
}
|
||||
|
||||
hipDeviceptr_t getdeviceptr() const { return devicePtr_; };
|
||||
size_t getvarsize() const { return size_; };
|
||||
|
||||
size_t size_; // Size of the variable
|
||||
hipDeviceptr_t devicePtr_; //Device Memory Address of the variable.
|
||||
};
|
||||
|
||||
struct DeviceVar {
|
||||
void* shadowVptr;
|
||||
std::string hostVar;
|
||||
size_t size;
|
||||
std::vector<hipModule_t>* modules;
|
||||
std::vector<RegisteredVar> rvars;
|
||||
bool dyn_undef;
|
||||
};
|
||||
|
||||
std::unordered_multimap<std::string, DeviceVar > g_vars;
|
||||
|
||||
//The logic follows PlatformState::getGlobalVar in ROCclr RT
|
||||
static DeviceVar* findVar(std::string hostVar, int deviceId, hipModule_t hmod) {
|
||||
DeviceVar* dvar = nullptr;
|
||||
if (hmod != nullptr) {
|
||||
// If module is provided, then get the var only from that module
|
||||
auto var_range = g_vars.equal_range(hostVar);
|
||||
for (auto it = var_range.first; it != var_range.second; ++it) {
|
||||
if ((*it->second.modules)[deviceId] == hmod) {
|
||||
dvar = &(it->second);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// If var count is < 2, return the var
|
||||
if (g_vars.count(hostVar) < 2) {
|
||||
auto it = g_vars.find(hostVar);
|
||||
dvar = ((it == g_vars.end()) ? nullptr : &(it->second));
|
||||
} else {
|
||||
// If var count is > 2, return the original var,
|
||||
// if original var count != 1, return g_vars.end()/Invalid
|
||||
size_t orig_global_count = 0;
|
||||
auto var_range = g_vars.equal_range(hostVar);
|
||||
for (auto it = var_range.first; it != var_range.second; ++it) {
|
||||
// when dyn_undef is set, it is a shadow var
|
||||
if (it->second.dyn_undef == false) {
|
||||
++orig_global_count;
|
||||
dvar = &(it->second);
|
||||
}
|
||||
}
|
||||
dvar = ((orig_global_count == 1) ? dvar : nullptr);
|
||||
}
|
||||
}
|
||||
return dvar;
|
||||
}
|
||||
|
||||
hipError_t ihipGetGlobalVar(hipDeviceptr_t* dev_ptr, size_t* size_ptr,
|
||||
const char* hostVar, hipModule_t hmod) {
|
||||
GET_TLS();
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
if (!ctx) return hipErrorInvalidValue;
|
||||
|
||||
auto device = ctx->getDevice();
|
||||
|
||||
if (!device) return hipErrorInvalidValue;
|
||||
|
||||
ihipDevice_t* currentDevice = ihipGetDevice(device->_deviceId);
|
||||
|
||||
if (!currentDevice) return hipErrorInvalidValue;
|
||||
|
||||
int deviceId = device->_deviceId;
|
||||
|
||||
DeviceVar* dvar = findVar(std::string(hostVar), deviceId, hmod);
|
||||
if (dvar == nullptr) return hipErrorInvalidValue;
|
||||
|
||||
if (dvar->rvars[deviceId].getdeviceptr() == nullptr) return hipErrorInvalidValue;
|
||||
|
||||
*size_ptr = dvar->rvars[deviceId].getvarsize();
|
||||
*dev_ptr = dvar->rvars[deviceId].getdeviceptr();
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
static bool createGlobalVarObj(const hsa_executable_t& hsaExecutable, const hsa_agent_t& hasAgent,
|
||||
const char* global_name, void** device_pptr, size_t* bytes) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
hsa_symbol_kind_t sym_type;
|
||||
hsa_executable_symbol_t global_symbol;
|
||||
std::string buildLog;
|
||||
|
||||
/* Find HSA Symbol by name */
|
||||
status = hsa_executable_get_symbol_by_name(hsaExecutable, global_name, &hasAgent,
|
||||
&global_symbol);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
buildLog += "Error: Failed to find the Symbol by Name: ";
|
||||
buildLog += hsa_strerror(status);
|
||||
tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Find HSA Symbol Type */
|
||||
status = hsa_executable_symbol_get_info(global_symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE,
|
||||
&sym_type);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
buildLog += "Error: Failed to find the Symbol Type : ";
|
||||
buildLog += hsa_strerror(status);
|
||||
tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Make sure symbol type is VARIABLE */
|
||||
if (sym_type != HSA_SYMBOL_KIND_VARIABLE) {
|
||||
buildLog += "Error: Symbol is not of type VARIABLE : ";
|
||||
buildLog += hsa_strerror(status);
|
||||
tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Retrieve the size of the variable */
|
||||
status = hsa_executable_symbol_get_info(global_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, bytes);
|
||||
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
buildLog += "Error: Failed to retrieve the Symbol Size : ";
|
||||
buildLog += hsa_strerror(status);
|
||||
tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Find HSA Symbol Address */
|
||||
status = hsa_executable_symbol_get_info(global_symbol,
|
||||
HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, device_pptr);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
buildLog += "Error: Failed to find the Symbol Address : ";
|
||||
buildLog += hsa_strerror(status);
|
||||
tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str());
|
||||
return false;
|
||||
} else {
|
||||
tprintf(DB_FB, "createGlobalVarObj: var %s : device=%p, size=%zu\n", global_name, *device_pptr, *bytes);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Registers a device-side global variable.
|
||||
// For each global variable in device code, there is a corresponding shadow
|
||||
// global variable in host code. The shadow host variable is used to keep
|
||||
// track of the value of the device side global variable between kernel
|
||||
// executions.
|
||||
// The basic logic is taken from ROCclr RT, but there is much difference.
|
||||
extern "C" void __hipRegisterVar(
|
||||
std::vector<hipModule_t>* modules, // The device modules containing code object
|
||||
char* var, // The shadow variable in host code
|
||||
char* hostVar, // Variable name in host code
|
||||
const char* deviceVar, // Variable name in device code
|
||||
int ext, // Whether this variable is external
|
||||
int size, // Size of the variable
|
||||
int constant, // Whether this variable is constant
|
||||
int global) // Unknown, always 0
|
||||
{
|
||||
HIP_INIT_API(__hipRegisterVar, modules, var, hostVar, deviceVar, ext, size, constant, global);
|
||||
|
||||
DeviceVar dvar{var, std::string{ hostVar }, static_cast<size_t>(size), modules,
|
||||
std::vector<RegisteredVar>{ g_deviceCnt }, false };
|
||||
|
||||
for (int deviceId = 0; deviceId < g_deviceCnt; deviceId++) {
|
||||
auto device = ihipGetDevice(deviceId);
|
||||
if(!device) {
|
||||
continue;
|
||||
}
|
||||
hsa_executable_t& executable = (*modules)[deviceId]->executable;
|
||||
hsa_agent_t& agent = g_allAgents[deviceId + 1];
|
||||
size_t bytes = 0;
|
||||
hipDeviceptr_t devicePtr = nullptr;
|
||||
|
||||
bool success = createGlobalVarObj(executable, agent, hostVar, &devicePtr, &bytes);
|
||||
if(!success) {
|
||||
return;
|
||||
}
|
||||
dvar.rvars[deviceId].devicePtr_ = devicePtr;
|
||||
dvar.rvars[deviceId].size_ = bytes;
|
||||
|
||||
hc::AmPointerInfo ptrInfo(nullptr, devicePtr, devicePtr, bytes, device->_acc, true, false);
|
||||
hc::am_memtracker_add(devicePtr, ptrInfo);
|
||||
|
||||
#if USE_APP_PTR_FOR_CTX
|
||||
hc::am_memtracker_update(devicePtr, device->_deviceId, 0u, ihipGetTlsDefaultCtx());
|
||||
#else
|
||||
hc::am_memtracker_update(devicePtr, device->_deviceId, 0u);
|
||||
#endif
|
||||
}
|
||||
g_vars.insert(std::make_pair(std::string(hostVar), dvar));
|
||||
}
|
||||
|
||||
extern "C" void __hipUnregisterFatBinary(std::vector<hipModule_t>* modules)
|
||||
{
|
||||
std::for_each(modules->begin(), modules->end(), [](hipModule_t module){ delete module; });
|
||||
delete modules;
|
||||
}
|
||||
|
||||
hipError_t hipConfigureCall(
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
size_t sharedMem,
|
||||
hipStream_t stream)
|
||||
{
|
||||
GET_TLS();
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
|
||||
|
||||
crit->_execStack.push(ihipExec_t{gridDim, blockDim, sharedMem, stream});
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
|
||||
extern "C" hipError_t __hipPushCallConfiguration(
|
||||
dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
size_t sharedMem,
|
||||
hipStream_t stream)
|
||||
{
|
||||
GET_TLS();
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
|
||||
|
||||
crit->_execStack.push(ihipExec_t{gridDim, blockDim, sharedMem, stream});
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
extern "C" hipError_t __hipPopCallConfiguration(
|
||||
dim3 *gridDim,
|
||||
dim3 *blockDim,
|
||||
size_t *sharedMem,
|
||||
hipStream_t *stream)
|
||||
{
|
||||
GET_TLS();
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
|
||||
|
||||
ihipExec_t exec;
|
||||
exec = std::move(crit->_execStack.top());
|
||||
crit->_execStack.pop();
|
||||
|
||||
*gridDim = exec._gridDim;
|
||||
*blockDim = exec._blockDim;
|
||||
*sharedMem = exec._sharedMem;
|
||||
*stream = exec._hStream;
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
int getCurrentDeviceId()
|
||||
{
|
||||
GET_TLS();
|
||||
|
||||
int deviceId = 0;
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
if(!ctx) return deviceId;
|
||||
|
||||
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
|
||||
|
||||
if(crit->_execStack.size() != 0)
|
||||
{
|
||||
auto &exec = crit->_execStack.top();
|
||||
|
||||
if (exec._hStream) {
|
||||
deviceId = exec._hStream->getDevice()->_deviceId;
|
||||
} else if (ctx->getDevice()) {
|
||||
deviceId = ctx->getDevice()->_deviceId;
|
||||
}
|
||||
} else if (ctx->getDevice()) {
|
||||
deviceId = ctx->getDevice()->_deviceId;
|
||||
}
|
||||
return deviceId;
|
||||
}
|
||||
|
||||
hipFunction_t ihipGetDeviceFunction(const void *hostFunction)
|
||||
{
|
||||
int deviceId = getCurrentDeviceId();
|
||||
auto it = g_functions.find(hostFunction);
|
||||
if (it == g_functions.end() || !it->second[deviceId]) {
|
||||
return nullptr;
|
||||
}
|
||||
return it->second[deviceId];
|
||||
}
|
||||
|
||||
hipError_t hipSetupArgument(
|
||||
const void *arg,
|
||||
size_t size,
|
||||
size_t offset)
|
||||
{
|
||||
HIP_INIT_API(hipSetupArgument, arg, size, offset);
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
|
||||
auto& arguments = crit->_execStack.top()._arguments;
|
||||
|
||||
if (arguments.size() < offset + size) {
|
||||
arguments.resize(offset + size);
|
||||
}
|
||||
|
||||
::memcpy(&arguments[offset], arg, size);
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipLaunchByPtr(const void *hostFunction)
|
||||
{
|
||||
HIP_INIT_API(hipLaunchByPtr, hostFunction);
|
||||
ihipExec_t exec;
|
||||
{
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
|
||||
exec = std::move(crit->_execStack.top());
|
||||
crit->_execStack.pop();
|
||||
}
|
||||
|
||||
int deviceId;
|
||||
if (exec._hStream) {
|
||||
deviceId = exec._hStream->getDevice()->_deviceId;
|
||||
}
|
||||
else if (ihipGetTlsDefaultCtx() && ihipGetTlsDefaultCtx()->getDevice()) {
|
||||
deviceId = ihipGetTlsDefaultCtx()->getDevice()->_deviceId;
|
||||
}
|
||||
else {
|
||||
deviceId = 0;
|
||||
}
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
decltype(g_functions)::iterator it;
|
||||
if ((it = g_functions.find(hostFunction)) == g_functions.end() ||
|
||||
!it->second[deviceId]) {
|
||||
e = hipErrorUnknown;
|
||||
fprintf(stderr, "hipLaunchByPtr cannot find kernel with stub address %p"
|
||||
" for device %d!\n", hostFunction, deviceId);
|
||||
abort();
|
||||
} else {
|
||||
size_t size = exec._arguments.size();
|
||||
void *extra[] = {
|
||||
HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec._arguments[0],
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END
|
||||
};
|
||||
|
||||
e = hipModuleLaunchKernel(it->second[deviceId],
|
||||
exec._gridDim.x, exec._gridDim.y, exec._gridDim.z,
|
||||
exec._blockDim.x, exec._blockDim.y, exec._blockDim.z,
|
||||
exec._sharedMem, exec._hStream, nullptr, extra);
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
#ifdef __GNUC__
|
||||
#pragma GCC visibility pop
|
||||
#endif
|
||||
@@ -1,332 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
//---
|
||||
// Driver initialization and reporting:
|
||||
|
||||
#include <stack>
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
void ihipCtxStackUpdate() {
|
||||
GET_TLS();
|
||||
if (tls->ctxStack.empty()) {
|
||||
tls->ctxStack.push(ihipGetTlsDefaultCtx());
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hipInit(unsigned int flags) {
|
||||
HIP_INIT_API(hipInit, flags);
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
// Flags must be 0
|
||||
if (flags != 0) {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device) {
|
||||
HIP_INIT_API(hipCtxCreate, ctx, flags, device); // FIXME - review if we want to init
|
||||
hipError_t e = hipSuccess;
|
||||
auto deviceHandle = ihipGetDevice(device);
|
||||
{
|
||||
// Obtain mutex access to the device critical data, release by destructor
|
||||
LockedAccessor_DeviceCrit_t deviceCrit(deviceHandle->criticalData());
|
||||
auto ictx = new ihipCtx_t(deviceHandle, g_deviceCnt, flags);
|
||||
*ctx = ictx;
|
||||
ihipSetTlsDefaultCtx(*ctx);
|
||||
tls->ctxStack.push(*ctx);
|
||||
tls->getPrimaryCtx = false;
|
||||
deviceCrit->addContext(ictx);
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGet(hipDevice_t* device, int deviceId) {
|
||||
HIP_INIT_API(hipDeviceGet, device, deviceId); // FIXME - review if we want to init
|
||||
|
||||
auto deviceHandle = ihipGetDevice(deviceId);
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
if (deviceHandle == NULL) {
|
||||
e = hipErrorInvalidDevice;
|
||||
} else {
|
||||
*device = deviceId;
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
};
|
||||
|
||||
hipError_t hipDriverGetVersion(int* driverVersion) {
|
||||
HIP_INIT_API(hipDriverGetVersion, driverVersion);
|
||||
hipError_t e = hipSuccess;
|
||||
if (driverVersion) {
|
||||
*driverVersion = 4;
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipRuntimeGetVersion(int* runtimeVersion) {
|
||||
HIP_INIT_API(hipRuntimeGetVersion, runtimeVersion);
|
||||
hipError_t e = hipSuccess;
|
||||
if (runtimeVersion) {
|
||||
*runtimeVersion = HIP_VERSION_PATCH;
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipCtxDestroy(hipCtx_t ctx) {
|
||||
HIP_INIT_API(hipCtxDestroy, ctx);
|
||||
hipError_t e = hipSuccess;
|
||||
ihipCtx_t* currentCtx = ihipGetTlsDefaultCtx();
|
||||
ihipCtx_t* primaryCtx = ((ihipDevice_t*)ctx->getDevice())->_primaryCtx;
|
||||
if (primaryCtx == ctx) {
|
||||
e = hipErrorInvalidValue;
|
||||
} else {
|
||||
if (currentCtx == ctx) {
|
||||
// need to destroy the ctx associated with calling thread
|
||||
tls->ctxStack.pop();
|
||||
}
|
||||
{
|
||||
auto deviceHandle = ctx->getWriteableDevice();
|
||||
deviceHandle->locked_removeContext(ctx);
|
||||
ctx->locked_reset();
|
||||
}
|
||||
delete ctx; // As per CUDA docs , attempting to access ctx from those threads which has
|
||||
// this ctx as current, will result in the error HIP_ERROR_CONTEXT_IS_DESTROYED.
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipCtxPopCurrent(hipCtx_t* ctx) {
|
||||
HIP_INIT_API(hipCtxPopCurrent, ctx);
|
||||
hipError_t e = hipSuccess;
|
||||
ihipCtx_t* currentCtx = ihipGetTlsDefaultCtx();
|
||||
auto deviceHandle = currentCtx->getDevice();
|
||||
*ctx = currentCtx;
|
||||
|
||||
if (!tls->ctxStack.empty()) {
|
||||
tls->ctxStack.pop();
|
||||
}
|
||||
|
||||
if (!tls->ctxStack.empty()) {
|
||||
currentCtx = tls->ctxStack.top();
|
||||
} else {
|
||||
currentCtx = deviceHandle->_primaryCtx;
|
||||
}
|
||||
|
||||
ihipSetTlsDefaultCtx(currentCtx); // TOD0 - Shall check for NULL?
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipCtxPushCurrent(hipCtx_t ctx) {
|
||||
HIP_INIT_API(hipCtxPushCurrent, ctx);
|
||||
hipError_t e = hipSuccess;
|
||||
if (ctx != NULL) { // TODO- is this check needed?
|
||||
ihipSetTlsDefaultCtx(ctx);
|
||||
tls->ctxStack.push(ctx);
|
||||
tls->getPrimaryCtx = false;
|
||||
} else {
|
||||
e = hipErrorInvalidContext;
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipCtxGetCurrent(hipCtx_t* ctx) {
|
||||
HIP_INIT_API(hipCtxGetCurrent, ctx);
|
||||
hipError_t e = hipSuccess;
|
||||
if ((tls->getPrimaryCtx) || tls->ctxStack.empty()) {
|
||||
*ctx = ihipGetTlsDefaultCtx();
|
||||
} else {
|
||||
*ctx = tls->ctxStack.top();
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipCtxSetCurrent(hipCtx_t ctx) {
|
||||
HIP_INIT_API(hipCtxSetCurrent, ctx);
|
||||
hipError_t e = hipSuccess;
|
||||
if (ctx == NULL) {
|
||||
tls->ctxStack.pop();
|
||||
} else {
|
||||
ihipSetTlsDefaultCtx(ctx);
|
||||
tls->ctxStack.push(ctx);
|
||||
tls->getPrimaryCtx = false;
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipCtxGetDevice(hipDevice_t* device) {
|
||||
HIP_INIT_API(hipCtxGetDevice, device);
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
ihipCtx_t* ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
if (ctx == nullptr) {
|
||||
e = hipErrorInvalidContext;
|
||||
// TODO *device = nullptr;
|
||||
} else {
|
||||
auto deviceHandle = ctx->getDevice();
|
||||
*device = deviceHandle->_deviceId;
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) {
|
||||
HIP_INIT_API(hipCtxGetApiVersion, apiVersion);
|
||||
|
||||
if (apiVersion) {
|
||||
*apiVersion = 4;
|
||||
}
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig) {
|
||||
HIP_INIT_API(hipCtxGetCacheConfig, cacheConfig);
|
||||
|
||||
*cacheConfig = hipFuncCachePreferNone;
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig) {
|
||||
HIP_INIT_API(hipCtxSetCacheConfig, cacheConfig);
|
||||
|
||||
// Nop, AMD does not support variable cache configs.
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) {
|
||||
HIP_INIT_API(hipCtxSetSharedMemConfig, config);
|
||||
|
||||
// Nop, AMD does not support variable shared mem configs.
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) {
|
||||
HIP_INIT_API(hipCtxGetSharedMemConfig, pConfig);
|
||||
|
||||
*pConfig = hipSharedMemBankSizeFourByte;
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipCtxSynchronize(void) {
|
||||
HIP_INIT_API(hipCtxSynchronize, 1);
|
||||
return ihipLogStatus(ihipSynchronize(tls)); // TODO Shall check validity of ctx?
|
||||
}
|
||||
|
||||
hipError_t hipCtxGetFlags(unsigned int* flags) {
|
||||
HIP_INIT_API(hipCtxGetFlags, flags);
|
||||
hipError_t e = hipSuccess;
|
||||
ihipCtx_t* tempCtx;
|
||||
tempCtx = ihipGetTlsDefaultCtx();
|
||||
*flags = tempCtx->_ctxFlags;
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active) {
|
||||
HIP_INIT_API(hipDevicePrimaryCtxGetState, dev, flags, active);
|
||||
hipError_t e = hipSuccess;
|
||||
auto deviceHandle = ihipGetDevice(dev);
|
||||
|
||||
if (deviceHandle == NULL) {
|
||||
e = hipErrorInvalidDevice;
|
||||
}
|
||||
|
||||
ihipCtx_t* tempCtx;
|
||||
tempCtx = ihipGetTlsDefaultCtx();
|
||||
ihipCtx_t* primaryCtx = deviceHandle->_primaryCtx;
|
||||
if (tempCtx == primaryCtx) {
|
||||
*active = 1;
|
||||
*flags = tempCtx->_ctxFlags;
|
||||
} else {
|
||||
*active = 0;
|
||||
*flags = primaryCtx->_ctxFlags;
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) {
|
||||
HIP_INIT_API(hipDevicePrimaryCtxRelease, dev);
|
||||
hipError_t e = hipSuccess;
|
||||
auto deviceHandle = ihipGetDevice(dev);
|
||||
|
||||
if (deviceHandle == NULL) {
|
||||
e = hipErrorInvalidDevice;
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) {
|
||||
HIP_INIT_API(hipDevicePrimaryCtxRetain, pctx, dev);
|
||||
hipError_t e = hipSuccess;
|
||||
auto deviceHandle = ihipGetDevice(dev);
|
||||
|
||||
if (deviceHandle == NULL) {
|
||||
e = hipErrorInvalidDevice;
|
||||
}
|
||||
*pctx = deviceHandle->_primaryCtx;
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) {
|
||||
HIP_INIT_API(hipDevicePrimaryCtxReset, dev);
|
||||
hipError_t e = hipSuccess;
|
||||
auto deviceHandle = ihipGetDevice(dev);
|
||||
|
||||
if (deviceHandle == NULL) {
|
||||
e = hipErrorInvalidDevice;
|
||||
}
|
||||
ihipCtx_t* primaryCtx = deviceHandle->_primaryCtx;
|
||||
primaryCtx->locked_reset();
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) {
|
||||
HIP_INIT_API(hipDevicePrimaryCtxSetFlags, dev, flags);
|
||||
hipError_t e = hipSuccess;
|
||||
auto deviceHandle = ihipGetDevice(dev);
|
||||
|
||||
if (deviceHandle == NULL) {
|
||||
e = hipErrorInvalidDevice;
|
||||
} else {
|
||||
e = hipErrorContextAlreadyInUse;
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
@@ -1,648 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
#include "device_util.h"
|
||||
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
// Devices
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
// TODO - does this initialize HIP runtime?
|
||||
hipError_t hipGetDevice(int* deviceId) {
|
||||
HIP_INIT_API(hipGetDevice, deviceId);
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
if (deviceId == nullptr)
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
if (ctx == nullptr) {
|
||||
e = hipErrorInvalidDevice; // TODO, check error code.
|
||||
*deviceId = -1;
|
||||
} else {
|
||||
*deviceId = ctx->getDevice()->_deviceId;
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
// TODO - does this initialize HIP runtime?
|
||||
hipError_t ihipGetDeviceCount(int* count) {
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
if (count != nullptr) {
|
||||
*count = g_deviceCnt;
|
||||
|
||||
if (*count > 0) {
|
||||
e = hipSuccess;
|
||||
} else {
|
||||
e = hipErrorNoDevice;
|
||||
}
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
return e;
|
||||
}
|
||||
|
||||
hipError_t hipGetDeviceCount(int* count) {
|
||||
HIP_INIT_API(hipGetDeviceCount, count);
|
||||
return ihipLogStatus(ihipGetDeviceCount(count));
|
||||
}
|
||||
|
||||
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) {
|
||||
HIP_INIT_API(hipDeviceSetCacheConfig, cacheConfig);
|
||||
|
||||
// Nop, AMD does not support variable cache configs.
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* cacheConfig) {
|
||||
HIP_INIT_API(hipDeviceGetCacheConfig, cacheConfig);
|
||||
|
||||
if (cacheConfig == nullptr) {
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
*cacheConfig = hipFuncCachePreferNone;
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) {
|
||||
HIP_INIT_API(hipDeviceGetLimit, pValue, limit);
|
||||
if (pValue == nullptr) {
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
#if __HIP_ENABLE_DEVICE_MALLOC__
|
||||
if (limit == hipLimitMallocHeapSize) {
|
||||
*pValue = (size_t)__HIP_SIZE_OF_HEAP;
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
#endif
|
||||
return ihipLogStatus(hipErrorUnsupportedLimit);
|
||||
}
|
||||
|
||||
hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) {
|
||||
HIP_INIT_API(hipFuncSetCacheConfig, cacheConfig);
|
||||
|
||||
// Nop, AMD does not support variable cache configs.
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) {
|
||||
HIP_INIT_API(hipDeviceSetSharedMemConfig, config);
|
||||
|
||||
// Nop, AMD does not support variable shared mem configs.
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig) {
|
||||
HIP_INIT_API(hipDeviceGetSharedMemConfig, pConfig);
|
||||
|
||||
*pConfig = hipSharedMemBankSizeFourByte;
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipSetDevice(int deviceId) {
|
||||
HIP_INIT_API(hipSetDevice, deviceId);
|
||||
if ((deviceId < 0) || (deviceId >= g_deviceCnt)) {
|
||||
return ihipLogStatus(hipErrorInvalidDevice);
|
||||
} else {
|
||||
ihipSetTlsDefaultCtx(ihipGetPrimaryCtx(deviceId));
|
||||
tls->getPrimaryCtx = true;
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hipDeviceSynchronize(void) {
|
||||
HIP_INIT_SPECIAL_API(hipDeviceSynchronize, TRACE_SYNC);
|
||||
return ihipLogStatus(ihipSynchronize(tls));
|
||||
}
|
||||
|
||||
hipError_t hipDeviceReset(void) {
|
||||
HIP_INIT_API(hipDeviceReset, );
|
||||
|
||||
auto* ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
// TODO-HCC
|
||||
// This function currently does a user-level cleanup of known resources.
|
||||
// It could benefit from KFD support to perform a more "nuclear" clean that would include any
|
||||
// associated kernel resources and page table entries.
|
||||
|
||||
#if 0
|
||||
if (ctx) {
|
||||
// Release ctx resources (streams and memory):
|
||||
ctx->locked_reset();
|
||||
}
|
||||
#endif
|
||||
if (ctx) {
|
||||
ihipDevice_t* deviceHandle = ctx->getWriteableDevice();
|
||||
deviceHandle->locked_reset();
|
||||
}
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
hipError_t ihipDeviceSetState(TlsData *tls) {
|
||||
hipError_t e = hipErrorInvalidContext;
|
||||
auto* ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
if (ctx) {
|
||||
ihipDevice_t* deviceHandle = ctx->getWriteableDevice();
|
||||
if (deviceHandle->_state == 0) {
|
||||
deviceHandle->_state = 1;
|
||||
}
|
||||
e = hipSuccess;
|
||||
}
|
||||
|
||||
return e;
|
||||
}
|
||||
|
||||
|
||||
hipError_t ihipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
if (pi == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
auto* hipDevice = ihipGetDevice(device);
|
||||
hipDeviceProp_t* prop = &hipDevice->_props;
|
||||
if (hipDevice) {
|
||||
switch (attr) {
|
||||
case hipDeviceAttributeMaxThreadsPerBlock:
|
||||
*pi = prop->maxThreadsPerBlock;
|
||||
break;
|
||||
case hipDeviceAttributeMaxBlockDimX:
|
||||
*pi = prop->maxThreadsDim[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxBlockDimY:
|
||||
*pi = prop->maxThreadsDim[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxBlockDimZ:
|
||||
*pi = prop->maxThreadsDim[2];
|
||||
break;
|
||||
case hipDeviceAttributeMaxGridDimX:
|
||||
*pi = prop->maxGridSize[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxGridDimY:
|
||||
*pi = prop->maxGridSize[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxGridDimZ:
|
||||
*pi = prop->maxGridSize[2];
|
||||
break;
|
||||
case hipDeviceAttributeMaxSharedMemoryPerBlock:
|
||||
*pi = prop->sharedMemPerBlock;
|
||||
break;
|
||||
case hipDeviceAttributeTotalConstantMemory:
|
||||
*pi = prop->totalConstMem;
|
||||
break;
|
||||
case hipDeviceAttributeWarpSize:
|
||||
*pi = prop->warpSize;
|
||||
break;
|
||||
case hipDeviceAttributeMaxRegistersPerBlock:
|
||||
*pi = prop->regsPerBlock;
|
||||
break;
|
||||
case hipDeviceAttributeClockRate:
|
||||
*pi = prop->clockRate;
|
||||
break;
|
||||
case hipDeviceAttributeMemoryClockRate:
|
||||
*pi = prop->memoryClockRate;
|
||||
break;
|
||||
case hipDeviceAttributeMemoryBusWidth:
|
||||
*pi = prop->memoryBusWidth;
|
||||
break;
|
||||
case hipDeviceAttributeMultiprocessorCount:
|
||||
*pi = prop->multiProcessorCount;
|
||||
break;
|
||||
case hipDeviceAttributeComputeMode:
|
||||
*pi = prop->computeMode;
|
||||
break;
|
||||
case hipDeviceAttributeL2CacheSize:
|
||||
*pi = prop->l2CacheSize;
|
||||
break;
|
||||
case hipDeviceAttributeMaxThreadsPerMultiProcessor:
|
||||
*pi = prop->maxThreadsPerMultiProcessor;
|
||||
break;
|
||||
case hipDeviceAttributeComputeCapabilityMajor:
|
||||
*pi = prop->major;
|
||||
break;
|
||||
case hipDeviceAttributeComputeCapabilityMinor:
|
||||
*pi = prop->minor;
|
||||
break;
|
||||
case hipDeviceAttributePciBusId:
|
||||
*pi = prop->pciBusID;
|
||||
break;
|
||||
case hipDeviceAttributeConcurrentKernels:
|
||||
*pi = prop->concurrentKernels;
|
||||
break;
|
||||
case hipDeviceAttributePciDeviceId:
|
||||
*pi = prop->pciDeviceID;
|
||||
break;
|
||||
case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
|
||||
*pi = prop->maxSharedMemoryPerMultiProcessor;
|
||||
break;
|
||||
case hipDeviceAttributeIsMultiGpuBoard:
|
||||
*pi = prop->isMultiGpuBoard;
|
||||
break;
|
||||
case hipDeviceAttributeIntegrated:
|
||||
*pi = prop->integrated;
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture1DWidth:
|
||||
*pi = prop->maxTexture1D;
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture2DWidth:
|
||||
*pi = prop->maxTexture2D[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture2DHeight:
|
||||
*pi = prop->maxTexture2D[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture3DWidth:
|
||||
*pi = prop->maxTexture3D[0];
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture3DHeight:
|
||||
*pi = prop->maxTexture3D[1];
|
||||
break;
|
||||
case hipDeviceAttributeMaxTexture3DDepth:
|
||||
*pi = prop->maxTexture3D[2];
|
||||
break;
|
||||
case hipDeviceAttributeHdpMemFlushCntl:
|
||||
{
|
||||
uint32_t** hdp = reinterpret_cast<uint32_t**>(pi);
|
||||
*hdp = prop->hdpMemFlushCntl;
|
||||
}
|
||||
break;
|
||||
case hipDeviceAttributeHdpRegFlushCntl:
|
||||
{
|
||||
uint32_t** hdp = reinterpret_cast<uint32_t**>(pi);
|
||||
*hdp = prop->hdpRegFlushCntl;
|
||||
}
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeLaunch:
|
||||
*pi = prop->cooperativeLaunch;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceLaunch:
|
||||
*pi = prop->cooperativeMultiDeviceLaunch;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc:
|
||||
*pi = prop->cooperativeMultiDeviceUnmatchedFunc;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim:
|
||||
*pi = prop->cooperativeMultiDeviceUnmatchedGridDim;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim:
|
||||
*pi = prop->cooperativeMultiDeviceUnmatchedBlockDim;
|
||||
break;
|
||||
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem:
|
||||
*pi = prop->cooperativeMultiDeviceUnmatchedSharedMem;
|
||||
break;
|
||||
case hipDeviceAttributeMaxPitch:
|
||||
*pi = prop->memPitch;
|
||||
break;
|
||||
case hipDeviceAttributeTextureAlignment:
|
||||
*pi = prop->textureAlignment;
|
||||
break;
|
||||
case hipDeviceAttributeTexturePitchAlignment:
|
||||
*pi = prop->texturePitchAlignment;
|
||||
break;
|
||||
case hipDeviceAttributeKernelExecTimeout:
|
||||
*pi = prop->kernelExecTimeoutEnabled;
|
||||
break;
|
||||
case hipDeviceAttributeCanMapHostMemory:
|
||||
*pi = prop->canMapHostMemory;
|
||||
break;
|
||||
case hipDeviceAttributeEccEnabled:
|
||||
*pi = prop->ECCEnabled;
|
||||
break;
|
||||
default:
|
||||
e = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
e = hipErrorInvalidDevice;
|
||||
}
|
||||
return e;
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
|
||||
HIP_INIT_API(hipDeviceGetAttribute, pi, attr, device);
|
||||
if ((device < 0) || (device >= g_deviceCnt)) {
|
||||
return ihipLogStatus(hipErrorInvalidDevice);
|
||||
}
|
||||
return ihipLogStatus(ihipDeviceGetAttribute(pi, attr, device));
|
||||
}
|
||||
|
||||
hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, int device) {
|
||||
hipError_t e;
|
||||
|
||||
if (props != nullptr) {
|
||||
auto* hipDevice = ihipGetDevice(device);
|
||||
if (hipDevice) {
|
||||
// copy saved props
|
||||
*props = hipDevice->_props;
|
||||
e = hipSuccess;
|
||||
} else {
|
||||
e = hipErrorInvalidDevice;
|
||||
}
|
||||
} else {
|
||||
e = hipErrorInvalidDevice;
|
||||
}
|
||||
|
||||
return e;
|
||||
}
|
||||
|
||||
hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) {
|
||||
HIP_INIT_API(hipGetDeviceProperties, props, device);
|
||||
if ((device < 0) || (device >= g_deviceCnt)) {
|
||||
return ihipLogStatus(hipErrorInvalidDevice);
|
||||
}
|
||||
return ihipLogStatus(ihipGetDeviceProperties(props, device));
|
||||
}
|
||||
|
||||
hipError_t hipSetDeviceFlags(unsigned int flags) {
|
||||
HIP_INIT_API(hipSetDeviceFlags, flags);
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
auto* ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
// TODO : does this really OR in the flags or replaces previous flags:
|
||||
// TODO : Review error handling behavior for this function, it often returns
|
||||
// ErrorSetOnActiveProcess
|
||||
if (ctx) {
|
||||
auto* deviceHandle = ctx->getDevice();
|
||||
if (deviceHandle->_state == 0) {
|
||||
ctx->_ctxFlags = ctx->_ctxFlags | flags;
|
||||
if (flags & hipDeviceScheduleMask) {
|
||||
switch (hipDeviceScheduleMask) {
|
||||
case hipDeviceScheduleAuto:
|
||||
case hipDeviceScheduleSpin:
|
||||
case hipDeviceScheduleYield:
|
||||
case hipDeviceScheduleBlockingSync:
|
||||
e = hipSuccess;
|
||||
break;
|
||||
default:
|
||||
e = hipSuccess; // TODO - should this be error? Map to Auto?
|
||||
// e = hipErrorInvalidValue;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned supportedFlags =
|
||||
hipDeviceScheduleMask | hipDeviceMapHost | hipDeviceLmemResizeToMax;
|
||||
|
||||
if (flags & (~supportedFlags)) {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
} else {
|
||||
e = hipErrorSetOnActiveProcess;
|
||||
}
|
||||
} else {
|
||||
e = hipErrorInvalidDevice;
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
};
|
||||
|
||||
hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device) {
|
||||
HIP_INIT_API(hipDeviceComputeCapability, major, minor, device);
|
||||
hipError_t e = hipSuccess;
|
||||
if ((device < 0) || (device >= g_deviceCnt)) {
|
||||
e = hipErrorInvalidDevice;
|
||||
} else {
|
||||
e = ihipDeviceGetAttribute(major, hipDeviceAttributeComputeCapabilityMajor, device);
|
||||
e = ihipDeviceGetAttribute(minor, hipDeviceAttributeComputeCapabilityMinor, device);
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device) {
|
||||
// Cast to void* here to avoid printing garbage in debug modes.
|
||||
HIP_INIT_API(hipDeviceGetName, (void*)name, len, device);
|
||||
hipError_t e = hipSuccess;
|
||||
if ((device < 0) || (device >= g_deviceCnt)) {
|
||||
e = hipErrorInvalidDevice;
|
||||
} else {
|
||||
auto deviceHandle = ihipGetDevice(device);
|
||||
int nameLen = strlen(deviceHandle->_props.name);
|
||||
if (nameLen <= len) memcpy(name, deviceHandle->_props.name, nameLen);
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, int device) {
|
||||
// Cast to void* here to avoid printing garbage in debug modes.
|
||||
HIP_INIT_API(hipDeviceGetPCIBusId, (void*)pciBusId, len, device);
|
||||
hipError_t e = hipErrorInvalidValue;
|
||||
if ((device < 0) || (device >= g_deviceCnt)) {
|
||||
e = hipErrorInvalidDevice;
|
||||
} else {
|
||||
if ((pciBusId != nullptr) && (len > 0)) {
|
||||
auto deviceHandle = ihipGetDevice(device);
|
||||
int retVal =
|
||||
snprintf(pciBusId, len, "%04x:%02x:%02x.0", deviceHandle->_props.pciDomainID,
|
||||
deviceHandle->_props.pciBusID, deviceHandle->_props.pciDeviceID);
|
||||
if (retVal > 0 && retVal < len) {
|
||||
e = hipSuccess;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device) {
|
||||
HIP_INIT_API(hipDeviceTotalMem, bytes, device);
|
||||
hipError_t e = hipSuccess;
|
||||
if ((device < 0) || (device >= g_deviceCnt)) {
|
||||
e = hipErrorInvalidDevice;
|
||||
} else {
|
||||
auto deviceHandle = ihipGetDevice(device);
|
||||
*bytes = deviceHandle->_props.totalGlobalMem;
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId) {
|
||||
HIP_INIT_API(hipDeviceGetByPCIBusId, device, pciBusId);
|
||||
hipDeviceProp_t tempProp;
|
||||
int deviceCount = 0;
|
||||
hipError_t e = hipErrorInvalidValue;
|
||||
if ((device != nullptr) && (pciBusId != nullptr)) {
|
||||
int pciBusID = -1;
|
||||
int pciDeviceID = -1;
|
||||
int pciDomainID = -1;
|
||||
int len = 0;
|
||||
len = sscanf(pciBusId, "%04x:%02x:%02x", &pciDomainID, &pciBusID, &pciDeviceID);
|
||||
if (len == 3) {
|
||||
ihipGetDeviceCount(&deviceCount);
|
||||
for (int i = 0; i < deviceCount; i++) {
|
||||
ihipGetDeviceProperties(&tempProp, i);
|
||||
if (tempProp.pciBusID == pciBusID) {
|
||||
*device = i;
|
||||
e = hipSuccess;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop) {
|
||||
HIP_INIT_API(hipChooseDevice, device, prop);
|
||||
hipDeviceProp_t tempProp;
|
||||
hipError_t e = hipSuccess;
|
||||
if ((device == NULL) || (prop == NULL)) {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
if (e == hipSuccess) {
|
||||
int deviceCount;
|
||||
int inPropCount = 0;
|
||||
int matchedPropCount = 0;
|
||||
ihipGetDeviceCount(&deviceCount);
|
||||
*device = 0;
|
||||
for (int i = 0; i < deviceCount; i++) {
|
||||
ihipGetDeviceProperties(&tempProp, i);
|
||||
if (prop->major != 0) {
|
||||
inPropCount++;
|
||||
if (tempProp.major >= prop->major) {
|
||||
matchedPropCount++;
|
||||
}
|
||||
if (prop->minor != 0) {
|
||||
inPropCount++;
|
||||
if (tempProp.minor >= prop->minor) {
|
||||
matchedPropCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (prop->totalGlobalMem != 0) {
|
||||
inPropCount++;
|
||||
if (tempProp.totalGlobalMem >= prop->totalGlobalMem) {
|
||||
matchedPropCount++;
|
||||
}
|
||||
}
|
||||
if (prop->sharedMemPerBlock != 0) {
|
||||
inPropCount++;
|
||||
if (tempProp.sharedMemPerBlock >= prop->sharedMemPerBlock) {
|
||||
matchedPropCount++;
|
||||
}
|
||||
}
|
||||
if (prop->maxThreadsPerBlock != 0) {
|
||||
inPropCount++;
|
||||
if (tempProp.maxThreadsPerBlock >= prop->maxThreadsPerBlock) {
|
||||
matchedPropCount++;
|
||||
}
|
||||
}
|
||||
if (prop->totalConstMem != 0) {
|
||||
inPropCount++;
|
||||
if (tempProp.totalConstMem >= prop->totalConstMem) {
|
||||
matchedPropCount++;
|
||||
}
|
||||
}
|
||||
if (prop->multiProcessorCount != 0) {
|
||||
inPropCount++;
|
||||
if (tempProp.multiProcessorCount >= prop->multiProcessorCount) {
|
||||
matchedPropCount++;
|
||||
}
|
||||
}
|
||||
if (prop->maxThreadsPerMultiProcessor != 0) {
|
||||
inPropCount++;
|
||||
if (tempProp.maxThreadsPerMultiProcessor >= prop->maxThreadsPerMultiProcessor) {
|
||||
matchedPropCount++;
|
||||
}
|
||||
}
|
||||
if (prop->memoryClockRate != 0) {
|
||||
inPropCount++;
|
||||
if (tempProp.memoryClockRate >= prop->memoryClockRate) {
|
||||
matchedPropCount++;
|
||||
}
|
||||
}
|
||||
if (inPropCount == matchedPropCount) {
|
||||
*device = i;
|
||||
}
|
||||
#if 0
|
||||
else{
|
||||
e= hipErrorInvalidValue;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
#define HSA_ERROR_CHECK(hsa_error_code) \
|
||||
if ((hsa_error_code != HSA_STATUS_SUCCESS) && (hsa_error_code != HSA_STATUS_INFO_BREAK)) { \
|
||||
return ihipLogStatus(hipErrorRuntimeOther); \
|
||||
}
|
||||
|
||||
hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount) {
|
||||
HIP_INIT_API(hipExtGetLinkTypeAndHopCount, device1, device2, linktype, hopcount);
|
||||
|
||||
if ((device1 < 0) || (device1 >= g_deviceCnt) || (device2 < 0) || (device2 >= g_deviceCnt)) {
|
||||
return ihipLogStatus(hipErrorInvalidDevice);
|
||||
} else {
|
||||
auto device1Handle = ihipGetDevice(device1);
|
||||
auto device2Handle = ihipGetDevice(device2);
|
||||
|
||||
const auto& find_pool = [](hsa_amd_memory_pool_t pool, void* data) {
|
||||
bool allowed;
|
||||
hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &allowed);
|
||||
if (allowed) {
|
||||
hsa_amd_segment_t segment;
|
||||
hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
||||
if (HSA_AMD_SEGMENT_GLOBAL != segment) return HSA_STATUS_SUCCESS;
|
||||
|
||||
uint32_t flags;
|
||||
hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flags);
|
||||
if (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED) {
|
||||
*((hsa_amd_memory_pool_t*)data) = pool;
|
||||
return HSA_STATUS_INFO_BREAK;
|
||||
}
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
};
|
||||
|
||||
hsa_status_t err;
|
||||
hsa_amd_memory_pool_t pool;
|
||||
err = hsa_amd_agent_iterate_memory_pools(device2Handle->_hsaAgent, find_pool, (void*)&pool);
|
||||
HSA_ERROR_CHECK(err);
|
||||
|
||||
hsa_amd_memory_pool_link_info_t link_info;
|
||||
err = hsa_amd_agent_memory_pool_get_info(device1Handle->_hsaAgent, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO, &link_info);
|
||||
HSA_ERROR_CHECK(err);
|
||||
*linktype = link_info.link_type;
|
||||
|
||||
if (link_info.numa_distance < 30)
|
||||
*hopcount = 1;
|
||||
else
|
||||
*hopcount = 2;
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
}
|
||||
@@ -1,61 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
// Error Handling
|
||||
//---
|
||||
|
||||
hipError_t hipGetLastError() {
|
||||
HIP_INIT_API(hipGetLastError);
|
||||
|
||||
// Return last error, but then reset the state:
|
||||
hipError_t e = ihipLogStatus(tls->lastHipError);
|
||||
tls->lastHipError = hipSuccess;
|
||||
return e;
|
||||
}
|
||||
|
||||
hipError_t hipPeekAtLastError() {
|
||||
HIP_INIT_API(hipPeekAtLastError);
|
||||
|
||||
// peek at last error, but don't reset it.
|
||||
return ihipLogStatus(tls->lastHipError);
|
||||
}
|
||||
|
||||
const char* hipGetErrorName(hipError_t hip_error) {
|
||||
HIP_INIT_API(hipGetErrorName, hip_error);
|
||||
|
||||
return ihipErrorString(hip_error);
|
||||
}
|
||||
|
||||
const char* hipGetErrorString(hipError_t hip_error) {
|
||||
HIP_INIT_API(hipGetErrorString, hip_error);
|
||||
|
||||
// TODO - return a message explaining the error.
|
||||
// TODO - This should be set up to return the same string reported in the the doxygen comments,
|
||||
// somehow.
|
||||
return hipGetErrorName(hip_error);
|
||||
}
|
||||
@@ -1,483 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
#include <errno.h> // errno, ENOENT
|
||||
#include <fcntl.h> // O_RDWR, O_CREATE
|
||||
#include <sys/mman.h> // shm_open, shm_unlink, mmap, munmap, PROT_READ, PROT_WRITE, MAP_SHARED, MAP_FAILED
|
||||
#include <unistd.h> // ftruncate, close
|
||||
|
||||
namespace {
|
||||
|
||||
inline
|
||||
const char* hsa_to_string(hsa_status_t err) noexcept
|
||||
{
|
||||
const char* r{};
|
||||
|
||||
if (hsa_status_string(err, &r) == HSA_STATUS_SUCCESS) return r;
|
||||
|
||||
return "Unknown.";
|
||||
}
|
||||
|
||||
template<std::size_t m, std::size_t n>
|
||||
inline
|
||||
void throwing_result_check(hsa_status_t res, const char (&file)[m],
|
||||
const char (&function)[n], int line) {
|
||||
if (res == HSA_STATUS_SUCCESS) return;
|
||||
|
||||
throw std::runtime_error{"Failed in file " + (file +
|
||||
(", in function \"" + (function +
|
||||
("\", on line " + std::to_string(line))))) +
|
||||
", with error: " + hsa_to_string(res)};
|
||||
}
|
||||
|
||||
template<std::size_t m, std::size_t n>
|
||||
inline
|
||||
void throwing_retval_check(int good, int retval, const char (&file)[m],
|
||||
const char (&function)[n], int line) {
|
||||
if (retval == good) return;
|
||||
|
||||
throw std::runtime_error{"Failed in file " + (file +
|
||||
(", in function \"" + (function +
|
||||
("\", on line " + std::to_string(line))))) +
|
||||
", with error: " + strerror(retval)};
|
||||
}
|
||||
|
||||
template<std::size_t m, std::size_t n, std::size_t o>
|
||||
inline
|
||||
void throwing_msg_check(bool bad, const char (&msg)[o],
|
||||
const char (&file)[m],
|
||||
const char (&function)[n], int line) {
|
||||
if (!bad) return;
|
||||
|
||||
throw std::runtime_error{"Failed in file " + (file +
|
||||
(", in function \"" + (function +
|
||||
("\", on line " + std::to_string(line))))) +
|
||||
", with error: " + msg};
|
||||
}
|
||||
|
||||
template<std::size_t m, std::size_t n>
|
||||
inline
|
||||
void throwing_errno_check(bool bad, const char (&file)[m],
|
||||
const char (&function)[n], int line) {
|
||||
if (!bad) return;
|
||||
|
||||
throw std::runtime_error{"Failed in file " + (file +
|
||||
(", in function \"" + (function +
|
||||
("\", on line " + std::to_string(line))))) +
|
||||
", with error: " + strerror(errno)};
|
||||
}
|
||||
|
||||
} // Unnamed namespace.
|
||||
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
// Events
|
||||
//---
|
||||
|
||||
|
||||
ihipEvent_t::ihipEvent_t(unsigned flags) : _criticalData(this) {
|
||||
_flags = flags;
|
||||
GET_TLS();
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
_deviceId = ctx == nullptr ? -1 : ctx->getDevice()->_deviceId;
|
||||
};
|
||||
|
||||
|
||||
// Attach to an existing completion future:
|
||||
void ihipEvent_t::attachToCompletionFuture(const hc::completion_future* cf, hipStream_t stream,
|
||||
ihipEventType_t eventType) {
|
||||
LockedAccessor_EventCrit_t crit(_criticalData);
|
||||
crit->_eventData.marker(*cf);
|
||||
crit->_eventData._type = eventType;
|
||||
crit->_eventData._stream = stream;
|
||||
crit->_eventData._state = hipEventStatusRecording;
|
||||
}
|
||||
|
||||
|
||||
static void createIpcEventShmemIfNeeded(ihipEventData_t &ecd) {
|
||||
if (!ecd._ipc_name.empty()) return;
|
||||
|
||||
// create random shmem name
|
||||
char name_template[] = "/tmp/eventXXXXXX";
|
||||
int temp_fd = mkstemp(name_template);
|
||||
throwing_errno_check(-1 == temp_fd, __FILE__, __func__, __LINE__);
|
||||
|
||||
// copy shmem name into event data, reformat to use a single slash
|
||||
ecd._ipc_name = name_template;
|
||||
ecd._ipc_name.replace(0, 5, "/hip_");
|
||||
|
||||
// open shmem
|
||||
ecd._ipc_fd = shm_open(ecd._ipc_name.c_str(), O_RDWR | O_CREAT, 0777);
|
||||
throwing_errno_check(ecd._ipc_fd < 0, __FILE__, __func__, __LINE__);
|
||||
|
||||
// size it
|
||||
throwing_retval_check(0, ftruncate(ecd._ipc_fd, sizeof(ihipIpcEventShmem_t)), __FILE__, __func__, __LINE__);
|
||||
|
||||
// mmap it
|
||||
ecd._ipc_shmem = (ihipIpcEventShmem_t*)mmap(0, sizeof(ihipIpcEventShmem_t), PROT_READ | PROT_WRITE, MAP_SHARED, ecd._ipc_fd, 0);
|
||||
throwing_errno_check(NULL == ecd._ipc_shmem, __FILE__, __func__, __LINE__);
|
||||
|
||||
// initialize shared state
|
||||
ecd._ipc_shmem->owners = 1;
|
||||
ecd._ipc_shmem->read_index = -1;
|
||||
ecd._ipc_shmem->write_index = 0;
|
||||
for (int i=0; i < IPC_SIGNALS_PER_EVENT; i++) {
|
||||
ecd._ipc_shmem->signal[i] = 0;
|
||||
}
|
||||
|
||||
// remove temp file
|
||||
throwing_errno_check(-1 == close(temp_fd), __FILE__, __func__, __LINE__);
|
||||
throwing_errno_check(-1 == unlink(name_template), __FILE__, __func__, __LINE__);
|
||||
}
|
||||
|
||||
|
||||
static std::pair<hipEventStatus_t, uint64_t> refreshEventStatus(ihipEventData_t &ecd) {
|
||||
if (ecd._state == hipEventStatusRecording && ecd.marker().is_ready()) {
|
||||
if ((ecd._type == hipEventTypeIndependent) ||
|
||||
(ecd._type == hipEventTypeStopCommand)) {
|
||||
ecd._timestamp = ecd.marker().get_end_tick();
|
||||
} else if (ecd._type == hipEventTypeStartCommand) {
|
||||
ecd._timestamp = ecd.marker().get_begin_tick();
|
||||
} else {
|
||||
ecd._timestamp = 0;
|
||||
assert(0); // TODO - move to debug assert
|
||||
}
|
||||
|
||||
ecd._state = hipEventStatusComplete;
|
||||
|
||||
return std::pair<hipEventStatus_t, uint64_t>(ecd._state,
|
||||
ecd._timestamp);
|
||||
}
|
||||
|
||||
// Not complete path here:
|
||||
return std::pair<hipEventStatus_t, uint64_t>(ecd._state, ecd._timestamp);
|
||||
}
|
||||
|
||||
|
||||
hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags) {
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
unsigned supportedFlags = hipEventDefault | hipEventBlockingSync | hipEventDisableTiming |
|
||||
hipEventReleaseToDevice | hipEventReleaseToSystem |
|
||||
hipEventInterprocess;
|
||||
const unsigned releaseFlags = (hipEventReleaseToDevice | hipEventReleaseToSystem);
|
||||
|
||||
const bool illegalFlags =
|
||||
(flags & ~supportedFlags) || // can't set any unsupported flags.
|
||||
(flags & releaseFlags) == releaseFlags; // can't set both release flags
|
||||
|
||||
if (event && !illegalFlags) {
|
||||
*event = new ihipEvent_t(flags);
|
||||
} else {
|
||||
e = hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
return e;
|
||||
}
|
||||
|
||||
hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) {
|
||||
HIP_INIT_API(hipEventCreateWithFlags, event, flags);
|
||||
|
||||
return ihipLogStatus(ihipEventCreate(event, flags));
|
||||
}
|
||||
|
||||
hipError_t hipEventCreate(hipEvent_t* event) {
|
||||
HIP_INIT_API(hipEventCreate, event);
|
||||
|
||||
return ihipLogStatus(ihipEventCreate(event, 0));
|
||||
}
|
||||
|
||||
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) {
|
||||
HIP_INIT_SPECIAL_API(hipEventRecord, TRACE_SYNC, event, stream);
|
||||
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
LockedAccessor_EventCrit_t eCrit(event->criticalData());
|
||||
auto &ecd{eCrit->_eventData};
|
||||
if (ecd._state == hipEventStatusUnitialized) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
if (HIP_SYNC_NULL_STREAM && stream->isDefaultStream()) {
|
||||
// TODO-HIP_SYNC_NULL_STREAM : can remove this code when HIP_SYNC_NULL_STREAM = 0
|
||||
// If default stream , then wait on all queues.
|
||||
ihipCtx_t* ctx = ihipGetTlsDefaultCtx();
|
||||
ctx->locked_syncDefaultStream(true, true);
|
||||
ecd.marker(hc::completion_future()); // reset event
|
||||
ecd._stream = stream;
|
||||
ecd._timestamp = hc::get_system_ticks();
|
||||
ecd._state = hipEventStatusComplete;
|
||||
// TODO handle IPC case?
|
||||
}
|
||||
else {
|
||||
// Record the event in the stream:
|
||||
ecd.marker(stream->locked_recordEvent(event));
|
||||
ecd._stream = stream;
|
||||
ecd._timestamp = 0;
|
||||
ecd._state = hipEventStatusRecording;
|
||||
if (event->_flags & hipEventInterprocess) {
|
||||
createIpcEventShmemIfNeeded(ecd);
|
||||
int write_index = ecd._ipc_shmem->write_index++; // fetch add
|
||||
int offset = write_index % IPC_SIGNALS_PER_EVENT;
|
||||
// While event still valid and still locked, spin.
|
||||
while (ecd._ipc_shmem->signal[offset] != 0) {
|
||||
// TODO backoff
|
||||
}
|
||||
// Lock signal.
|
||||
ecd._ipc_shmem->signal[offset] = 1;
|
||||
// forward signal state from local signal to IPC signal via host callback
|
||||
// create callback that can be passed to hsa_amd_signal_async_handler
|
||||
// this function decrements the IPC signal by 1 to indicate completion
|
||||
std::atomic<int> *signal = &ecd._ipc_shmem->signal[offset];
|
||||
auto t{new std::function<void()>{[=]() {
|
||||
signal->store(0);
|
||||
}}};
|
||||
// register above callback with HSA runtime to be called when local signal
|
||||
// is decremented from 1 to 0 by CP
|
||||
auto local_signal = *reinterpret_cast<hsa_signal_t*>(eCrit->_eventData.marker().get_native_handle());
|
||||
hsa_amd_signal_async_handler(local_signal, HSA_SIGNAL_CONDITION_LT, 1,
|
||||
[](hsa_signal_value_t x, void* p) {
|
||||
(*static_cast<decltype(t)>(p))();
|
||||
delete static_cast<decltype(t)>(p);
|
||||
return false;
|
||||
}, t);
|
||||
// Update read index to indicate new signal.
|
||||
int expected = write_index-1;
|
||||
while (!ecd._ipc_shmem->read_index.compare_exchange_weak(expected, write_index)) {
|
||||
throwing_msg_check(
|
||||
expected >= write_index,
|
||||
"IPC event record update read index failure",
|
||||
__FILE__, __func__, __LINE__);
|
||||
expected = write_index-1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipEventDestroy(hipEvent_t event) {
|
||||
HIP_INIT_API(hipEventDestroy, event);
|
||||
|
||||
if (event) {
|
||||
{
|
||||
LockedAccessor_EventCrit_t crit(event->criticalData());
|
||||
auto &ecd{crit->_eventData};
|
||||
if (ecd._ipc_shmem) {
|
||||
int owners = --ecd._ipc_shmem->owners;
|
||||
throwing_errno_check(-1 == munmap(ecd._ipc_shmem, sizeof(ihipIpcEventShmem_t)), __FILE__, __func__, __LINE__);
|
||||
throwing_errno_check(-1 == close(ecd._ipc_fd), __FILE__, __func__, __LINE__);
|
||||
if (0 == owners)
|
||||
throwing_errno_check(-1 == shm_unlink(ecd._ipc_name.c_str()), __FILE__, __func__, __LINE__);
|
||||
}
|
||||
}
|
||||
delete event;
|
||||
return ihipLogStatus(hipSuccess);
|
||||
} else {
|
||||
return ihipLogStatus(hipErrorInvalidHandle);
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hipEventSynchronize(hipEvent_t event) {
|
||||
HIP_INIT_SPECIAL_API(hipEventSynchronize, TRACE_SYNC, event);
|
||||
|
||||
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
|
||||
if (!(event->_flags & hipEventReleaseToSystem)) {
|
||||
tprintf(DB_WARN,
|
||||
"hipEventSynchronize on event without system-scope fence ; consider creating with "
|
||||
"hipEventReleaseToSystem\n");
|
||||
}
|
||||
|
||||
auto ecd = event->locked_copyCrit();
|
||||
|
||||
if (event->_flags & hipEventInterprocess) {
|
||||
// this is an IPC event
|
||||
int previous_read_index = ecd._ipc_shmem->read_index;
|
||||
if (previous_read_index >= 0) {
|
||||
// we have at least one recorded event, so proceed
|
||||
int offset = previous_read_index % IPC_SIGNALS_PER_EVENT;
|
||||
// While event still valid and still locked, spin.
|
||||
while (ecd._ipc_shmem->read_index < previous_read_index+IPC_SIGNALS_PER_EVENT && ecd._ipc_shmem->signal[offset] != 0) {
|
||||
// TODO backoff
|
||||
}
|
||||
}
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
if (ecd._state == hipEventStatusUnitialized) {
|
||||
return ihipLogStatus(hipErrorInvalidHandle);
|
||||
} else if (ecd._state == hipEventStatusCreated) {
|
||||
// Created but not actually recorded on any device:
|
||||
return ihipLogStatus(hipSuccess);
|
||||
} else if (HIP_SYNC_NULL_STREAM && (ecd._stream->isDefaultStream())) {
|
||||
auto* ctx = ihipGetTlsDefaultCtx();
|
||||
// TODO-HIP_SYNC_NULL_STREAM - can remove this code
|
||||
ctx->locked_syncDefaultStream(true, true);
|
||||
return ihipLogStatus(hipSuccess);
|
||||
} else {
|
||||
ecd.marker().wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked
|
||||
: hc::hcWaitModeActive);
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) {
|
||||
HIP_INIT_API(hipEventElapsedTime, ms, start, stop);
|
||||
|
||||
if (ms == nullptr) return ihipLogStatus(hipErrorInvalidValue);
|
||||
if ((start == nullptr) || (stop == nullptr) ||
|
||||
(start->_deviceId != stop->_deviceId))
|
||||
return ihipLogStatus(hipErrorInvalidHandle);
|
||||
|
||||
*ms = 0.0f;
|
||||
auto startEcd = start->locked_copyCrit();
|
||||
auto stopEcd = stop->locked_copyCrit();
|
||||
|
||||
if ((start->_flags & hipEventDisableTiming) ||
|
||||
(startEcd._state == hipEventStatusUnitialized) ||
|
||||
(startEcd._state == hipEventStatusCreated) ||
|
||||
(stop->_flags & hipEventDisableTiming) ||
|
||||
(stopEcd._state == hipEventStatusUnitialized) ||
|
||||
(stopEcd._state == hipEventStatusCreated)) {
|
||||
// Both events must be at least recorded else return hipErrorInvalidHandle
|
||||
return ihipLogStatus(hipErrorInvalidHandle);
|
||||
}
|
||||
|
||||
// Refresh status, if still recording...
|
||||
|
||||
auto startStatus = refreshEventStatus(startEcd); // pair < state, timestamp >
|
||||
auto stopStatus = refreshEventStatus(stopEcd); // pair < state, timestamp >
|
||||
|
||||
if ((startStatus.first == hipEventStatusComplete) &&
|
||||
(stopStatus.first == hipEventStatusComplete)) {
|
||||
// Common case, we have good information for both events. 'second' is the timestamp:
|
||||
int64_t tickDiff = (stopStatus.second - startStatus.second);
|
||||
uint64_t freqHz;
|
||||
hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freqHz);
|
||||
if (freqHz) {
|
||||
*ms = ((double)(tickDiff) / (double)(freqHz)) * 1000.0f;
|
||||
return ihipLogStatus(hipSuccess);
|
||||
} else {
|
||||
*ms = 0.0f;
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
} else if ((startStatus.first == hipEventStatusRecording) ||
|
||||
(stopStatus.first == hipEventStatusRecording)) {
|
||||
return ihipLogStatus(hipErrorNotReady);
|
||||
} else {
|
||||
assert(0); // TODO should we return hipErrorUnknown ?
|
||||
}
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipEventQuery(hipEvent_t event) {
|
||||
HIP_INIT_SPECIAL_API(hipEventQuery, TRACE_QUERY, event);
|
||||
|
||||
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
|
||||
if (!(event->_flags & hipEventReleaseToSystem)) {
|
||||
tprintf(DB_WARN,
|
||||
"hipEventQuery on event without system-scope fence ; consider creating with "
|
||||
"hipEventReleaseToSystem\n");
|
||||
}
|
||||
|
||||
auto ecd = event->locked_copyCrit();
|
||||
|
||||
// this event is either from an ipc handle, or the owner of a local ipc event
|
||||
if (event->_flags & hipEventInterprocess) {
|
||||
if (ecd._ipc_shmem) {
|
||||
int previous_read_index = ecd._ipc_shmem->read_index;
|
||||
int offset = previous_read_index % IPC_SIGNALS_PER_EVENT;
|
||||
if (ecd._ipc_shmem->read_index < previous_read_index+IPC_SIGNALS_PER_EVENT && ecd._ipc_shmem->signal[offset] != 0) {
|
||||
return ihipLogStatus(hipErrorNotReady);
|
||||
}
|
||||
else {
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
}
|
||||
}
|
||||
// normal event
|
||||
else {
|
||||
if (ecd._state == hipEventStatusRecording && !ecd.marker().is_ready()) {
|
||||
return ihipLogStatus(hipErrorNotReady);
|
||||
}
|
||||
}
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event)
|
||||
{
|
||||
HIP_INIT_API(hipIpcGetEventHandle, handle, event);
|
||||
|
||||
#if USE_IPC && ATOMIC_INT_LOCK_FREE == 2
|
||||
if (!handle) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
if (!(event->_flags & hipEventInterprocess)) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
if (!(event->_flags & hipEventDisableTiming)) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
|
||||
LockedAccessor_EventCrit_t crit(event->criticalData());
|
||||
|
||||
auto &ecd{crit->_eventData};
|
||||
createIpcEventShmemIfNeeded(ecd);
|
||||
// copy name into handle
|
||||
ihipIpcEventHandle_t* iHandle = (ihipIpcEventHandle_t*)handle;
|
||||
memset(iHandle->shmem_name, 0, HIP_IPC_HANDLE_SIZE);
|
||||
ecd._ipc_name.copy(iHandle->shmem_name, std::string::npos);
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
#else
|
||||
return ihipLogStatus(hipErrorNotSupported);
|
||||
#endif
|
||||
}
|
||||
|
||||
hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle)
|
||||
{
|
||||
HIP_INIT_API(hipIpcOpenEventHandle, event, &handle);
|
||||
|
||||
#if USE_IPC && ATOMIC_INT_LOCK_FREE == 2
|
||||
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
|
||||
// create a new event with timing disabled, per spec
|
||||
auto hip_status = ihipEventCreate(event, hipEventDisableTiming | hipEventInterprocess);
|
||||
if (hip_status != hipSuccess) return ihipLogStatus(hip_status);
|
||||
|
||||
LockedAccessor_EventCrit_t crit((*event)->criticalData());
|
||||
auto &ecd{crit->_eventData};
|
||||
ihipIpcEventHandle_t* iHandle = (ihipIpcEventHandle_t*)&handle;
|
||||
ecd._ipc_name = iHandle->shmem_name;
|
||||
// open shmem
|
||||
ecd._ipc_fd = shm_open(ecd._ipc_name.c_str(), O_RDWR, 0777);
|
||||
throwing_errno_check(ecd._ipc_fd < 0, __FILE__, __func__, __LINE__);
|
||||
// mmap it
|
||||
ecd._ipc_shmem = (ihipIpcEventShmem_t*)mmap(0, sizeof(ihipIpcEventShmem_t), PROT_READ | PROT_WRITE, MAP_SHARED, ecd._ipc_fd, 0);
|
||||
throwing_errno_check(NULL == ecd._ipc_shmem, __FILE__, __func__, __LINE__);
|
||||
// update shared state
|
||||
ecd._ipc_shmem->owners += 1;
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
#else
|
||||
return ihipLogStatus(hipErrorNotSupported);
|
||||
#endif
|
||||
}
|
||||
@@ -1,91 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
|
||||
#include "hip_fatbin.h"
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
void __hipDumpCodeObject(const std::string& image) {
|
||||
char fname[30];
|
||||
static std::atomic<int> index;
|
||||
sprintf(fname, "__hip_dump_code_object%04d.o", index++);
|
||||
tprintf(DB_FB, "Dump code object %s\n", fname);
|
||||
std::ofstream ofs;
|
||||
ofs.open(fname, std::ios::binary);
|
||||
ofs << image;
|
||||
ofs.close();
|
||||
}
|
||||
|
||||
// Returns a pointer to the code object in the fatbin. The pointer should not
|
||||
// be freed.
|
||||
const void* __hipExtractCodeObjectFromFatBinary(const void* data,
|
||||
const char* agent_name)
|
||||
{
|
||||
hip_impl::hip_init();
|
||||
|
||||
tprintf(DB_FB, "Enter __hipExtractCodeObjectFromFatBinary(%p, \"%s\")\n",
|
||||
data, agent_name);
|
||||
|
||||
const __ClangOffloadBundleHeader* header
|
||||
= reinterpret_cast<const __ClangOffloadBundleHeader*>(data);
|
||||
std::string magic(reinterpret_cast<const char*>(header),
|
||||
sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC) - 1);
|
||||
if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const __ClangOffloadBundleDesc* desc = &header->desc[0];
|
||||
for (uint64_t i = 0; i < header->numBundles; ++i,
|
||||
desc = reinterpret_cast<const __ClangOffloadBundleDesc*>(
|
||||
reinterpret_cast<uintptr_t>(&desc->triple[0]) + desc->tripleSize)) {
|
||||
|
||||
std::string triple{&desc->triple[0], sizeof(AMDGCN_AMDHSA_TRIPLE) - 1};
|
||||
if (triple.compare(AMDGCN_AMDHSA_TRIPLE))
|
||||
continue;
|
||||
|
||||
std::string target{&desc->triple[sizeof(AMDGCN_AMDHSA_TRIPLE)],
|
||||
desc->tripleSize - sizeof(AMDGCN_AMDHSA_TRIPLE)};
|
||||
tprintf(DB_FB, "Found hip-clang bundle for %s\n", target.c_str());
|
||||
if (target.compare(agent_name)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto *codeobj = reinterpret_cast<const char*>(
|
||||
reinterpret_cast<uintptr_t>(header) + desc->offset);
|
||||
if (HIP_DUMP_CODE_OBJECT)
|
||||
__hipDumpCodeObject(std::string{codeobj, desc->size});
|
||||
|
||||
tprintf(DB_FB, "__hipExtractCodeObjectFromFatBinary succeeds and returns %p\n",
|
||||
codeobj);
|
||||
return codeobj;
|
||||
}
|
||||
|
||||
// hipcc --genco for HCC generates fat binaries with different triple strings.
|
||||
// It will reach here and return a null pointer. The fat binary itself will
|
||||
// be handled in a different place.
|
||||
tprintf(DB_FB, "No hip-clang device code bundle for %s\n", agent_name);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -1,58 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef HIP_SRC_HIP_FATBIN_H
|
||||
#define HIP_SRC_HIP_FATBIN_H
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
|
||||
// hip-clang fatbin format
|
||||
constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"
|
||||
|
||||
#define CLANG_OFFLOAD_BUNDLER_MAGIC "__CLANG_OFFLOAD_BUNDLE__"
|
||||
#define AMDGCN_AMDHSA_TRIPLE "hip-amdgcn-amd-amdhsa"
|
||||
|
||||
struct __ClangOffloadBundleDesc {
|
||||
uint64_t offset;
|
||||
uint64_t size;
|
||||
uint64_t tripleSize;
|
||||
const char triple[1];
|
||||
};
|
||||
|
||||
struct __ClangOffloadBundleHeader {
|
||||
const char magic[sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC) - 1];
|
||||
uint64_t numBundles;
|
||||
__ClangOffloadBundleDesc desc[1];
|
||||
};
|
||||
|
||||
struct __CudaFatBinaryWrapper {
|
||||
unsigned int magic;
|
||||
unsigned int version;
|
||||
__ClangOffloadBundleHeader* binary;
|
||||
void* unused;
|
||||
};
|
||||
|
||||
const void* __hipExtractCodeObjectFromFatBinary(const void* data,
|
||||
const char* agent_name);
|
||||
void __hipDumpCodeObject(const std::string& image);
|
||||
|
||||
#endif // HIP_SRC_HIP_FATBIN_H
|
||||
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@@ -1,53 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_prof_api.h"
|
||||
|
||||
// HIP API callback/activity
|
||||
|
||||
api_callbacks_table_t callbacks_table;
|
||||
|
||||
extern std::string& FunctionSymbol(const hipFunction_t f);
|
||||
const char* hipKernelNameRef(const hipFunction_t f) { return FunctionSymbol(f).c_str(); }
|
||||
|
||||
hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg) {
|
||||
return callbacks_table.set_callback(id, reinterpret_cast<api_callbacks_table_t::fun_t>(fun), arg) ?
|
||||
hipSuccess : hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
hipError_t hipRemoveApiCallback(uint32_t id) {
|
||||
return callbacks_table.set_callback(id, NULL, NULL) ? hipSuccess : hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg) {
|
||||
return callbacks_table.set_activity(id, reinterpret_cast<api_callbacks_table_t::act_t>(fun), arg) ?
|
||||
hipSuccess : hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
hipError_t hipRemoveActivityCallback(uint32_t id) {
|
||||
return callbacks_table.set_activity(id, NULL, NULL) ? hipSuccess : hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
const char* hipApiName(uint32_t id) {
|
||||
return hip_api_name(id);
|
||||
}
|
||||
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@@ -1,231 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hc_am.hpp>
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
|
||||
// Peer access functions.
|
||||
// There are two flavors:
|
||||
// - one where contexts are specified with hipCtx_t type.
|
||||
// - one where contexts are specified with integer deviceIds, that are mapped to the primary
|
||||
// context for that device.
|
||||
// The implementation contains a set of internal ihip* functions which operate on contexts. Then
|
||||
// the public APIs are thin wrappers which call into this internal implementations.
|
||||
// TODO - actually not yet - currently the integer deviceId flavors just call the context APIs. need
|
||||
// to fix.
|
||||
|
||||
|
||||
hipError_t ihipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t peerCtx) {
|
||||
hipError_t err = hipSuccess;
|
||||
|
||||
if(canAccessPeer == NULL) {
|
||||
err = hipErrorInvalidValue;
|
||||
}
|
||||
else if ((thisCtx != NULL) && (peerCtx != NULL)) {
|
||||
if (thisCtx == peerCtx) {
|
||||
*canAccessPeer = 0;
|
||||
tprintf(DB_MEM, "Can't be peer to self. (this=%s, peer=%s)\n",
|
||||
thisCtx->toString().c_str(), peerCtx->toString().c_str());
|
||||
} else if (HIP_FORCE_P2P_HOST & 0x2) {
|
||||
*canAccessPeer = false;
|
||||
tprintf(DB_MEM,
|
||||
"HIP_FORCE_P2P_HOST denies peer access this=%s peer=%s canAccessPeer=%d\n",
|
||||
thisCtx->toString().c_str(), peerCtx->toString().c_str(), *canAccessPeer);
|
||||
} else {
|
||||
*canAccessPeer = peerCtx->getDevice()->_acc.get_is_peer(thisCtx->getDevice()->_acc);
|
||||
tprintf(DB_MEM, "deviceCanAccessPeer this=%s peer=%s canAccessPeer=%d\n",
|
||||
thisCtx->toString().c_str(), peerCtx->toString().c_str(), *canAccessPeer);
|
||||
}
|
||||
|
||||
} else {
|
||||
*canAccessPeer = 0;
|
||||
err = hipErrorInvalidDevice;
|
||||
}
|
||||
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* HCC returns 0 in *canAccessPeer ; Need to update this function when RT supports P2P
|
||||
*/
|
||||
//---
|
||||
hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t peerCtx) {
|
||||
HIP_INIT_API(NONE, canAccessPeer, thisCtx, peerCtx);
|
||||
|
||||
return ihipLogStatus(ihipDeviceCanAccessPeer(canAccessPeer, thisCtx, peerCtx));
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
// Disable visibility of this device into memory allocated on peer device.
|
||||
// Remove this device from peer device peerlist.
|
||||
hipError_t ihipDisablePeerAccess(TlsData* tls, hipCtx_t peerCtx) {
|
||||
hipError_t err = hipSuccess;
|
||||
|
||||
auto thisCtx = ihipGetTlsDefaultCtx();
|
||||
if ((thisCtx != NULL) && (peerCtx != NULL)) {
|
||||
bool canAccessPeer = peerCtx->getDevice()->_acc.get_is_peer(thisCtx->getDevice()->_acc);
|
||||
|
||||
if (!canAccessPeer) {
|
||||
err = hipErrorInvalidDevice; // P2P not allowed between these devices.
|
||||
} else if (thisCtx == peerCtx) {
|
||||
err = hipErrorInvalidDevice; // Can't disable peer access to self.
|
||||
} else {
|
||||
LockedAccessor_CtxCrit_t peerCrit(peerCtx->criticalData());
|
||||
bool changed = peerCrit->removePeerWatcher(peerCtx, thisCtx);
|
||||
if (changed) {
|
||||
tprintf(DB_MEM, "device %s disable access to memory allocated on peer:%s\n",
|
||||
thisCtx->toString().c_str(), peerCtx->toString().c_str());
|
||||
// Update the peers for all memory already saved in the tracker:
|
||||
am_memtracker_update_peers(peerCtx->getDevice()->_acc, peerCrit->peerCnt(),
|
||||
peerCrit->peerAgents());
|
||||
} else {
|
||||
err = hipErrorPeerAccessNotEnabled; // never enabled P2P access.
|
||||
}
|
||||
}
|
||||
} else {
|
||||
err = hipErrorInvalidDevice;
|
||||
}
|
||||
|
||||
return err;
|
||||
};
|
||||
|
||||
|
||||
//---
|
||||
// Allow the current device to see all memory allocated on peerCtx.
|
||||
// This should add this device to the peer-device peer list.
|
||||
hipError_t ihipEnablePeerAccess(TlsData* tls, hipCtx_t peerCtx, unsigned int flags) {
|
||||
hipError_t err = hipSuccess;
|
||||
if (flags != 0) {
|
||||
err = hipErrorInvalidValue;
|
||||
} else {
|
||||
auto thisCtx = ihipGetTlsDefaultCtx();
|
||||
if (thisCtx == peerCtx) {
|
||||
err = hipErrorInvalidDevice; // Can't enable peer access to self.
|
||||
} else if ((thisCtx != NULL) && (peerCtx != NULL)) {
|
||||
|
||||
int canAccess = 0;
|
||||
if ((hipSuccess != ihipDeviceCanAccessPeer(&canAccess,thisCtx,peerCtx)) || (canAccess == 0)){
|
||||
tprintf(DB_MEM, "device=%s can't access peer=%s\n",thisCtx->toString().c_str(), peerCtx->toString().c_str());
|
||||
err = hipErrorInvalidDevice;
|
||||
} else {
|
||||
LockedAccessor_CtxCrit_t peerCrit(peerCtx->criticalData());
|
||||
// Add thisCtx to peerCtx's access list so that new allocations on peer will be made
|
||||
// visible to this device:
|
||||
bool isNewPeer = peerCrit->addPeerWatcher(peerCtx, thisCtx);
|
||||
if (isNewPeer) {
|
||||
tprintf(DB_MEM, "device=%s can now see all memory allocated on peer=%s\n",
|
||||
thisCtx->toString().c_str(), peerCtx->toString().c_str());
|
||||
am_memtracker_update_peers(peerCtx->getDevice()->_acc, peerCrit->peerCnt(),
|
||||
peerCrit->peerAgents());
|
||||
} else {
|
||||
err = hipErrorPeerAccessAlreadyEnabled;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
err = hipErrorInvalidDevice;
|
||||
}
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
hipError_t hipMemcpyPeer(void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t srcCtx,
|
||||
size_t sizeBytes) {
|
||||
HIP_INIT_API(NONE, dst, dstCtx, src, srcCtx, sizeBytes);
|
||||
|
||||
// TODO - move to ihip memory copy implementaion.
|
||||
// HCC has a unified memory architecture so device specifiers are not required.
|
||||
return ihipLogStatus(hipMemcpy(dst, src, sizeBytes, hipMemcpyDefault));
|
||||
};
|
||||
|
||||
|
||||
//---
|
||||
hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hipCtx_t srcDevice,
|
||||
size_t sizeBytes, hipStream_t stream) {
|
||||
HIP_INIT_API(NONE, dst, dstDevice, src, srcDevice, sizeBytes, stream);
|
||||
|
||||
// TODO - move to ihip memory copy implementaion.
|
||||
// HCC has a unified memory architecture so device specifiers are not required.
|
||||
return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDefault, stream));
|
||||
};
|
||||
|
||||
|
||||
//=============================================================================
|
||||
// These are the flavors that accept integer deviceIDs.
|
||||
// Implementations map these to primary contexts and call the internal functions above.
|
||||
//=============================================================================
|
||||
|
||||
hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId) {
|
||||
HIP_INIT_API(hipDeviceCanAccessPeer, canAccessPeer, deviceId, peerDeviceId);
|
||||
return ihipLogStatus(ihipDeviceCanAccessPeer(canAccessPeer, ihipGetPrimaryCtx(deviceId),
|
||||
ihipGetPrimaryCtx(peerDeviceId)));
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) {
|
||||
HIP_INIT_API(hipDeviceDisablePeerAccess, peerDeviceId);
|
||||
|
||||
return ihipLogStatus(ihipDisablePeerAccess(tls, ihipGetPrimaryCtx(peerDeviceId)));
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags) {
|
||||
HIP_INIT_API(hipDeviceEnablePeerAccess, peerDeviceId, flags);
|
||||
|
||||
return ihipLogStatus(ihipEnablePeerAccess(tls, ihipGetPrimaryCtx(peerDeviceId), flags));
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice,
|
||||
size_t sizeBytes) {
|
||||
HIP_INIT_API(hipMemcpyPeer, dst, dstDevice, src, srcDevice, sizeBytes);
|
||||
return ihipLogStatus(hipMemcpyPeer(dst, ihipGetPrimaryCtx(dstDevice), src,
|
||||
ihipGetPrimaryCtx(srcDevice), sizeBytes));
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice,
|
||||
size_t sizeBytes, hipStream_t stream) {
|
||||
HIP_INIT_API(hipMemcpyPeerAsync, dst, dstDevice, src, srcDevice, sizeBytes, stream);
|
||||
return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDefault, stream));
|
||||
}
|
||||
|
||||
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
|
||||
HIP_INIT_API(hipCtxEnablePeerAccess, peerCtx, flags);
|
||||
|
||||
return ihipLogStatus(ihipEnablePeerAccess(tls, peerCtx, flags));
|
||||
}
|
||||
|
||||
hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) {
|
||||
HIP_INIT_API(hipCtxDisablePeerAccess, peerCtx);
|
||||
|
||||
return ihipLogStatus(ihipDisablePeerAccess(tls, peerCtx));
|
||||
}
|
||||
@@ -1,200 +0,0 @@
|
||||
// automatically generated sources
|
||||
#ifndef _HIP_PROF_API_H
|
||||
#define _HIP_PROF_API_H
|
||||
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
|
||||
#include "hip/hcc_detail/hip_prof_str.h"
|
||||
|
||||
template <typename Record, typename Fun, typename Act>
|
||||
class api_callbacks_table_templ {
|
||||
public:
|
||||
typedef std::recursive_mutex mutex_t;
|
||||
|
||||
typedef Record record_t;
|
||||
typedef Fun fun_t;
|
||||
typedef Act act_t;
|
||||
|
||||
// HIP API callbacks table
|
||||
struct hip_cb_table_entry_t {
|
||||
volatile std::atomic<bool> sync;
|
||||
volatile std::atomic<uint32_t> sem;
|
||||
act_t act;
|
||||
void* a_arg;
|
||||
fun_t fun;
|
||||
void* arg;
|
||||
};
|
||||
|
||||
struct hip_cb_table_t {
|
||||
hip_cb_table_entry_t arr[HIP_API_ID_NUMBER];
|
||||
};
|
||||
|
||||
api_callbacks_table_templ() {
|
||||
memset(&callbacks_table_, 0, sizeof(callbacks_table_));
|
||||
}
|
||||
|
||||
bool set_activity(uint32_t id, act_t fun, void* arg) {
|
||||
std::lock_guard<mutex_t> lock(mutex_);
|
||||
bool ret = true;
|
||||
if (id < HIP_API_ID_NUMBER) {
|
||||
cb_sync(id);
|
||||
callbacks_table_.arr[id].act = fun;
|
||||
callbacks_table_.arr[id].a_arg = arg;
|
||||
cb_release(id);
|
||||
} else {
|
||||
ret = false;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool set_callback(uint32_t id, fun_t fun, void* arg) {
|
||||
std::lock_guard<mutex_t> lock(mutex_);
|
||||
bool ret = true;
|
||||
if (id < HIP_API_ID_NUMBER) {
|
||||
cb_sync(id);
|
||||
callbacks_table_.arr[id].fun = fun;
|
||||
callbacks_table_.arr[id].arg = arg;
|
||||
cb_release(id);
|
||||
} else {
|
||||
ret = false;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline hip_cb_table_entry_t& entry(const uint32_t& id) {
|
||||
return callbacks_table_.arr[id];
|
||||
}
|
||||
|
||||
inline void sem_sync(const uint32_t& id) {
|
||||
sem_increment(id);
|
||||
if (entry(id).sync.load() == true) sync_wait(id);
|
||||
}
|
||||
|
||||
inline void sem_release(const uint32_t& id) {
|
||||
sem_decrement(id);
|
||||
}
|
||||
|
||||
private:
|
||||
inline void cb_sync(const uint32_t& id) {
|
||||
entry(id).sync.store(true);
|
||||
while (entry(id).sem.load() != 0) {}
|
||||
}
|
||||
|
||||
inline void cb_release(const uint32_t& id) {
|
||||
entry(id).sync.store(false);
|
||||
}
|
||||
|
||||
inline void sem_increment(const uint32_t& id) {
|
||||
const uint32_t prev = entry(id).sem.fetch_add(1);
|
||||
if (prev == UINT32_MAX) {
|
||||
std::cerr << "sem overflow id = " << id << std::endl << std::flush;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
inline void sem_decrement(const uint32_t& id) {
|
||||
const uint32_t prev = entry(id).sem.fetch_sub(1);
|
||||
if (prev == 0) {
|
||||
std::cerr << "sem corrupted id = " << id << std::endl << std::flush;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
void sync_wait(const uint32_t& id) {
|
||||
sem_decrement(id);
|
||||
while (entry(id).sync.load() == true) {}
|
||||
sem_increment(id);
|
||||
}
|
||||
|
||||
mutex_t mutex_;
|
||||
hip_cb_table_t callbacks_table_;
|
||||
};
|
||||
|
||||
|
||||
#if USE_PROF_API
|
||||
#include <prof_protocol.h>
|
||||
|
||||
static const uint32_t HIP_DOMAIN_ID = ACTIVITY_DOMAIN_HIP_API;
|
||||
typedef activity_record_t hip_api_record_t;
|
||||
typedef activity_rtapi_callback_t hip_api_callback_t;
|
||||
typedef activity_sync_callback_t hip_act_callback_t;
|
||||
|
||||
// HIP API callbacks spawner object macro
|
||||
#define HIP_CB_SPAWNER_OBJECT(CB_ID) \
|
||||
hip_api_data_t api_data{}; \
|
||||
INIT_CB_ARGS_DATA(CB_ID, api_data); \
|
||||
api_callbacks_spawner_t<HIP_API_ID_##CB_ID> __api_tracer(HIP_API_ID_##CB_ID, api_data);
|
||||
|
||||
typedef api_callbacks_table_templ<hip_api_record_t,
|
||||
hip_api_callback_t,
|
||||
hip_act_callback_t> api_callbacks_table_t;
|
||||
extern api_callbacks_table_t callbacks_table;
|
||||
|
||||
template <int cid_>
|
||||
class api_callbacks_spawner_t {
|
||||
public:
|
||||
api_callbacks_spawner_t(const hip_api_id_t& cid, hip_api_data_t& api_data) :
|
||||
api_data_(api_data),
|
||||
record_({})
|
||||
{
|
||||
if (cid_ >= HIP_API_ID_NUMBER) {
|
||||
fprintf(stderr, "HIP %s bad id %d\n", __FUNCTION__, cid_);
|
||||
abort();
|
||||
}
|
||||
callbacks_table.sem_sync(cid_);
|
||||
|
||||
act = entry(cid_).act;
|
||||
a_arg = entry(cid_).a_arg;
|
||||
fun = entry(cid_).fun;
|
||||
arg = entry(cid_).arg;
|
||||
|
||||
api_data_.phase = 0;
|
||||
if (act != NULL) act(cid_, &record_, &api_data_, a_arg);
|
||||
if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, &api_data_, arg);
|
||||
}
|
||||
|
||||
~api_callbacks_spawner_t() {
|
||||
api_data_.phase = 1;
|
||||
if (act != NULL) act(cid_, &record_, &api_data_, a_arg);
|
||||
if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, &api_data_, arg);
|
||||
|
||||
callbacks_table.sem_release(cid_);
|
||||
}
|
||||
|
||||
private:
|
||||
inline api_callbacks_table_t::hip_cb_table_entry_t& entry(const uint32_t& id) {
|
||||
return callbacks_table.entry(id);
|
||||
}
|
||||
|
||||
hip_api_data_t& api_data_;
|
||||
hip_api_record_t record_;
|
||||
|
||||
hip_act_callback_t act;
|
||||
void* a_arg;
|
||||
hip_api_callback_t fun;
|
||||
void* arg;
|
||||
};
|
||||
|
||||
template <>
|
||||
class api_callbacks_spawner_t<HIP_API_ID_NUMBER> {
|
||||
public:
|
||||
api_callbacks_spawner_t(const hip_api_id_t& cid, hip_api_data_t& api_data) {}
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
#define HIP_CB_SPAWNER_OBJECT(x) do {} while(0)
|
||||
|
||||
class api_callbacks_table_t {
|
||||
public:
|
||||
typedef void* act_t;
|
||||
typedef void* fun_t;
|
||||
bool set_activity(uint32_t id, act_t fun, void* arg) { return false; }
|
||||
bool set_callback(uint32_t id, fun_t fun, void* arg) { return false; }
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#endif // _HIP_PROF_API_H
|
||||
@@ -1,296 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
// Stream
|
||||
//
|
||||
#if defined(__HCC__) && (__hcc_major__ < 3) && (__hcc_minor__ < 3)
|
||||
enum queue_priority
|
||||
{
|
||||
priority_high = 0,
|
||||
priority_normal = 0,
|
||||
priority_low = 0
|
||||
};
|
||||
#else
|
||||
enum queue_priority
|
||||
{
|
||||
priority_high = Kalmar::priority_high,
|
||||
priority_normal = Kalmar::priority_normal,
|
||||
priority_low = Kalmar::priority_low
|
||||
};
|
||||
#endif
|
||||
|
||||
//---
|
||||
hipError_t ihipStreamCreate(TlsData *tls, hipStream_t* stream, unsigned int flags, int priority) {
|
||||
ihipCtx_t* ctx = ihipGetTlsDefaultCtx();
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
if (ctx) {
|
||||
if (HIP_FORCE_NULL_STREAM) {
|
||||
*stream = 0;
|
||||
} else if( NULL == stream ){
|
||||
e = hipErrorInvalidValue;
|
||||
} else {
|
||||
hc::accelerator acc = ctx->getWriteableDevice()->_acc;
|
||||
|
||||
// TODO - se try-catch loop to detect memory exception?
|
||||
//
|
||||
// Note this is an execute_any_order queue,
|
||||
// CUDA stream behavior is that all kernels submitted will automatically
|
||||
// wait for prev to complete, this behaviour will be mainatined by
|
||||
// hipModuleLaunchKernel. execute_any_order will help
|
||||
// hipExtModuleLaunchKernel , which uses a special flag
|
||||
|
||||
{
|
||||
// Obtain mutex access to the device critical data, release by destructor
|
||||
LockedAccessor_CtxCrit_t ctxCrit(ctx->criticalData());
|
||||
|
||||
#if defined(__HCC__) && (__hcc_major__ < 3) && (__hcc_minor__ < 3)
|
||||
auto istream = new ihipStream_t(ctx, acc.create_view(), flags);
|
||||
#else
|
||||
auto istream = new ihipStream_t(ctx, acc.create_view(Kalmar::execute_any_order, Kalmar::queuing_mode_automatic, (Kalmar::queue_priority)priority), flags);
|
||||
#endif
|
||||
|
||||
ctxCrit->addStream(istream);
|
||||
*stream = istream;
|
||||
}
|
||||
tprintf(DB_SYNC, "hipStreamCreate, %s\n", ToString(*stream).c_str());
|
||||
}
|
||||
|
||||
} else {
|
||||
e = hipErrorInvalidDevice;
|
||||
}
|
||||
|
||||
return e;
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) {
|
||||
HIP_INIT_API(hipStreamCreateWithFlags, stream, flags);
|
||||
if(flags == hipStreamDefault || flags == hipStreamNonBlocking)
|
||||
return ihipLogStatus(ihipStreamCreate(tls, stream, flags, priority_normal));
|
||||
else
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
//---
|
||||
hipError_t hipStreamCreate(hipStream_t* stream) {
|
||||
HIP_INIT_API(hipStreamCreate, stream);
|
||||
|
||||
return ihipLogStatus(ihipStreamCreate(tls, stream, hipStreamDefault, priority_normal));
|
||||
}
|
||||
|
||||
//---
|
||||
hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) {
|
||||
HIP_INIT_API(hipStreamCreateWithPriority, stream, flags, priority);
|
||||
|
||||
// clamp priority to range [priority_high:priority_low]
|
||||
priority = (priority < priority_high ? priority_high : (priority > priority_low ? priority_low : priority));
|
||||
return ihipLogStatus(ihipStreamCreate(tls, stream, flags, priority));
|
||||
}
|
||||
|
||||
//---
|
||||
hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) {
|
||||
HIP_INIT_API(hipDeviceGetStreamPriorityRange, leastPriority, greatestPriority);
|
||||
|
||||
if (leastPriority != NULL) *leastPriority = priority_low;
|
||||
if (greatestPriority != NULL) *greatestPriority = priority_high;
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) {
|
||||
HIP_INIT_SPECIAL_API(hipStreamWaitEvent, TRACE_SYNC, stream, event, flags);
|
||||
|
||||
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
|
||||
|
||||
auto ecd = event->locked_copyCrit();
|
||||
if (event->_flags & hipEventInterprocess) {
|
||||
// this is an IPC event
|
||||
if (ecd._ipc_shmem->read_index >= 0) {
|
||||
// we have at least one recorded event, so proceed
|
||||
stream->locked_streamWaitEvent(ecd);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if ((ecd._state != hipEventStatusUnitialized) && (ecd._state != hipEventStatusCreated)) {
|
||||
if (HIP_SYNC_STREAM_WAIT || (HIP_SYNC_NULL_STREAM && (stream == 0))) {
|
||||
ecd.marker().wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked
|
||||
: hc::hcWaitModeActive);
|
||||
} else {
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
// This will use create_blocking_marker to wait on the specified queue.
|
||||
stream->locked_streamWaitEvent(ecd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
};
|
||||
|
||||
|
||||
//---
|
||||
hipError_t hipStreamQuery(hipStream_t stream) {
|
||||
HIP_INIT_SPECIAL_API(hipStreamQuery, TRACE_QUERY, stream);
|
||||
|
||||
// Use default stream if 0 specified:
|
||||
if (stream == hipStreamNull) {
|
||||
ihipCtx_t* device = ihipGetTlsDefaultCtx();
|
||||
stream = device->_defaultStream;
|
||||
}
|
||||
|
||||
bool isEmpty = 0;
|
||||
|
||||
{
|
||||
LockedAccessor_StreamCrit_t crit(stream->_criticalData);
|
||||
isEmpty = crit->_av.get_is_empty();
|
||||
}
|
||||
|
||||
hipError_t e = isEmpty ? hipSuccess : hipErrorNotReady;
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
hipError_t hipStreamSynchronize(hipStream_t stream) {
|
||||
HIP_INIT_SPECIAL_API(hipStreamSynchronize, TRACE_SYNC, stream);
|
||||
|
||||
return ihipLogStatus(ihipStreamSynchronize(tls, stream));
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
/**
|
||||
* @return #hipSuccess, #hipErrorInvalidHandle
|
||||
*/
|
||||
hipError_t hipStreamDestroy(hipStream_t stream) {
|
||||
HIP_INIT_API(hipStreamDestroy, stream);
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
|
||||
//--- Drain the stream:
|
||||
if (stream == NULL) {
|
||||
if (!HIP_FORCE_NULL_STREAM) {
|
||||
e = hipErrorInvalidHandle;
|
||||
}
|
||||
} else {
|
||||
stream->locked_wait();
|
||||
|
||||
ihipCtx_t* ctx = stream->getCtx();
|
||||
|
||||
if (ctx) {
|
||||
ctx->locked_removeStream(stream);
|
||||
delete stream;
|
||||
} else {
|
||||
e = hipErrorInvalidHandle;
|
||||
}
|
||||
}
|
||||
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags) {
|
||||
HIP_INIT_API(hipStreamGetFlags, stream, flags);
|
||||
|
||||
if (flags == NULL) {
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
} else if (stream == hipStreamNull) {
|
||||
return ihipLogStatus(hipErrorInvalidHandle);
|
||||
} else {
|
||||
*flags = stream->_flags;
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//--
|
||||
hipError_t hipStreamGetPriority(hipStream_t stream, int* priority) {
|
||||
HIP_INIT_API(hipStreamGetPriority, stream, priority);
|
||||
|
||||
if (priority == NULL) {
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
} else if (stream == hipStreamNull) {
|
||||
return ihipLogStatus(hipErrorInvalidHandle);
|
||||
} else {
|
||||
#if defined(__HCC__) && (__hcc_major__ < 3) && (__hcc_minor__ < 3)
|
||||
*priority = 0;
|
||||
#else
|
||||
LockedAccessor_StreamCrit_t crit(stream->criticalData());
|
||||
*priority = crit->_av.get_queue_priority();
|
||||
#endif
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData,
|
||||
unsigned int flags) {
|
||||
HIP_INIT_API(hipStreamAddCallback, stream, callback, userData, flags);
|
||||
|
||||
auto stream_original{stream};
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
|
||||
if (!stream) return hipErrorInvalidValue;
|
||||
|
||||
LockedAccessor_StreamCrit_t cs{stream->criticalData()};
|
||||
|
||||
// create first marker
|
||||
auto cf = cs->_av.create_marker(hc::no_scope);
|
||||
// get its signal
|
||||
auto signal = *reinterpret_cast<hsa_signal_t*>(cf.get_native_handle());
|
||||
// increment its signal value
|
||||
hsa_signal_add_relaxed(signal, 1);
|
||||
|
||||
// create callback that can be passed to hsa_amd_signal_async_handler
|
||||
// this function will call the user's callback, then sets first packet's signal to 0 to indicate completion
|
||||
auto t{new std::function<void()>{[=]() {
|
||||
callback(stream_original, hipSuccess, userData);
|
||||
hsa_signal_store_relaxed(signal, 0);
|
||||
}}};
|
||||
|
||||
// register above callback with HSA runtime to be called when first packet's signal
|
||||
// is decremented from 2 to 1 by CP (or it is already at 1)
|
||||
hsa_amd_signal_async_handler(signal, HSA_SIGNAL_CONDITION_EQ, 1,
|
||||
[](hsa_signal_value_t x, void* p) {
|
||||
(*static_cast<decltype(t)>(p))();
|
||||
delete static_cast<decltype(t)>(p);
|
||||
return false;
|
||||
}, t);
|
||||
|
||||
// create additional marker that blocks on the first one
|
||||
cs->_av.create_blocking_marker(cf, hc::no_scope);
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
@@ -1,87 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <map>
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
#include "hip_surface.h"
|
||||
|
||||
static std::map<hipSurfaceObject_t, hipSurface*> surfaceHash;
|
||||
|
||||
void saveSurfaceInfo(const hipSurface* pSurface, const hipResourceDesc* pResDesc) {
|
||||
if (pResDesc != nullptr) {
|
||||
memcpy((void*)&(pSurface->resDesc), (void*)pResDesc, sizeof(hipResourceDesc));
|
||||
}
|
||||
}
|
||||
|
||||
// Surface Object APIs
|
||||
hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject,
|
||||
const hipResourceDesc* pResDesc) {
|
||||
HIP_INIT_API(hipCreateSurfaceObject, pSurfObject, pResDesc);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if (ctx) {
|
||||
hipSurface* pSurface = (hipSurface*)malloc(sizeof(hipSurface));
|
||||
if (pSurface != nullptr) {
|
||||
memset(pSurface, 0, sizeof(hipSurface));
|
||||
saveSurfaceInfo(pSurface, pResDesc);
|
||||
}
|
||||
|
||||
switch (pResDesc->resType) {
|
||||
case hipResourceTypeArray:
|
||||
pSurface->array = pResDesc->res.array.array;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
unsigned int* surfObj;
|
||||
hipMalloc((void**)&surfObj, sizeof(hipArray));
|
||||
hipMemcpy(surfObj, (void*)pResDesc->res.array.array, sizeof(hipArray),
|
||||
hipMemcpyHostToDevice);
|
||||
*pSurfObject = (hipSurfaceObject_t)surfObj;
|
||||
surfaceHash[*pSurfObject] = pSurface;
|
||||
}
|
||||
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) {
|
||||
HIP_INIT_API(hipDestroySurfaceObject, surfaceObject);
|
||||
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if (ctx) {
|
||||
hipSurface* pSurface = surfaceHash[surfaceObject];
|
||||
if (pSurface != nullptr) {
|
||||
free(pSurface);
|
||||
surfaceHash.erase(surfaceObject);
|
||||
}
|
||||
}
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_SURFACE_H
|
||||
#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_SURFACE_H
|
||||
|
||||
#include <hip/hcc_detail/hip_surface_types.h>
|
||||
struct hipSurface {
|
||||
hipArray* array;
|
||||
hipResourceDesc resDesc;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,851 +0,0 @@
|
||||
|
||||
#include <map>
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "hsa/hsa.h"
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
#include "hip_texture.h"
|
||||
|
||||
static std::map<hipTextureObject_t, hipTexture*> textureHash;
|
||||
|
||||
void saveTextureInfo(const hipTexture* pTexture, const hipResourceDesc* pResDesc,
|
||||
const hipTextureDesc* pTexDesc, const hipResourceViewDesc* pResViewDesc) {
|
||||
if (pResDesc != nullptr) {
|
||||
memcpy((void*)&(pTexture->resDesc), (void*)pResDesc, sizeof(hipResourceDesc));
|
||||
}
|
||||
|
||||
if (pTexDesc != nullptr) {
|
||||
memcpy((void*)&(pTexture->texDesc), (void*)pTexDesc, sizeof(hipTextureDesc));
|
||||
}
|
||||
|
||||
if (pResViewDesc != nullptr) {
|
||||
memcpy((void*)&(pTexture->resViewDesc), (void*)pResViewDesc, sizeof(hipResourceViewDesc));
|
||||
}
|
||||
}
|
||||
|
||||
void getDrvChannelOrderAndType(const enum hipArray_Format Format, enum hipTextureReadMode readMode, unsigned int NumChannels,
|
||||
hsa_ext_image_channel_order_t* channelOrder,
|
||||
hsa_ext_image_channel_type_t* channelType) {
|
||||
switch (Format) {
|
||||
case HIP_AD_FORMAT_UNSIGNED_INT8:
|
||||
*channelType = readMode == hipReadModeNormalizedFloat
|
||||
? HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8
|
||||
: HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
|
||||
break;
|
||||
case HIP_AD_FORMAT_UNSIGNED_INT16:
|
||||
*channelType = readMode == hipReadModeNormalizedFloat
|
||||
? HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16
|
||||
: HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16;
|
||||
break;
|
||||
case HIP_AD_FORMAT_UNSIGNED_INT32:
|
||||
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32;
|
||||
break;
|
||||
case HIP_AD_FORMAT_SIGNED_INT8:
|
||||
*channelType = readMode == hipReadModeNormalizedFloat
|
||||
? HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8
|
||||
: HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8;
|
||||
break;
|
||||
case HIP_AD_FORMAT_SIGNED_INT16:
|
||||
*channelType = readMode == hipReadModeNormalizedFloat
|
||||
? HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16
|
||||
: HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16;
|
||||
break;
|
||||
case HIP_AD_FORMAT_SIGNED_INT32:
|
||||
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32;
|
||||
break;
|
||||
case HIP_AD_FORMAT_HALF:
|
||||
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT;
|
||||
break;
|
||||
case HIP_AD_FORMAT_FLOAT:
|
||||
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (NumChannels == 4) {
|
||||
*channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
|
||||
} else if (NumChannels == 2) {
|
||||
*channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RG;
|
||||
} else if (NumChannels == 1) {
|
||||
*channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_R;
|
||||
}
|
||||
}
|
||||
void getChannelOrderAndType(const hipChannelFormatDesc& desc, enum hipTextureReadMode readMode,
|
||||
hsa_ext_image_channel_order_t* channelOrder,
|
||||
hsa_ext_image_channel_type_t* channelType) {
|
||||
if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w != 0) {
|
||||
*channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
|
||||
} else if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w == 0) {
|
||||
*channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RGB;
|
||||
} else if (desc.x != 0 && desc.y != 0 && desc.z == 0 && desc.w == 0) {
|
||||
*channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RG;
|
||||
} else if (desc.x != 0 && desc.y == 0 && desc.z == 0 && desc.w == 0) {
|
||||
*channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_R;
|
||||
} else {
|
||||
}
|
||||
|
||||
switch (desc.f) {
|
||||
case hipChannelFormatKindUnsigned:
|
||||
switch (desc.x) {
|
||||
case 32:
|
||||
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32;
|
||||
break;
|
||||
case 16:
|
||||
*channelType = readMode == hipReadModeNormalizedFloat
|
||||
? HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16
|
||||
: HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16;
|
||||
break;
|
||||
case 8:
|
||||
*channelType = readMode == hipReadModeNormalizedFloat
|
||||
? HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8
|
||||
: HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
|
||||
break;
|
||||
default:
|
||||
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32;
|
||||
}
|
||||
break;
|
||||
case hipChannelFormatKindSigned:
|
||||
switch (desc.x) {
|
||||
case 32:
|
||||
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32;
|
||||
break;
|
||||
case 16:
|
||||
*channelType = readMode == hipReadModeNormalizedFloat
|
||||
? HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16
|
||||
: HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16;
|
||||
break;
|
||||
case 8:
|
||||
*channelType = readMode == hipReadModeNormalizedFloat
|
||||
? HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8
|
||||
: HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8;
|
||||
break;
|
||||
default:
|
||||
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32;
|
||||
}
|
||||
break;
|
||||
case hipChannelFormatKindFloat:
|
||||
switch (desc.x) {
|
||||
case 32:
|
||||
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT;
|
||||
break;
|
||||
case 16:
|
||||
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT;
|
||||
break;
|
||||
case 8:
|
||||
break;
|
||||
default:
|
||||
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT;
|
||||
}
|
||||
break;
|
||||
case hipChannelFormatKindNone:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void fillSamplerDescriptor(hsa_ext_sampler_descriptor_t& samplerDescriptor,
|
||||
enum hipTextureAddressMode addressMode,
|
||||
enum hipTextureFilterMode filterMode, int normalizedCoords) {
|
||||
if (normalizedCoords) {
|
||||
samplerDescriptor.coordinate_mode = HSA_EXT_SAMPLER_COORDINATE_MODE_NORMALIZED;
|
||||
} else {
|
||||
samplerDescriptor.coordinate_mode = HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED;
|
||||
}
|
||||
|
||||
switch (filterMode) {
|
||||
case hipFilterModePoint:
|
||||
samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_NEAREST;
|
||||
break;
|
||||
case hipFilterModeLinear:
|
||||
samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_LINEAR;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (addressMode) {
|
||||
case hipAddressModeWrap:
|
||||
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT;
|
||||
break;
|
||||
case hipAddressModeClamp:
|
||||
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE;
|
||||
break;
|
||||
case hipAddressModeMirror:
|
||||
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT;
|
||||
break;
|
||||
case hipAddressModeBorder:
|
||||
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool getHipTextureObject(hipTextureObject_t* pTexObject, hsa_ext_image_t& image,
|
||||
hsa_ext_sampler_t sampler) {
|
||||
unsigned int* texSRD;
|
||||
hipMalloc((void**)&texSRD, HIP_TEXTURE_OBJECT_SIZE_DWORD * 4);
|
||||
hipMemcpy(texSRD, (void*)image.handle, HIP_IMAGE_OBJECT_SIZE_DWORD * 4,
|
||||
hipMemcpyDeviceToDevice);
|
||||
hipMemcpy(texSRD + HIP_SAMPLER_OBJECT_OFFSET_DWORD, (void*)sampler.handle,
|
||||
HIP_SAMPLER_OBJECT_SIZE_DWORD * 4, hipMemcpyDeviceToDevice);
|
||||
*pTexObject = (hipTextureObject_t)texSRD;
|
||||
|
||||
#ifdef DEBUG
|
||||
unsigned int* srd = (unsigned int*)malloc(HIP_TEXTURE_OBJECT_SIZE_DWORD * 4);
|
||||
hipMemcpy(srd, texSRD, HIP_TEXTURE_OBJECT_SIZE_DWORD * 4, hipMemcpyDeviceToHost);
|
||||
printf("New SRD: \n");
|
||||
for (int i = 0; i < HIP_TEXTURE_OBJECT_SIZE_DWORD; i++) {
|
||||
printf("SRD[%d]: %x\n", i, srd[i]);
|
||||
}
|
||||
printf("\n");
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
// Texture Object APIs
|
||||
hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc,
|
||||
const hipTextureDesc* pTexDesc,
|
||||
const hipResourceViewDesc* pResViewDesc) {
|
||||
HIP_INIT_API(hipCreateTextureObject, pTexObject, pResDesc, pTexDesc, pResViewDesc);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if (ctx) {
|
||||
hc::accelerator acc = ctx->getDevice()->_acc;
|
||||
auto device = ctx->getWriteableDevice();
|
||||
|
||||
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
|
||||
|
||||
hipTexture* pTexture = (hipTexture*)malloc(sizeof(hipTexture));
|
||||
if (pTexture != nullptr) {
|
||||
memset(pTexture, 0, sizeof(hipTexture));
|
||||
saveTextureInfo(pTexture, pResDesc, pTexDesc, pResViewDesc);
|
||||
}
|
||||
|
||||
hsa_ext_image_descriptor_t imageDescriptor;
|
||||
hsa_ext_image_channel_order_t channelOrder;
|
||||
hsa_ext_image_channel_type_t channelType;
|
||||
void* devPtr = nullptr;
|
||||
size_t pitch = 0;
|
||||
switch (pResDesc->resType) {
|
||||
case hipResourceTypeArray:
|
||||
devPtr = pResDesc->res.array.array->data;
|
||||
imageDescriptor.width = pResDesc->res.array.array->width;
|
||||
imageDescriptor.height = pResDesc->res.array.array->height;
|
||||
switch (pResDesc->res.array.array->type) {
|
||||
case hipArrayLayered:
|
||||
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2DA;
|
||||
imageDescriptor.depth = 0;
|
||||
imageDescriptor.array_size = pResDesc->res.array.array->depth;
|
||||
break;
|
||||
case hipArrayCubemap:
|
||||
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_3D;
|
||||
imageDescriptor.depth = pResDesc->res.array.array->depth;
|
||||
imageDescriptor.array_size = 0;
|
||||
break;
|
||||
case hipArraySurfaceLoadStore:
|
||||
case hipArrayTextureGather:
|
||||
case hipArrayDefault:
|
||||
default:
|
||||
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
|
||||
imageDescriptor.depth = 0;
|
||||
imageDescriptor.array_size = 0;
|
||||
break;
|
||||
}
|
||||
getChannelOrderAndType(pResDesc->res.array.array->desc, pTexDesc->readMode,
|
||||
&channelOrder, &channelType);
|
||||
break;
|
||||
case hipResourceTypeMipmappedArray:
|
||||
devPtr = pResDesc->res.mipmap.mipmap->data;
|
||||
imageDescriptor.width = pResDesc->res.mipmap.mipmap->width;
|
||||
imageDescriptor.height = pResDesc->res.mipmap.mipmap->height;
|
||||
imageDescriptor.depth = pResDesc->res.mipmap.mipmap->depth;
|
||||
imageDescriptor.array_size = 0;
|
||||
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
|
||||
getChannelOrderAndType(pResDesc->res.mipmap.mipmap->desc, pTexDesc->readMode,
|
||||
&channelOrder, &channelType);
|
||||
break;
|
||||
case hipResourceTypeLinear:
|
||||
devPtr = pResDesc->res.linear.devPtr;
|
||||
imageDescriptor.width = pResDesc->res.linear.sizeInBytes/((pResDesc->res.linear.desc.x + pResDesc->res.linear.desc.y + pResDesc->res.linear.desc.z + pResDesc->res.linear.desc.w)/8);
|
||||
imageDescriptor.height = 1;
|
||||
imageDescriptor.depth = 0;
|
||||
imageDescriptor.array_size = 0;
|
||||
imageDescriptor.geometry =
|
||||
HSA_EXT_IMAGE_GEOMETRY_1D; // ? HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR
|
||||
getChannelOrderAndType(pResDesc->res.linear.desc, pTexDesc->readMode, &channelOrder,
|
||||
&channelType);
|
||||
break;
|
||||
case hipResourceTypePitch2D:
|
||||
devPtr = pResDesc->res.pitch2D.devPtr;
|
||||
imageDescriptor.width = pResDesc->res.pitch2D.width;
|
||||
imageDescriptor.height = pResDesc->res.pitch2D.height;
|
||||
imageDescriptor.depth = 0;
|
||||
imageDescriptor.array_size = 0;
|
||||
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
|
||||
pitch = pResDesc->res.pitch2D.pitchInBytes;
|
||||
getChannelOrderAndType(pResDesc->res.pitch2D.desc, pTexDesc->readMode,
|
||||
&channelOrder, &channelType);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
imageDescriptor.format.channel_order = channelOrder;
|
||||
imageDescriptor.format.channel_type = channelType;
|
||||
|
||||
hsa_ext_sampler_descriptor_t samplerDescriptor;
|
||||
fillSamplerDescriptor(samplerDescriptor, pTexDesc->addressMode[0], pTexDesc->filterMode,
|
||||
pTexDesc->normalizedCoords);
|
||||
if(hipResourceTypeLinear == pResDesc->resType) {
|
||||
samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_NEAREST;
|
||||
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER;
|
||||
} else if(!pTexDesc->normalizedCoords) {
|
||||
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE;
|
||||
}
|
||||
hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;
|
||||
|
||||
if(hipResourceTypePitch2D != pResDesc->resType)
|
||||
pitch = getElementSize(channelOrder, channelType) * alignUp(imageDescriptor.width, IMAGE_PITCH_ALIGNMENT);
|
||||
|
||||
if (HSA_STATUS_SUCCESS != hsa_ext_image_create_with_layout(
|
||||
*agent, &imageDescriptor, devPtr, permission,
|
||||
HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, pitch, 0, &(pTexture->image)) ||
|
||||
HSA_STATUS_SUCCESS !=
|
||||
hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) {
|
||||
free(pTexture);
|
||||
return ihipLogStatus(hipErrorRuntimeOther);
|
||||
}
|
||||
|
||||
getHipTextureObject(pTexObject, pTexture->image, pTexture->sampler);
|
||||
|
||||
textureHash[*pTexObject] = pTexture;
|
||||
}
|
||||
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
|
||||
HIP_INIT_API(hipDestroyTextureObject, textureObject);
|
||||
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if (ctx) {
|
||||
hc::accelerator acc = ctx->getDevice()->_acc;
|
||||
auto device = ctx->getWriteableDevice();
|
||||
|
||||
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
|
||||
|
||||
hipTexture* pTexture = textureHash[textureObject];
|
||||
if (pTexture != nullptr) {
|
||||
hsa_ext_image_destroy(*agent, pTexture->image);
|
||||
hsa_ext_sampler_destroy(*agent, pTexture->sampler);
|
||||
free(pTexture);
|
||||
textureHash.erase(textureObject);
|
||||
}
|
||||
}
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc,
|
||||
hipTextureObject_t textureObject) {
|
||||
HIP_INIT_API(hipGetTextureObjectResourceDesc, pResDesc, textureObject);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if (ctx) {
|
||||
hipTexture* pTexture = textureHash[textureObject];
|
||||
if (pTexture != nullptr && pResDesc != nullptr) {
|
||||
memcpy((void*)pResDesc, (void*)&(pTexture->resDesc), sizeof(hipResourceDesc));
|
||||
}
|
||||
}
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc,
|
||||
hipTextureObject_t textureObject) {
|
||||
HIP_INIT_API(hipGetTextureObjectResourceViewDesc, pResViewDesc, textureObject);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if (ctx) {
|
||||
hipTexture* pTexture = textureHash[textureObject];
|
||||
if (pTexture != nullptr && pResViewDesc != nullptr) {
|
||||
memcpy((void*)pResViewDesc, (void*)&(pTexture->resViewDesc),
|
||||
sizeof(hipResourceViewDesc));
|
||||
}
|
||||
}
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc,
|
||||
hipTextureObject_t textureObject) {
|
||||
HIP_INIT_API(hipGetTextureObjectTextureDesc, pTexDesc, textureObject);
|
||||
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if (ctx) {
|
||||
hipTexture* pTexture = textureHash[textureObject];
|
||||
if (pTexture != nullptr && pTexDesc != nullptr) {
|
||||
memcpy((void*)pTexDesc, (void*)&(pTexture->texDesc), sizeof(hipTextureDesc));
|
||||
}
|
||||
}
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
// Texture Reference APIs
|
||||
hipError_t ihipBindTextureImpl(TlsData *tls_, int dim, enum hipTextureReadMode readMode, size_t* offset,
|
||||
const void* devPtr, const struct hipChannelFormatDesc* desc,
|
||||
size_t size, textureReference* tex) {
|
||||
TlsData *tls = (tls_ == nullptr) ? tls_get_ptr() : tls_;
|
||||
hipError_t hip_status = hipSuccess;
|
||||
enum hipTextureAddressMode addressMode = tex->addressMode[0];
|
||||
enum hipTextureFilterMode filterMode = tex->filterMode;
|
||||
int normalizedCoords = tex->normalized;
|
||||
hipTextureObject_t& textureObject = tex->textureObject;
|
||||
if(offset != nullptr)
|
||||
*offset = 0;
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if (ctx) {
|
||||
hc::accelerator acc = ctx->getDevice()->_acc;
|
||||
auto device = ctx->getWriteableDevice();
|
||||
|
||||
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
|
||||
|
||||
hipTexture* pTexture = (hipTexture*)malloc(sizeof(hipTexture));
|
||||
if (pTexture != nullptr) {
|
||||
memset(pTexture, 0, sizeof(hipTexture));
|
||||
}
|
||||
|
||||
hsa_ext_image_descriptor_t imageDescriptor;
|
||||
|
||||
assert(dim == hipTextureType1D);
|
||||
|
||||
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_1D;
|
||||
imageDescriptor.width = size;
|
||||
imageDescriptor.height = 1;
|
||||
imageDescriptor.depth = 1;
|
||||
imageDescriptor.array_size = 0;
|
||||
|
||||
hsa_ext_image_channel_order_t channelOrder;
|
||||
hsa_ext_image_channel_type_t channelType;
|
||||
if (NULL == desc) {
|
||||
getDrvChannelOrderAndType(tex->format, readMode, tex->numChannels, &channelOrder, &channelType);
|
||||
} else {
|
||||
getChannelOrderAndType(*desc, readMode, &channelOrder, &channelType);
|
||||
}
|
||||
imageDescriptor.format.channel_order = channelOrder;
|
||||
imageDescriptor.format.channel_type = channelType;
|
||||
|
||||
hsa_ext_sampler_descriptor_t samplerDescriptor;
|
||||
samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_NEAREST;
|
||||
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER;
|
||||
if (normalizedCoords) {
|
||||
samplerDescriptor.coordinate_mode = HSA_EXT_SAMPLER_COORDINATE_MODE_NORMALIZED;
|
||||
} else {
|
||||
samplerDescriptor.coordinate_mode = HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED;
|
||||
}
|
||||
|
||||
hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;
|
||||
|
||||
size_t rowPitch = getElementSize(channelOrder, channelType) * alignUp(size, IMAGE_PITCH_ALIGNMENT);
|
||||
|
||||
if (HSA_STATUS_SUCCESS != hsa_ext_image_create_with_layout(
|
||||
*agent, &imageDescriptor, devPtr, permission,
|
||||
HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, rowPitch, 0, &(pTexture->image)) ||
|
||||
HSA_STATUS_SUCCESS !=
|
||||
hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) {
|
||||
free(pTexture);
|
||||
return hipErrorRuntimeOther;
|
||||
}
|
||||
getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler);
|
||||
pTexture->devPtr = (void*) devPtr;
|
||||
textureHash[textureObject] = pTexture;
|
||||
}
|
||||
|
||||
return hip_status;
|
||||
}
|
||||
|
||||
hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr,
|
||||
const hipChannelFormatDesc* desc, size_t size) {
|
||||
HIP_INIT_API(hipBindTexture, offset, tex, devPtr, desc, size);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
// TODO: hipReadModeElementType is default.
|
||||
hip_status = ihipBindTextureImpl(tls, hipTextureType1D, hipReadModeElementType, offset, devPtr, desc,
|
||||
size, tex);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t ihipBindTexture2DImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, size_t* offset,
|
||||
const void* devPtr, const struct hipChannelFormatDesc* desc,
|
||||
size_t width, size_t height, textureReference* tex, size_t pitch) {
|
||||
hipError_t hip_status = hipSuccess;
|
||||
enum hipTextureAddressMode addressMode = tex->addressMode[0];
|
||||
enum hipTextureFilterMode filterMode = tex->filterMode;
|
||||
int normalizedCoords = tex->normalized;
|
||||
hipTextureObject_t& textureObject = tex->textureObject;
|
||||
if(offset != nullptr)
|
||||
*offset = 0;
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if (ctx) {
|
||||
hc::accelerator acc = ctx->getDevice()->_acc;
|
||||
auto device = ctx->getWriteableDevice();
|
||||
|
||||
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
|
||||
|
||||
hipTexture* pTexture = (hipTexture*)malloc(sizeof(hipTexture));
|
||||
if (pTexture != nullptr) {
|
||||
memset(pTexture, 0, sizeof(hipTexture));
|
||||
}
|
||||
|
||||
hsa_ext_image_descriptor_t imageDescriptor;
|
||||
|
||||
assert(dim == hipTextureType2D);
|
||||
|
||||
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
|
||||
imageDescriptor.width = width;
|
||||
imageDescriptor.height = height;
|
||||
imageDescriptor.depth = 1;
|
||||
imageDescriptor.array_size = 0;
|
||||
|
||||
hsa_ext_image_channel_order_t channelOrder;
|
||||
hsa_ext_image_channel_type_t channelType;
|
||||
|
||||
if (NULL == desc) {
|
||||
getDrvChannelOrderAndType(tex->format, readMode, tex->numChannels, &channelOrder, &channelType);
|
||||
} else {
|
||||
getChannelOrderAndType(*desc, readMode, &channelOrder, &channelType);
|
||||
}
|
||||
imageDescriptor.format.channel_order = channelOrder;
|
||||
imageDescriptor.format.channel_type = channelType;
|
||||
|
||||
hsa_ext_sampler_descriptor_t samplerDescriptor;
|
||||
fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords);
|
||||
if(!normalizedCoords) {
|
||||
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE;
|
||||
}
|
||||
hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;
|
||||
|
||||
if( 0 == pitch)
|
||||
pitch = getElementSize(channelOrder, channelType) * alignUp(width, IMAGE_PITCH_ALIGNMENT);
|
||||
|
||||
if (HSA_STATUS_SUCCESS != hsa_ext_image_create_with_layout(
|
||||
*agent, &imageDescriptor, devPtr, permission,
|
||||
HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, pitch, 0, &(pTexture->image)) ||
|
||||
HSA_STATUS_SUCCESS !=
|
||||
hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) {
|
||||
free(pTexture);
|
||||
return hipErrorRuntimeOther;
|
||||
}
|
||||
getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler);
|
||||
pTexture->devPtr = (void*) devPtr;
|
||||
textureHash[textureObject] = pTexture;
|
||||
}
|
||||
|
||||
return hip_status;
|
||||
}
|
||||
|
||||
hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr,
|
||||
const hipChannelFormatDesc* desc, size_t width, size_t height,
|
||||
size_t pitch) {
|
||||
HIP_INIT_API(hipBindTexture2D, offset, tex, devPtr, desc, width, height, pitch);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
//TODO: Fix when HSA accepts user defined pitch
|
||||
if(pitch % 64) pitch =0;
|
||||
|
||||
hip_status = ihipBindTexture2DImpl(tls, hipTextureType2D, hipReadModeElementType, offset, devPtr,
|
||||
desc, width, height, tex, pitch);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t ihipBindTextureToArrayImpl(TlsData *tls_, int dim, enum hipTextureReadMode readMode,
|
||||
hipArray_const_t array,
|
||||
const struct hipChannelFormatDesc& desc,
|
||||
textureReference* tex) {
|
||||
TlsData *tls = (tls_ == nullptr) ? tls_get_ptr() : tls_;
|
||||
hipError_t hip_status = hipSuccess;
|
||||
enum hipTextureAddressMode addressMode = tex->addressMode[0];
|
||||
enum hipTextureFilterMode filterMode = tex->filterMode;
|
||||
int normalizedCoords = tex->normalized;
|
||||
hipTextureObject_t& textureObject = tex->textureObject;
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if (ctx) {
|
||||
hc::accelerator acc = ctx->getDevice()->_acc;
|
||||
auto device = ctx->getWriteableDevice();
|
||||
|
||||
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
|
||||
|
||||
hipTexture* pTexture = (hipTexture*)malloc(sizeof(hipTexture));
|
||||
if (pTexture != nullptr) {
|
||||
memset(pTexture, 0, sizeof(hipTexture));
|
||||
}
|
||||
|
||||
hsa_ext_image_descriptor_t imageDescriptor;
|
||||
|
||||
imageDescriptor.width = array->width;
|
||||
imageDescriptor.height = array->height;
|
||||
imageDescriptor.depth = array->depth;
|
||||
imageDescriptor.array_size = 0;
|
||||
|
||||
switch (dim) {
|
||||
case hipTextureType1D:
|
||||
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_1D;
|
||||
imageDescriptor.height = 1;
|
||||
imageDescriptor.depth = 1;
|
||||
break;
|
||||
case hipTextureType2D:
|
||||
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
|
||||
imageDescriptor.depth = 1;
|
||||
break;
|
||||
case hipTextureType3D:
|
||||
case hipTextureTypeCubemap:
|
||||
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_3D;
|
||||
break;
|
||||
case hipTextureType1DLayered:
|
||||
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_1DA;
|
||||
imageDescriptor.height = 1;
|
||||
imageDescriptor.array_size = array->height;
|
||||
break;
|
||||
case hipTextureType2DLayered:
|
||||
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2DA;
|
||||
imageDescriptor.depth = 1;
|
||||
imageDescriptor.array_size = array->depth;
|
||||
break;
|
||||
case hipTextureTypeCubemapLayered:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
hsa_ext_image_channel_order_t channelOrder;
|
||||
hsa_ext_image_channel_type_t channelType;
|
||||
if (array->isDrv) {
|
||||
getDrvChannelOrderAndType(array->Format, readMode, array->NumChannels,
|
||||
&channelOrder, &channelType);
|
||||
} else {
|
||||
getChannelOrderAndType(desc, readMode, &channelOrder, &channelType);
|
||||
}
|
||||
imageDescriptor.format.channel_order = channelOrder;
|
||||
imageDescriptor.format.channel_type = channelType;
|
||||
|
||||
hsa_ext_sampler_descriptor_t samplerDescriptor;
|
||||
fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords);
|
||||
if(!normalizedCoords) {
|
||||
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE;
|
||||
}
|
||||
hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;
|
||||
|
||||
size_t rowPitch = getElementSize(channelOrder, channelType) * alignUp(imageDescriptor.width, IMAGE_PITCH_ALIGNMENT);
|
||||
|
||||
if (HSA_STATUS_SUCCESS != hsa_ext_image_create_with_layout(
|
||||
*agent, &imageDescriptor, array->data, permission,
|
||||
HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, rowPitch, 0, &(pTexture->image)) ||
|
||||
HSA_STATUS_SUCCESS !=
|
||||
hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) {
|
||||
return hipErrorRuntimeOther;
|
||||
}
|
||||
getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler);
|
||||
pTexture->devPtr = (void*) array;
|
||||
textureHash[textureObject] = pTexture;
|
||||
}
|
||||
|
||||
return hip_status;
|
||||
}
|
||||
|
||||
hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array,
|
||||
const hipChannelFormatDesc* desc) {
|
||||
HIP_INIT_API(hipBindTextureToArray, tex, array, desc);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
// TODO: hipReadModeElementType is default.
|
||||
hip_status =
|
||||
ihipBindTextureToArrayImpl(tls, array->textureType, hipReadModeElementType, array, *desc, tex);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipBindTextureToMipmappedArray(textureReference* tex,
|
||||
hipMipmappedArray_const_t mipmappedArray,
|
||||
const hipChannelFormatDesc* desc) {
|
||||
HIP_INIT_API(hipBindTextureToMipmappedArray, tex, mipmappedArray, desc);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject) {
|
||||
hipError_t hip_status = hipSuccess;
|
||||
TlsData* tls=tls_get_ptr();
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if (ctx) {
|
||||
hc::accelerator acc = ctx->getDevice()->_acc;
|
||||
auto device = ctx->getWriteableDevice();
|
||||
|
||||
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
|
||||
hipTexture* pTexture = textureHash[textureObject];
|
||||
if (pTexture != nullptr) {
|
||||
hsa_ext_image_destroy(*agent, pTexture->image);
|
||||
hsa_ext_sampler_destroy(*agent, pTexture->sampler);
|
||||
free(pTexture);
|
||||
textureHash.erase(textureObject);
|
||||
}
|
||||
}
|
||||
|
||||
return hip_status;
|
||||
}
|
||||
|
||||
hipError_t hipUnbindTexture(const textureReference* tex) {
|
||||
HIP_INIT_API(hipUnbindTexture, tex);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
hip_status = ihipUnbindTextureImpl(tex->textureObject);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) {
|
||||
HIP_INIT_API(hipGetChannelDesc, desc, array);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if (ctx) {
|
||||
*desc = array->desc;
|
||||
}
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* tex) {
|
||||
HIP_INIT_API(hipGetTextureAlignmentOffset, offset, tex);
|
||||
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if (ctx) {
|
||||
if(offset != nullptr)
|
||||
*offset = 0;
|
||||
}
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipGetTextureReference(const textureReference** tex, const void* symbol) {
|
||||
HIP_INIT_API(hipGetTextureReference, tex, symbol);
|
||||
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
if (ctx) {
|
||||
}
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefSetFormat(textureReference* tex, hipArray_Format fmt, int NumPackedComponents) {
|
||||
HIP_INIT_API(hipTexRefSetFormat, tex, fmt, NumPackedComponents);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
tex->format = fmt;
|
||||
tex->numChannels = NumPackedComponents;
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefSetFlags(textureReference* tex, unsigned int flags) {
|
||||
HIP_INIT_API(hipTexRefSetFlags, tex, flags);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
if(flags == HIP_TRSF_READ_AS_INTEGER)
|
||||
tex->readMode = hipReadModeElementType;
|
||||
else if(flags == HIP_TRSF_NORMALIZED_COORDINATES)
|
||||
tex->normalized = flags;
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefSetFilterMode(textureReference* tex, hipTextureFilterMode fm) {
|
||||
HIP_INIT_API(hipTexRefSetFilterMode, tex, fm);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
tex->filterMode = fm;
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAddressMode am) {
|
||||
HIP_INIT_API(hipTexRefSetAddressMode, tex, dim, am);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
tex->addressMode[dim] = am;
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefGetAddressMode(hipTextureAddressMode* am, textureReference tex, int dim) {
|
||||
HIP_INIT_API(hipTexRefGetAddressMode,am, &tex, dim);
|
||||
|
||||
if ((am == nullptr) || (dim >= 3))
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
|
||||
*am = tex.addressMode[dim];
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags) {
|
||||
HIP_INIT_API(hipTexRefSetArray, tex, array, flags);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
|
||||
hip_status = ihipBindTextureToArrayImpl(tls, array->textureType, tex->readMode, array,
|
||||
array->desc, tex);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefGetArray(hipArray_t* array, textureReference tex) {
|
||||
HIP_INIT_API(hipTexRefGetArray, array, &tex);
|
||||
|
||||
if (array == nullptr)
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
|
||||
hipTexture* pTexture = textureHash[tex.textureObject];
|
||||
if((pTexture == nullptr) || (hipResourceTypeArray != pTexture->resDesc.resType))
|
||||
return ihipLogStatus(hipErrorInvalidImage);
|
||||
|
||||
if (pTexture->devPtr == nullptr)
|
||||
return ihipLogStatus(hipErrorUnknown);
|
||||
|
||||
*array = reinterpret_cast<hipArray_t>(pTexture->devPtr);
|
||||
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDeviceptr_t devPtr,
|
||||
size_t size) {
|
||||
HIP_INIT_API(hipTexRefSetAddress, offset, tex, devPtr, size);
|
||||
hipError_t hip_status = hipSuccess;
|
||||
// TODO: hipReadModeElementType is default.
|
||||
hip_status = ihipBindTextureImpl(tls, hipTextureType1D, tex->readMode, offset, devPtr, NULL,
|
||||
size, tex);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, textureReference tex) {
|
||||
HIP_INIT_API(hipTexRefGetAddress,dev_ptr, &tex);
|
||||
|
||||
if (dev_ptr == nullptr)
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
|
||||
hipTexture* pTexture = textureHash[tex.textureObject];
|
||||
if (pTexture == nullptr)
|
||||
return ihipLogStatus(hipErrorInvalidImage);
|
||||
|
||||
if (pTexture->devPtr == nullptr)
|
||||
return ihipLogStatus(hipErrorUnknown);
|
||||
|
||||
*dev_ptr = reinterpret_cast<hipDeviceptr_t>(pTexture->devPtr);
|
||||
return ihipLogStatus(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc,
|
||||
hipDeviceptr_t devPtr, size_t pitch) {
|
||||
HIP_INIT_API(hipTexRefSetAddress2D, tex, desc, devPtr, pitch);
|
||||
size_t offset;
|
||||
hipError_t hip_status = hipSuccess;
|
||||
// TODO: hipReadModeElementType is default.
|
||||
//TODO: Fix when HSA accepts user defined pitch
|
||||
if(pitch % 64) pitch =0;
|
||||
|
||||
hip_status = ihipBindTexture2DImpl(tls, hipTextureType2D, tex->readMode, &offset, devPtr,
|
||||
NULL, desc->Width, desc->Height, tex, pitch);
|
||||
return ihipLogStatus(hip_status);
|
||||
}
|
||||
@@ -1,37 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_INCLUDE_HCC_DETAIL_HIP_TEXTURE_H
|
||||
#define HIP_INCLUDE_HCC_DETAIL_HIP_TEXTURE_H
|
||||
|
||||
#include <hip/hcc_detail/texture_types.h>
|
||||
|
||||
struct hipTexture {
|
||||
hipResourceDesc resDesc;
|
||||
hipTextureDesc texDesc;
|
||||
hipResourceViewDesc resViewDesc;
|
||||
hsa_ext_image_t image;
|
||||
hsa_ext_sampler_t sampler;
|
||||
void* devPtr;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,38 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef HIP_INCLUDE_HCC_DETAIL_HIP_UTIL_H
|
||||
#define HIP_INCLUDE_HCC_DETAIL_HIP_UTIL_H
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <list>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <deque>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
#endif
|
||||
@@ -1,634 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "../include/hip/hiprtc.h"
|
||||
#include "code_object_bundle.inl"
|
||||
#include "../include/hip/hcc_detail/elfio/elfio.hpp"
|
||||
#include "../include/hip/hcc_detail/program_state.hpp"
|
||||
|
||||
#include "../lpl_ca/pstreams/pstream.h"
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
|
||||
#include <cxxabi.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <future>
|
||||
#include <iterator>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <iostream>
|
||||
#include <sys/stat.h>
|
||||
|
||||
extern "C" const char* hiprtcGetErrorString(hiprtcResult x)
|
||||
{
|
||||
switch (x) {
|
||||
case HIPRTC_SUCCESS:
|
||||
return "HIPRTC_SUCCESS";
|
||||
case HIPRTC_ERROR_OUT_OF_MEMORY:
|
||||
return "HIPRTC_ERROR_OUT_OF_MEMORY";
|
||||
case HIPRTC_ERROR_PROGRAM_CREATION_FAILURE:
|
||||
return "HIPRTC_ERROR_PROGRAM_CREATION_FAILURE";
|
||||
case HIPRTC_ERROR_INVALID_INPUT:
|
||||
return "HIPRTC_ERROR_INVALID_INPUT";
|
||||
case HIPRTC_ERROR_INVALID_PROGRAM:
|
||||
return "HIPRTC_ERROR_INVALID_PROGRAM";
|
||||
case HIPRTC_ERROR_INVALID_OPTION:
|
||||
return "HIPRTC_ERROR_INVALID_OPTION";
|
||||
case HIPRTC_ERROR_COMPILATION:
|
||||
return "HIPRTC_ERROR_COMPILATION";
|
||||
case HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE:
|
||||
return "HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE";
|
||||
case HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION:
|
||||
return "HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION";
|
||||
case HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION:
|
||||
return "HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION";
|
||||
case HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID:
|
||||
return "HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID";
|
||||
case HIPRTC_ERROR_INTERNAL_ERROR:
|
||||
return "HIPRTC_ERROR_INTERNAL_ERROR";
|
||||
default: throw std::logic_error{"Invalid HIPRTC result."};
|
||||
};
|
||||
}
|
||||
|
||||
namespace hip_impl {
|
||||
inline bool create_directory(const std::string& path) {
|
||||
mode_t mode = 0755;
|
||||
int ret = mkdir(path.c_str(), mode);
|
||||
if (ret == 0) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool fileExists (const std::string& name) {
|
||||
struct stat buffer;
|
||||
return (stat (name.c_str(), &buffer) == 0);
|
||||
}
|
||||
} // namespace hip_impl
|
||||
|
||||
namespace
|
||||
{
|
||||
char* demangle(const char* x)
|
||||
{
|
||||
if (!x) return nullptr;
|
||||
|
||||
int s{};
|
||||
char* tmp = abi::__cxa_demangle(x, nullptr, nullptr, &s);
|
||||
|
||||
if (s != 0) return nullptr;
|
||||
|
||||
return tmp;
|
||||
}
|
||||
} // Unnamed namespace.
|
||||
|
||||
namespace
|
||||
{
|
||||
struct Symbol {
|
||||
std::string name;
|
||||
ELFIO::Elf64_Addr value = 0;
|
||||
ELFIO::Elf_Xword size = 0;
|
||||
ELFIO::Elf_Half sect_idx = 0;
|
||||
std::uint8_t bind = 0;
|
||||
std::uint8_t type = 0;
|
||||
std::uint8_t other = 0;
|
||||
};
|
||||
|
||||
inline
|
||||
Symbol read_symbol(const ELFIO::symbol_section_accessor& section,
|
||||
unsigned int idx) {
|
||||
assert(idx < section.get_symbols_num());
|
||||
|
||||
Symbol r;
|
||||
section.get_symbol(
|
||||
idx, r.name, r.value, r.size, r.bind, r.type, r.sect_idx, r.other);
|
||||
|
||||
return r;
|
||||
}
|
||||
} // Unnamed namespace.
|
||||
|
||||
struct _hiprtcProgram {
|
||||
// DATA - STATICS
|
||||
static std::vector<std::unique_ptr<_hiprtcProgram>> programs;
|
||||
static std::mutex mtx;
|
||||
|
||||
// DATA
|
||||
std::vector<std::pair<std::string, std::string>> headers;
|
||||
std::vector<std::pair<std::string, std::string>> names;
|
||||
std::vector<std::string> loweredNames;
|
||||
std::vector<char> elf;
|
||||
std::string source;
|
||||
std::string name;
|
||||
std::string log;
|
||||
bool compiled;
|
||||
|
||||
// STATICS
|
||||
static
|
||||
hiprtcResult destroy(_hiprtcProgram* p)
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
lock_guard<mutex> lck{mtx};
|
||||
|
||||
const auto it{find_if(programs.cbegin(), programs.cend(),
|
||||
[=](const unique_ptr<_hiprtcProgram>& x) {
|
||||
return x.get() == p;
|
||||
})};
|
||||
|
||||
if (it == programs.cend()) return HIPRTC_ERROR_INVALID_PROGRAM;
|
||||
|
||||
return HIPRTC_SUCCESS;
|
||||
}
|
||||
|
||||
static
|
||||
std::string handleMangledName(std::string name)
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
char* demangled = demangle(name.c_str());
|
||||
name.assign(demangled == nullptr ? "" : demangled);
|
||||
free(demangled);
|
||||
|
||||
if (name.empty()) return name;
|
||||
|
||||
if (name.find("void ") == 0) name.erase(0, strlen("void "));
|
||||
|
||||
auto dx{name.find_first_of("(<")};
|
||||
|
||||
if (dx == string::npos) return name;
|
||||
|
||||
if (name[dx] == '<') {
|
||||
auto cnt{1u};
|
||||
do {
|
||||
++dx;
|
||||
cnt += (name[dx] == '<') ? 1 : ((name[dx] == '>') ? -1 : 0);
|
||||
} while (cnt);
|
||||
|
||||
name.erase(++dx);
|
||||
}
|
||||
else name.erase(dx);
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
static
|
||||
_hiprtcProgram* make(std::string s, std::string n,
|
||||
std::vector<std::pair<std::string, std::string>> h)
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
unique_ptr<_hiprtcProgram> tmp{new _hiprtcProgram{move(h), {}, {}, {},
|
||||
move(s), move(n), {},
|
||||
false}};
|
||||
|
||||
lock_guard<mutex> lck{mtx};
|
||||
|
||||
programs.push_back(move(tmp));
|
||||
|
||||
return programs.back().get();
|
||||
}
|
||||
|
||||
static
|
||||
bool isValid(_hiprtcProgram* p) noexcept
|
||||
{
|
||||
return std::find_if(programs.cbegin(), programs.cend(),
|
||||
[=](const std::unique_ptr<_hiprtcProgram>& x) {
|
||||
return x.get() == p;
|
||||
}) != programs.cend();
|
||||
}
|
||||
|
||||
// MANIPULATORS
|
||||
bool compile(const std::vector<std::string>& args)
|
||||
{
|
||||
using namespace ELFIO;
|
||||
using namespace redi;
|
||||
using namespace std;
|
||||
|
||||
ipstream compile{args.front(), args, pstreambuf::pstderr};
|
||||
|
||||
constexpr const auto tmp_size{1024u};
|
||||
char tmp[tmp_size]{};
|
||||
while (!compile.eof()) {
|
||||
log.append(tmp, tmp + compile.readsome(tmp, tmp_size));
|
||||
}
|
||||
|
||||
compile.close();
|
||||
|
||||
if (compile.rdbuf()->exited() &&
|
||||
compile.rdbuf()->status() != EXIT_SUCCESS) return false;
|
||||
|
||||
elfio reader;
|
||||
if (!reader.load(args.back())) return false;
|
||||
|
||||
const auto it{find_if(reader.sections.begin(), reader.sections.end(),
|
||||
[](const section* x) {
|
||||
return (x->get_name() == ".hip_fatbin") || (x->get_name() == ".kernel");
|
||||
})};
|
||||
|
||||
if (it == reader.sections.end()) return false;
|
||||
|
||||
hip_impl::Bundled_code_header h{(*it)->get_data()};
|
||||
|
||||
if (bundles(h).empty()) return false;
|
||||
|
||||
elf.assign(bundles(h).back().blob.cbegin(),
|
||||
bundles(h).back().blob.cend());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool readLoweredNames()
|
||||
{
|
||||
using namespace ELFIO;
|
||||
using namespace hip_impl;
|
||||
using namespace std;
|
||||
|
||||
if (names.empty()) return true;
|
||||
|
||||
istringstream blob{string{elf.cbegin(), elf.cend()}};
|
||||
|
||||
elfio reader;
|
||||
|
||||
if (!reader.load(blob)) return false;
|
||||
|
||||
const auto it{find_if(reader.sections.begin(), reader.sections.end(),
|
||||
[](const section* x) {
|
||||
return x->get_type() == SHT_SYMTAB;
|
||||
})};
|
||||
|
||||
ELFIO::symbol_section_accessor symbols{reader, *it};
|
||||
|
||||
auto n{symbols.get_symbols_num()};
|
||||
|
||||
if (n < loweredNames.size()) return false;
|
||||
|
||||
while (n--) {
|
||||
const auto tmp{read_symbol(symbols, n)};
|
||||
|
||||
auto it{find_if(names.cbegin(), names.cend(),
|
||||
[&](const pair<string, string>& x) {
|
||||
return x.second == tmp.name;
|
||||
})};
|
||||
|
||||
if (it == names.cend()) {
|
||||
const auto name{handleMangledName(tmp.name)};
|
||||
|
||||
if (name.empty()) continue;
|
||||
|
||||
it = find_if(names.cbegin(), names.cend(),
|
||||
[&](const pair<string, string>& x) {
|
||||
return x.second == name;
|
||||
});
|
||||
|
||||
if (it == names.cend()) continue;
|
||||
}
|
||||
|
||||
loweredNames[distance(names.cbegin(), it)] = tmp.name;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void replaceExtension(std::string& fileName, const std::string &ext) const {
|
||||
auto res = fileName.rfind('.');
|
||||
auto sloc = fileName.rfind('/'); // slash location
|
||||
if (res != std::string::npos && (res > sloc || sloc == std::string::npos)) {
|
||||
fileName.replace(fileName.begin() + res, fileName.end(), ext);
|
||||
} else {
|
||||
fileName += ext;
|
||||
}
|
||||
}
|
||||
|
||||
// ACCESSORS
|
||||
std::string writeTemporaryFiles(
|
||||
const std::string& programFolder) const
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
vector<future<void>> fut{headers.size()};
|
||||
transform(headers.cbegin(), headers.cend(), begin(fut),
|
||||
[&](const pair<string, string>& x) {
|
||||
return async([&]() {
|
||||
ofstream h{programFolder + '/' + x.first};
|
||||
h.write(x.second.data(), x.second.size());
|
||||
});
|
||||
});
|
||||
|
||||
auto tmp{(programFolder + '/' + name)};
|
||||
replaceExtension(tmp, ".cpp");
|
||||
ofstream{tmp}.write(source.data(), source.size());
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
std::vector<std::unique_ptr<_hiprtcProgram>> _hiprtcProgram::programs{};
|
||||
std::mutex _hiprtcProgram::mtx{};
|
||||
|
||||
namespace
|
||||
{
|
||||
inline
|
||||
bool isValidProgram(const hiprtcProgram p)
|
||||
{
|
||||
if (!p) return false;
|
||||
|
||||
std::lock_guard<std::mutex> lck{_hiprtcProgram::mtx};
|
||||
|
||||
return _hiprtcProgram::isValid(p);
|
||||
}
|
||||
} // Unnamed namespace.
|
||||
|
||||
extern "C" hiprtcResult hiprtcAddNameExpression(hiprtcProgram p, const char* n)
|
||||
{
|
||||
if (!n) return HIPRTC_ERROR_INVALID_INPUT;
|
||||
if (!isValidProgram(p)) return HIPRTC_ERROR_INVALID_PROGRAM;
|
||||
if (p->compiled) return HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION;
|
||||
|
||||
const auto id{p->names.size()};
|
||||
|
||||
p->names.emplace_back(n, n);
|
||||
p->loweredNames.emplace_back();
|
||||
|
||||
if (p->names.back().second.back() == ')') {
|
||||
p->names.back().second.pop_back();
|
||||
p->names.back().second.erase(0, p->names.back().second.find('('));
|
||||
}
|
||||
if (p->names.back().second.front() == '&') {
|
||||
p->names.back().second.erase(0, 1);
|
||||
}
|
||||
|
||||
const auto var{"__hiprtc_" + std::to_string(id)};
|
||||
p->source.append("\nextern \"C\" constexpr auto " + var + " = " + n + ';');
|
||||
|
||||
return HIPRTC_SUCCESS;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
class Unique_temporary_path {
|
||||
// DATA
|
||||
std::string path_{};
|
||||
public:
|
||||
// CREATORS
|
||||
Unique_temporary_path() : path_{std::tmpnam(nullptr)}
|
||||
{
|
||||
while (hip_impl::fileExists(path_)) {
|
||||
path_ = std::tmpnam(nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
Unique_temporary_path(const Unique_temporary_path&) = default;
|
||||
Unique_temporary_path(Unique_temporary_path&&) = default;
|
||||
|
||||
~Unique_temporary_path() noexcept
|
||||
{
|
||||
std::string s("rm -r " + path_);
|
||||
system(s.c_str());
|
||||
}
|
||||
|
||||
// MANIPULATORS
|
||||
Unique_temporary_path& operator=(
|
||||
const Unique_temporary_path&) = default;
|
||||
Unique_temporary_path& operator=(Unique_temporary_path&&) = default;
|
||||
|
||||
// ACCESSORS
|
||||
const std::string& path() const noexcept
|
||||
{
|
||||
return path_;
|
||||
}
|
||||
};
|
||||
} // Unnamed namespace.
|
||||
|
||||
namespace
|
||||
{
|
||||
const std::string& defaultTarget()
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
static string r{"gfx900"};
|
||||
static once_flag f{};
|
||||
|
||||
call_once(f, []() {
|
||||
static hsa_agent_t a{};
|
||||
hsa_iterate_agents([](hsa_agent_t x, void*) {
|
||||
hsa_device_type_t t{};
|
||||
hsa_agent_get_info(x, HSA_AGENT_INFO_DEVICE, &t);
|
||||
|
||||
if (t != HSA_DEVICE_TYPE_GPU) return HSA_STATUS_SUCCESS;
|
||||
|
||||
a = x;
|
||||
|
||||
return HSA_STATUS_INFO_BREAK;
|
||||
}, nullptr);
|
||||
|
||||
if (!a.handle) return;
|
||||
|
||||
hsa_agent_iterate_isas(a, [](hsa_isa_t x, void*){
|
||||
uint32_t n{};
|
||||
hsa_isa_get_info_alt(x, HSA_ISA_INFO_NAME_LENGTH, &n);
|
||||
|
||||
if (n == 0) return HSA_STATUS_SUCCESS;
|
||||
|
||||
r.resize(n);
|
||||
hsa_isa_get_info_alt(x, HSA_ISA_INFO_NAME, &r[0]);
|
||||
|
||||
r.erase(0, r.find("gfx"));
|
||||
|
||||
return HSA_STATUS_INFO_BREAK;
|
||||
}, nullptr);
|
||||
});
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
inline
|
||||
void handleTarget(std::vector<std::string>& args)
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
bool hasTarget{false};
|
||||
for (auto&& x : args) {
|
||||
const auto dx{x.find("--gpu-architecture")};
|
||||
const auto dy{(dx == string::npos) ? x.find("-arch")
|
||||
: string::npos};
|
||||
|
||||
if (dx == dy) continue;
|
||||
|
||||
x.replace(0, x.find('=', min(dx, dy)), "--amdgpu-target");
|
||||
hasTarget = true;
|
||||
|
||||
break;
|
||||
}
|
||||
if (!hasTarget) args.push_back("--amdgpu-target=" + defaultTarget());
|
||||
}
|
||||
} // Unnamed namespace.
|
||||
|
||||
extern "C" hiprtcResult hiprtcCompileProgram(hiprtcProgram p, int n, const char** o)
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
if (n && !o) return HIPRTC_ERROR_INVALID_INPUT;
|
||||
if (!isValidProgram(p)) return HIPRTC_ERROR_INVALID_PROGRAM;
|
||||
if (p->compiled) return HIPRTC_ERROR_COMPILATION;
|
||||
|
||||
static const string hipcc{
|
||||
getenv("HIP_PATH") ? (getenv("HIP_PATH") + string{"/bin/hipcc"})
|
||||
: "/opt/rocm/bin/hipcc"};
|
||||
|
||||
if (!hip_impl::fileExists(hipcc)) {
|
||||
return HIPRTC_ERROR_INTERNAL_ERROR;
|
||||
}
|
||||
|
||||
Unique_temporary_path tmp{};
|
||||
hip_impl::create_directory(tmp.path());
|
||||
|
||||
const auto src{p->writeTemporaryFiles(tmp.path())};
|
||||
|
||||
vector<string> args{hipcc, "-fPIC -shared"};
|
||||
if (n) args.insert(args.cend(), o, o + n);
|
||||
|
||||
handleTarget(args);
|
||||
|
||||
args.emplace_back(src);
|
||||
args.emplace_back("-o");
|
||||
args.emplace_back(tmp.path() + '/' + "hiprtc.out");
|
||||
|
||||
if (!p->compile(args)) return HIPRTC_ERROR_INTERNAL_ERROR;
|
||||
if (!p->readLoweredNames()) return HIPRTC_ERROR_INTERNAL_ERROR;
|
||||
|
||||
p->compiled = true;
|
||||
|
||||
return HIPRTC_SUCCESS;
|
||||
}
|
||||
|
||||
extern "C" hiprtcResult hiprtcCreateProgram(hiprtcProgram* p, const char* src,
|
||||
const char* name, int n, const char** hdrs,
|
||||
const char** incs)
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
if (!p) return HIPRTC_ERROR_INVALID_PROGRAM;
|
||||
if (n < 0) return HIPRTC_ERROR_INVALID_INPUT;
|
||||
if (n && (!hdrs || !incs)) return HIPRTC_ERROR_INVALID_INPUT;
|
||||
|
||||
vector<pair<string, string>> h;
|
||||
for (auto i = 0; i != n; ++i) h.emplace_back(incs[i], hdrs[i]);
|
||||
|
||||
*p = _hiprtcProgram::make(src, name ? name : "default_name", move(h));
|
||||
|
||||
return HIPRTC_SUCCESS;
|
||||
}
|
||||
|
||||
extern "C" hiprtcResult hiprtcDestroyProgram(hiprtcProgram* p)
|
||||
{
|
||||
if (!p) return HIPRTC_SUCCESS;
|
||||
|
||||
return _hiprtcProgram::destroy(*p);
|
||||
}
|
||||
|
||||
extern "C" hiprtcResult hiprtcGetLoweredName(hiprtcProgram p, const char* n,
|
||||
const char** ln)
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
if (!n || !ln) return HIPRTC_ERROR_INVALID_INPUT;
|
||||
if (!isValidProgram(p)) return HIPRTC_ERROR_INVALID_PROGRAM;
|
||||
if (!p->compiled) return HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION;
|
||||
|
||||
const auto it{find_if(p->names.cbegin(), p->names.cend(),
|
||||
[=](const pair<string, string>& x) {
|
||||
return x.first == n;
|
||||
})};
|
||||
|
||||
if (it == p->names.cend()) return HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID;
|
||||
|
||||
*ln = p->loweredNames[distance(p->names.cbegin(), it)].c_str();
|
||||
|
||||
return HIPRTC_SUCCESS;
|
||||
}
|
||||
|
||||
extern "C" hiprtcResult hiprtcGetProgramLog(hiprtcProgram p, char* l)
|
||||
{
|
||||
if (!l) return HIPRTC_ERROR_INVALID_INPUT;
|
||||
if (!isValidProgram(p)) return HIPRTC_ERROR_INVALID_PROGRAM;
|
||||
if (!p->compiled) return HIPRTC_ERROR_INVALID_PROGRAM;
|
||||
|
||||
l = std::copy_n(p->log.data(), p->log.size(), l);
|
||||
*l = '\0';
|
||||
|
||||
return HIPRTC_SUCCESS;
|
||||
}
|
||||
|
||||
extern "C" hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram p, std::size_t* sz)
|
||||
{
|
||||
if (!sz) return HIPRTC_ERROR_INVALID_INPUT;
|
||||
if (!isValidProgram(p)) return HIPRTC_ERROR_INVALID_PROGRAM;
|
||||
if (!p->compiled) return HIPRTC_ERROR_INVALID_PROGRAM;
|
||||
|
||||
*sz = p->log.empty() ? 0 : p->log.size() + 1;
|
||||
|
||||
return HIPRTC_SUCCESS;
|
||||
}
|
||||
|
||||
extern "C" hiprtcResult hiprtcGetCode(hiprtcProgram p, char* c)
|
||||
{
|
||||
if (!c) return HIPRTC_ERROR_INVALID_INPUT;
|
||||
if (!isValidProgram(p)) return HIPRTC_ERROR_INVALID_PROGRAM;
|
||||
if (!p->compiled) return HIPRTC_ERROR_INVALID_PROGRAM;
|
||||
|
||||
std::copy_n(p->elf.data(), p->elf.size(), c);
|
||||
|
||||
return HIPRTC_SUCCESS;
|
||||
}
|
||||
|
||||
extern "C" hiprtcResult hiprtcGetCodeSize(hiprtcProgram p, std::size_t* sz)
|
||||
{
|
||||
if (!sz) return HIPRTC_ERROR_INVALID_INPUT;
|
||||
if (!isValidProgram(p)) return HIPRTC_ERROR_INVALID_PROGRAM;
|
||||
if (!p->compiled) return HIPRTC_ERROR_INVALID_PROGRAM;
|
||||
|
||||
*sz = p->elf.size();
|
||||
|
||||
return HIPRTC_SUCCESS;
|
||||
}
|
||||
|
||||
extern "C" hiprtcResult hiprtcVersion(int* major, int* minor)
|
||||
{
|
||||
if (major == nullptr || minor == nullptr) {
|
||||
return HIPRTC_ERROR_INVALID_INPUT;
|
||||
}
|
||||
|
||||
*major = 9;
|
||||
*minor = 0;
|
||||
|
||||
return HIPRTC_SUCCESS;
|
||||
}
|
||||
@@ -1,97 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// Internal header, do not percolate upwards.
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "hc.hpp"
|
||||
#include "trace_helper.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
namespace hip_impl
|
||||
{
|
||||
hc::accelerator_view lock_stream_hip_(
|
||||
hipStream_t& stream, void*& locked_stream)
|
||||
{ // This allocated but does not take ownership of locked_stream. If it is
|
||||
// not deleted elsewhere it will leak.
|
||||
using L = decltype(stream->lockopen_preKernelCommand());
|
||||
|
||||
HIP_INIT();
|
||||
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
locked_stream = new L{stream->lockopen_preKernelCommand()};
|
||||
return (*static_cast<L*>(locked_stream))->_av;
|
||||
}
|
||||
|
||||
void print_prelaunch_trace_(
|
||||
const char* kernel_name,
|
||||
dim3 num_blocks,
|
||||
dim3 dim_blocks,
|
||||
int group_mem_bytes,
|
||||
hipStream_t stream)
|
||||
{
|
||||
if ((HIP_TRACE_API & (1 << TRACE_KCMD)) ||
|
||||
HIP_PROFILE_API ||
|
||||
(COMPILE_HIP_DB && (HIP_TRACE_API & (1<<TRACE_ALL)))) {
|
||||
std::stringstream os;
|
||||
os << tls_tidInfo.tid() << "." << tls_tidInfo.apiSeqNum()
|
||||
<< " hipLaunchKernel '" << kernel_name << "'"
|
||||
<< " gridDim:" << num_blocks
|
||||
<< " groupDim:" << dim_blocks
|
||||
<< " sharedMem:+" << group_mem_bytes
|
||||
<< " " << *stream;
|
||||
|
||||
if (HIP_PROFILE_API == 0x1) {
|
||||
std::string shortAtpString("hipLaunchKernel:");
|
||||
shortAtpString += kernel_name;
|
||||
MARKER_BEGIN(shortAtpString.c_str(), "HIP");
|
||||
} else if (HIP_PROFILE_API == 0x2) {
|
||||
MARKER_BEGIN(os.str().c_str(), "HIP");
|
||||
}
|
||||
|
||||
if (COMPILE_HIP_DB && HIP_TRACE_API) {
|
||||
std::string fullStr;
|
||||
GET_TLS();
|
||||
recordApiTrace(tls, &fullStr, os.str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void unlock_stream_hip_(
|
||||
hipStream_t stream,
|
||||
void* locked_stream,
|
||||
const char* kernel_name,
|
||||
hc::accelerator_view* acc_v)
|
||||
{ // Precondition: acc_v is the accelerator_view associated with stream
|
||||
// which is guarded by locked_stream;
|
||||
// locked_stream is deletable.
|
||||
using L = decltype(stream->lockopen_preKernelCommand());
|
||||
|
||||
stream->lockclose_postKernelCommand(kernel_name, acc_v);
|
||||
|
||||
delete static_cast<L*>(locked_stream);
|
||||
if(HIP_PROFILE_API) {
|
||||
MARKER_END();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,101 +0,0 @@
|
||||
#include "../include/hip/hcc_detail/program_state.hpp"
|
||||
// contains implementation of program_state_impl
|
||||
#include "program_state.inl"
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace hip_impl {
|
||||
|
||||
kernarg::kernarg() : impl(new kernarg_impl) {
|
||||
}
|
||||
|
||||
kernarg::kernarg(kernarg&& k) : impl(k.impl) {
|
||||
k.impl = nullptr;
|
||||
}
|
||||
|
||||
kernarg::~kernarg() {
|
||||
if (impl)
|
||||
delete(impl);
|
||||
}
|
||||
|
||||
std::uint8_t* kernarg::data() {
|
||||
return impl->v.data();
|
||||
}
|
||||
|
||||
std::size_t kernarg::size() {
|
||||
return impl->v.size();
|
||||
}
|
||||
|
||||
void kernarg::reserve(std::size_t c) {
|
||||
impl->v.reserve(c);
|
||||
}
|
||||
|
||||
void kernarg::resize(std::size_t c) {
|
||||
impl->v.resize(c);
|
||||
}
|
||||
|
||||
std::size_t kernargs_size_align::kernargs_size_align::size(std::size_t n) const{
|
||||
return (*reinterpret_cast<const std::vector<std::pair<std::size_t, std::size_t>>*>(handle))[n].first;
|
||||
}
|
||||
|
||||
std::size_t kernargs_size_align::alignment(std::size_t n) const{
|
||||
return (*reinterpret_cast<const std::vector<std::pair<std::size_t, std::size_t>>*>(handle))[n].second;
|
||||
}
|
||||
|
||||
program_state::program_state() : impl(new program_state_impl) {
|
||||
if (!impl) hip_throw(std::runtime_error {
|
||||
"Unknown error when constructing program state."});
|
||||
}
|
||||
|
||||
program_state::~program_state() {
|
||||
delete(impl);
|
||||
}
|
||||
|
||||
void* program_state::global_addr_by_name(const char* name) {
|
||||
const auto it = impl->get_globals().find(name);
|
||||
if (it == impl->get_globals().end())
|
||||
return nullptr;
|
||||
else
|
||||
return it->second.first;
|
||||
}
|
||||
|
||||
hsa_executable_t program_state::load_executable(const char* data,
|
||||
const size_t data_size,
|
||||
hsa_executable_t executable,
|
||||
hsa_agent_t agent) {
|
||||
return impl->load_executable(data, data_size, true, executable, agent);
|
||||
}
|
||||
|
||||
hsa_executable_t program_state::load_executable_no_copy(const char* data,
|
||||
const size_t data_size,
|
||||
hsa_executable_t executable,
|
||||
hsa_agent_t agent) {
|
||||
return impl->load_executable(data, data_size, false, executable, agent);
|
||||
}
|
||||
|
||||
hipFunction_t program_state::kernel_descriptor(std::uintptr_t function_address,
|
||||
hsa_agent_t agent) {
|
||||
auto& kd = impl->kernel_descriptor(function_address, agent);
|
||||
return kd;
|
||||
}
|
||||
|
||||
kernargs_size_align program_state::get_kernargs_size_align(std::uintptr_t kernel) {
|
||||
kernargs_size_align t;
|
||||
t.handle = reinterpret_cast<const void*>(&impl->kernargs_size_align(kernel));
|
||||
return t;
|
||||
}
|
||||
|
||||
std::mutex executables_cache_mutex;
|
||||
std::vector<hsa_executable_t>& executables_cache(
|
||||
std::string elf, hsa_isa_t isa, hsa_agent_t agent) {
|
||||
static std::unordered_map<std::string,
|
||||
std::unordered_map<hsa_isa_t,
|
||||
std::unordered_map<hsa_agent_t, std::vector<hsa_executable_t>>>> cache;
|
||||
return cache[elf][isa][agent];
|
||||
}
|
||||
};
|
||||
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@@ -1,125 +0,0 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
//#pragma once
|
||||
|
||||
#ifndef TRACE_HELPER_H
|
||||
#define TRACE_HELPER_H
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
//---
|
||||
// Helper functions to convert HIP function arguments into strings.
|
||||
// Handles POD data types as well as enumerations (ie hipMemcpyKind).
|
||||
// The implementation uses C++11 variadic templates and template specialization.
|
||||
// The hipMemcpyKind example below is a good example that shows how to implement conversion for a
|
||||
// new HSA type.
|
||||
|
||||
|
||||
// Handy macro to convert an enumeration to a stringified version of same:
|
||||
#define CASE_STR(x) \
|
||||
case x: \
|
||||
return #x;
|
||||
|
||||
|
||||
// Building block functions:
|
||||
template <typename T>
|
||||
inline std::string ToHexString(T v) {
|
||||
std::ostringstream ss;
|
||||
ss << "0x" << std::hex << v;
|
||||
return ss.str();
|
||||
};
|
||||
|
||||
|
||||
//---
|
||||
// Template overloads for ToString to handle specific types
|
||||
|
||||
// This is the default which works for most types:
|
||||
template <typename T>
|
||||
inline std::string ToString(T v) {
|
||||
std::ostringstream ss;
|
||||
ss << v;
|
||||
return ss.str();
|
||||
};
|
||||
|
||||
|
||||
// hipEvent_t specialization. TODO - maybe add an event ID for debug?
|
||||
template <>
|
||||
inline std::string ToString(hipEvent_t v) {
|
||||
std::ostringstream ss;
|
||||
ss << v;
|
||||
return ss.str();
|
||||
};
|
||||
// hipIpcEventHandle_t specialization. TODO
|
||||
template <>
|
||||
inline std::string ToString(hipIpcEventHandle_t v) {
|
||||
return std::string{};
|
||||
};
|
||||
// hipStream_t
|
||||
template <>
|
||||
inline std::string ToString(hipStream_t v) {
|
||||
std::ostringstream ss;
|
||||
if (v == NULL) {
|
||||
ss << "stream:<null>";
|
||||
} else {
|
||||
ss << *v;
|
||||
}
|
||||
|
||||
return ss.str();
|
||||
};
|
||||
|
||||
// hipMemcpyKind specialization
|
||||
template <>
|
||||
inline std::string ToString(hipMemcpyKind v) {
|
||||
switch (v) {
|
||||
CASE_STR(hipMemcpyHostToHost);
|
||||
CASE_STR(hipMemcpyHostToDevice);
|
||||
CASE_STR(hipMemcpyDeviceToHost);
|
||||
CASE_STR(hipMemcpyDeviceToDevice);
|
||||
CASE_STR(hipMemcpyDefault);
|
||||
default:
|
||||
return ToHexString(v);
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
template <>
|
||||
inline std::string ToString(hipError_t v) {
|
||||
return ihipErrorString(v);
|
||||
};
|
||||
|
||||
|
||||
// Catch empty arguments case
|
||||
inline std::string ToString() { return (""); }
|
||||
|
||||
|
||||
//---
|
||||
// C++11 variadic template - peels off first argument, converts to string, and calls itself again to
|
||||
// peel the next arg. Strings are automatically separated by comma+space.
|
||||
template <typename T, typename... Args>
|
||||
inline std::string ToString(T first, Args... args) {
|
||||
return ToString(first) + ", " + ToString(args...);
|
||||
}
|
||||
|
||||
#endif
|
||||
Ссылка в новой задаче
Block a user