Remove hip-hcc codes: Part one

Remove hip-hcc codes from hip code base
Simplify hip CMakeLists.txt to exclude hip-hcc
Simplify cmake cmd for hip-rocclr building
Some minor fixes

Change-Id: I1ae357ecfd638d6c25bca293c1724b026be21ecd
Этот коммит содержится в:
Tao Sang
2020-11-27 20:13:14 -05:00
коммит произвёл Aaron En Ye Shi
родитель 8a5b8a36f2
Коммит 186f95ea30
41 изменённых файлов: 37 добавлений и 14991 удалений
+27 -238
Просмотреть файл
@@ -1,12 +1,18 @@
cmake_minimum_required(VERSION 3.4.3)
project(hip)
# sample command for hip-hcc
# cmake -DHIP_RUNTIME=hcc ..
# sample command for hip-rocclr, you'll need to have rocclr installed
# cmake ..
# cmake -DHIP_COMPILER=clang ..
# cmake -DHIP_COMPILER=clang -DHIP_RUNTIME=rocclr ..
# cmake -DHIP_COMPILER=clang -DHIP_RUNTIME=rocclr -DOPENCL_DIR=/path/to/opencl/api/opencl -DCMAKE_PREFIX_PATH=/path/to/rocclr/build/or/install/directory ..
# sample command for hip-rocclr runtime, you'll need to have rocclr built
# For shared lib of hip-rocclr runtime
# For release version
# cmake -DCMAKE_PREFIX_PATH="$ROCclr_DIR/build;/opt/rocm/" -DCMAKE_INSTALL_PREFIX=</where/to/install/hip> ..
# For debug version
# cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_PREFIX_PATH="$ROCclr_DIR/build;/opt/rocm/" -DCMAKE_INSTALL_PREFIX=</where/to/install/hip> ..
# For static lib of hip-rocclr runtime
# For release version
# cmake -DBUILD_SHARED_LIBS=OFF -DCMAKE_PREFIX_PATH="$ROCclr_DIR/build;/opt/rocm/" -DCMAKE_INSTALL_PREFIX=</where/to/install/hip> ..
# For debug version
# cmake -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Debug -DCMAKE_PREFIX_PATH="$ROCclr_DIR/build;/opt/rocm/" -DCMAKE_INSTALL_PREFIX=</where/to/install/hip> ..
# If you don't specify CMAKE_INSTALL_PREFIX, hip-rocclr runtime will be installed to "/opt/rocm/hip".
set(BUILD_SHARED_LIBS ON CACHE BOOL "Build shared library (.so) or static lib (.a) ")
@@ -134,7 +140,7 @@ add_to_config(_versionInfo HIP_VERSION_PATCH)
set (HIP_LIB_VERSION_MAJOR ${HIP_VERSION_MAJOR})
set (HIP_LIB_VERSION_MINOR ${HIP_VERSION_MINOR})
if (${ROCM_PATCH_VERSION} )
set (HIP_LIB_VERSION_PATCH ${ROCM_PATCH_VERSION})
set (HIP_LIB_VERSION_PATCH ${ROCM_PATCH_VERSION})
else ()
set (HIP_LIB_VERSION_PATCH ${HIP_VERSION_PATCH})
endif ()
@@ -145,15 +151,6 @@ if (DEFINED ENV{ROCM_RPATH})
set (CMAKE_SKIP_BUILD_RPATH TRUE)
endif ()
if(CMAKE_CXX_COMPILER MATCHES ".*hcc")
set(HIP_COMPILER "hcc" CACHE STRING "HIP Compiler")
set(HIP_PLATFORM "amd" CACHE STRING "HIP Platform")
set(HIP_RUNTIME "hcc" CACHE STRING "HIP Runtime")
get_filename_component(CXX_PATH ${CMAKE_CXX_COMPILER} DIRECTORY)
get_filename_component(CXX_PATH ${CXX_PATH} DIRECTORY)
set(HCC_HOME "${CXX_PATH}" CACHE PATH "Path to which hcc has been installed")
endif()
# overwrite HIP_VERSION_PATCH for packaging
set(HIP_VERSION ${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_PACKAGING_VERSION_PATCH})
@@ -177,68 +174,21 @@ endif()
message(STATUS "HIP Platform: " ${HIP_PLATFORM})
if(HIP_PLATFORM STREQUAL "nvidia")
set(HIP_COMPILER "nvcc" CACHE STRING "HIP Compiler")
set(HIP_RUNTIME "cuda" CACHE STRING "HIP Runtime")
endif()
# default runtime is rocclr
if(NOT DEFINED HIP_RUNTIME)
if(NOT DEFINED ENV{HIP_RUNTIME})
set(HIP_RUNTIME "rocclr" CACHE STRING "HIP Runtime")
else()
set(HIP_RUNTIME $ENV{HIP_RUNTIME} CACHE STRING "HIP Compiler")
endif()
set(HIP_COMPILER "nvcc" CACHE STRING "HIP Compiler")
elseif(HIP_PLATFORM STREQUAL "amd")
set(HIP_RUNTIME "rocclr" CACHE STRING "HIP Runtime")
set(HIP_COMPILER "clang" CACHE STRING "HIP Compiler")
else()
message(FATAL_ERROR "Unexpected HIP_PLATFORM: " ${HIP_PLATFORM})
endif()
message(STATUS "HIP Runtime: " ${HIP_RUNTIME})
add_to_config(_buildInfo HIP_RUNTIME)
# Determine HIP_COMPILER
# Either hcc or clang; default is clang
if(NOT DEFINED HIP_COMPILER)
if(NOT DEFINED ENV{HIP_COMPILER})
if(HIP_RUNTIME STREQUAL "hcc")
set(HIP_COMPILER "hcc" CACHE STRING "HIP Compiler")
else()
set(HIP_COMPILER "clang" CACHE STRING "HIP Compiler")
endif()
else()
set(HIP_COMPILER $ENV{HIP_COMPILER} CACHE STRING "HIP Compiler")
endif()
endif()
message(STATUS "HIP Compiler: " ${HIP_COMPILER})
add_to_config(_buildInfo HIP_RUNTIME)
add_to_config(_buildInfo HIP_COMPILER)
########### Determine HCC_HOME If compiler is hcc ##################
if(HIP_COMPILER STREQUAL "hcc")
# Determine HCC_HOME
if(NOT DEFINED HCC_HOME)
if(NOT DEFINED ENV{HCC_HOME})
set(HCC_HOME "/opt/rocm/hcc" CACHE PATH "Path to which HCC has been installed")
else()
set(HCC_HOME $ENV{HCC_HOME} CACHE PATH "Path to which HCC has been installed")
endif()
endif()
if(IS_ABSOLUTE ${HCC_HOME} AND EXISTS ${HCC_HOME} AND IS_DIRECTORY ${HCC_HOME})
execute_process(COMMAND ${HCC_HOME}/bin/hcc --version
OUTPUT_VARIABLE HCC_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE)
string(REGEX REPLACE ".*based on HCC " "" HCC_VERSION ${HCC_VERSION})
string(REGEX REPLACE " .*" "" HCC_VERSION ${HCC_VERSION})
message(STATUS "Looking for HCC in: " ${HCC_HOME} ". Found version: " ${HCC_VERSION})
else()
message(FATAL_ERROR "Don't know where to find HCC. Please specify abolute path using -DHCC_HOME")
endif()
add_to_config(_buildInfo HCC_VERSION)
string(REPLACE "-" ";" HCC_VERSION_LIST ${HCC_VERSION})
list(GET HCC_VERSION_LIST 0 HCC_PACKAGE_VERSION)
string(REPLACE "." ";" HCC_VERSION_LIST ${HCC_PACKAGE_VERSION})
list(GET HCC_VERSION_LIST 0 HCC_VERSION_MAJOR)
list(GET HCC_VERSION_LIST 1 HCC_VERSION_MINOR)
endif()
############ If HIP_PLATFORM is amd, HSA_PATH has to be defined ##################
if(HIP_PLATFORM STREQUAL "amd")
@@ -256,7 +206,7 @@ if(HIP_PLATFORM STREQUAL "amd")
message(FATAL_ERROR "Don't know where to find HSA runtime. Please specify absolute path using -DHSA_PATH")
endif()
endif()
message(STATUS "\nHSA runtime in: " ${HSA_PATH})
message(STATUS "HSA runtime in: " ${HSA_PATH})
# Set default build type
if(NOT CMAKE_BUILD_TYPE)
@@ -268,13 +218,7 @@ if (UNIX)
set(HIP_DEFAULT_INSTALL_PREFIX "/opt/rocm/hip")
endif()
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
if(CMAKE_BUILD_TYPE MATCHES Debug)
set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_SOURCE_DIR} CACHE PATH "Installation path for HIP" FORCE)
elseif(CMAKE_BUILD_TYPE MATCHES Release)
set(CMAKE_INSTALL_PREFIX ${HIP_DEFAULT_INSTALL_PREFIX} CACHE PATH "Installation path for HIP" FORCE)
else()
message(FATAL_ERROR "Invalid CMAKE_BUILD_TYPE specified. Valid values are Debug and Release")
endif()
set(CMAKE_INSTALL_PREFIX ${HIP_DEFAULT_INSTALL_PREFIX} CACHE PATH "Installation path for HIP" FORCE)
endif()
if(DEV_LOG_ENABLE MATCHES "yes")
@@ -341,129 +285,6 @@ if(HIP_RUNTIME STREQUAL "rocclr")
set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -fPIC ${HCC_CXX_FLAGS} -I${HSA_PATH}/include")
endif()
# Build hip_hcc if runtime is hcc
if(HIP_RUNTIME STREQUAL "hcc")
#############################
# Profiling API support
#############################
# Generate profiling API macros/structures header
if(USE_PROF_API EQUAL 1)
set(PROF_API_STR "${PROJECT_BINARY_DIR}/include/hip/hcc_detail/hip_prof_str.h")
set(PROF_API_HDR "${CMAKE_CURRENT_SOURCE_DIR}/include/hip/hcc_detail/hip_runtime_api.h")
set(PROF_API_SRC "${CMAKE_CURRENT_SOURCE_DIR}/src")
set(PROF_API_GEN "${CMAKE_CURRENT_SOURCE_DIR}/hip_prof_gen.py")
set(PROF_API_LOG "${PROJECT_BINARY_DIR}/hip_prof_gen.log.txt")
set(PROF_API_CMD "${PROF_API_GEN} -v ${OPT_PROF_API} ${PROF_API_HDR} ${PROF_API_SRC} ${PROF_API_STR} >${PROF_API_LOG}")
MESSAGE(STATUS "Generating profiling promitives: ${PROF_API_STR}")
execute_process(COMMAND sh -c "rm -f ${PROF_API_STR}; ${PROF_API_CMD}")
set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${PROF_API_GEN} ${PROF_API_HDR} ${PROF_API_STR})
# Enable profiling API
find_path(PROF_API_HEADER_DIR prof_protocol.h
HINTS
${PROF_API_HEADER_PATH}
PATHS
/opt/rocm/roctracer
PATH_SUFFIXES
include/ext
)
if(NOT PROF_API_HEADER_DIR)
MESSAGE(WARNING "Profiling API header not found. Disabling roctracer integration. Use -DPROF_API_HEADER_PATH=<path to prof_protocol.h header>")
else()
add_definitions(-DUSE_PROF_API=1)
include_directories(${PROF_API_HEADER_DIR})
MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}")
endif()
endif()
include_directories(${PROJECT_BINARY_DIR}/include)
include_directories(${PROJECT_SOURCE_DIR}/include)
set(HIP_HCC_BUILD_FLAGS)
# Add HIP_VERSION to CMAKE_<LANG>_FLAGS
set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -DHIP_VERSION_MAJOR=${HIP_VERSION_MAJOR} -DHIP_VERSION_MINOR=${HIP_VERSION_MINOR} -DHIP_VERSION_PATCH=${HIP_VERSION_GITDATE}")
# Add remaining flags
set(HCC_CXX_FLAGS "-Xlinker --enable-new-dtags -hc -fno-gpu-rdc --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906 --amdgpu-target=gfx908 ")
set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -fPIC ${HCC_CXX_FLAGS} -I${HSA_PATH}/include")
# Set compiler and compiler flags
set(CMAKE_CXX_COMPILER "${HCC_HOME}/bin/hcc")
set(CMAKE_C_COMPILER "${HCC_HOME}/bin/hcc")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${HIP_HCC_BUILD_FLAGS}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${HIP_HCC_BUILD_FLAGS}")
set(SOURCE_FILES_RUNTIME
src/program_state.cpp
src/hip_clang.cpp
src/hip_hcc.cpp
src/hip_context.cpp
src/hip_device.cpp
src/hip_error.cpp
src/hip_event.cpp
src/hip_fatbin.cpp
src/hip_memory.cpp
src/hip_peer.cpp
src/hip_stream.cpp
src/hip_module.cpp
src/hip_db.cpp
src/grid_launch.cpp
src/hip_texture.cpp
src/hip_surface.cpp
src/hip_intercept.cpp
src/env.cpp
src/h2f.cpp)
add_library(hip_hcc SHARED ${SOURCE_FILES_RUNTIME})
add_library(hip_hcc_static STATIC ${SOURCE_FILES_RUNTIME})
## Set the VERSION and SOVERSION values
set_property ( TARGET hip_hcc PROPERTY VERSION "${HIP_LIB_VERSION_STRING}" )
set_property ( TARGET hip_hcc PROPERTY SOVERSION "${HIP_LIB_VERSION_MAJOR}" )
target_link_libraries(hip_hcc PRIVATE hc_am)
target_link_libraries(hip_hcc_static PRIVATE hc_am)
add_library(hiprtc SHARED src/hiprtc.cpp)
target_compile_options(hiprtc PRIVATE -DDISABLE_REDUCED_GPU_BLOB_COPY)
set_property ( TARGET hiprtc PROPERTY VERSION "${HIP_LIB_VERSION_STRING}" )
set_property ( TARGET hiprtc PROPERTY SOVERSION "${HIP_LIB_VERSION_MAJOR}" )
target_include_directories(
hiprtc SYSTEM
PRIVATE ${PROJECT_SOURCE_DIR}/include ${HSA_PATH}/include)
set_target_properties(hip_hcc PROPERTIES CXX_VISIBILITY_PRESET hidden)
set_target_properties(hip_hcc PROPERTIES VISIBILITY_INLINES_HIDDEN 1)
set_target_properties(hiprtc PROPERTIES CXX_VISIBILITY_PRESET hidden)
set_target_properties(hiprtc PROPERTIES VISIBILITY_INLINES_HIDDEN 1)
find_package(amd_comgr REQUIRED CONFIG
PATHS
/opt/rocm/
PATH_SUFFIXES
cmake/amd_comgr
lib/cmake/amd_comgr
)
MESSAGE(STATUS "Code Object Manager found at ${amd_comgr_DIR}.")
target_link_libraries(hip_hcc PRIVATE amd_comgr)
target_link_libraries(hip_hcc_static PRIVATE amd_comgr)
string(REPLACE " " ";" HCC_CXX_FLAGS_LIST ${HCC_CXX_FLAGS})
foreach(TARGET hip_hcc hip_hcc_static)
target_include_directories(${TARGET} SYSTEM INTERFACE $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>;${HSA_PATH}/include)
endforeach()
add_library(host INTERFACE)
target_link_libraries(host INTERFACE hip_hcc)
add_library(device INTERFACE)
if(HIP_COMPILER STREQUAL "hcc")
target_link_libraries(device INTERFACE host hcc::hccrt hcc::hc_am)
else()
target_link_libraries(device INTERFACE host)
endif()
endif()
if(HIP_PLATFORM STREQUAL "amd")
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/lpl_ca)
endif()
@@ -488,10 +309,6 @@ endif()
#############################
# Install steps
#############################
# Install hip_hcc if runtime is hcc
if(HIP_RUNTIME STREQUAL "hcc")
install(TARGETS hip_hcc_static hip_hcc hiprtc DESTINATION lib)
endif()
# Install .hipInfo
install(FILES ${PROJECT_BINARY_DIR}/.hipInfo DESTINATION lib)
@@ -503,9 +320,7 @@ install(FILES ${PROJECT_BINARY_DIR}/.hipVersion DESTINATION bin)
execute_process(COMMAND test ${CMAKE_INSTALL_PREFIX} -ef ${CMAKE_CURRENT_SOURCE_DIR}
RESULT_VARIABLE INSTALL_SOURCE)
if(NOT ${INSTALL_SOURCE} EQUAL 0)
if(HIP_RUNTIME STREQUAL "hcc")
install(DIRECTORY src DESTINATION .)
elseif(HIP_RUNTIME STREQUAL "rocclr")
if(HIP_RUNTIME STREQUAL "rocclr")
install(DIRECTORY rocclr DESTINATION .)
endif()
install(DIRECTORY bin DESTINATION . USE_SOURCE_PERMISSIONS)
@@ -522,13 +337,6 @@ install(DIRECTORY ${PROJECT_BINARY_DIR}/include/hip
DESTINATION include
FILES_MATCHING PATTERN "*.h*")
if(HIP_RUNTIME STREQUAL "hcc")
install(TARGETS hip_hcc_static hip_hcc host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR})
install(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::)
elseif(HIP_RUNTIME STREQUAL "rocclr")
# install(TARGETS hip_on_rocclr host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR})
endif()
#############################
# hip-config
#############################
@@ -577,22 +385,7 @@ if (BUILD_HIPIFY_CLANG)
add_dependencies(pkg_hip_base hipify-clang)
endif()
if(HIP_RUNTIME STREQUAL "hcc")
message("HCC Package\n")
# Package: hip_hcc
set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hip_hcc)
configure_file(packaging/hip-hcc.txt ${BUILD_DIR}/CMakeLists.txt @ONLY)
configure_file(packaging/hip-hcc.postinst ${BUILD_DIR}/postinst @ONLY)
configure_file(packaging/hip-hcc.prerm ${BUILD_DIR}/prerm @ONLY)
add_custom_target(pkg_hip_hcc COMMAND ${CMAKE_COMMAND} .
COMMAND rm -rf *.deb *.rpm *.tar.gz
COMMAND make package
COMMAND cp *.deb ${PROJECT_BINARY_DIR}
COMMAND cp *.rpm ${PROJECT_BINARY_DIR}
COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR}
WORKING_DIRECTORY ${BUILD_DIR}
DEPENDS hip_hcc hip_hcc_static hiprtc)
elseif(HIP_RUNTIME STREQUAL "rocclr")
if(HIP_RUNTIME STREQUAL "rocclr")
set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/rocclr)
configure_file(packaging/hip-rocclr.txt ${BUILD_DIR}/CMakeLists.txt @ONLY)
configure_file(packaging/hip-rocclr.postinst ${BUILD_DIR}/postinst @ONLY)
@@ -645,11 +438,7 @@ if(POLICY CMP0037)
cmake_policy(SET CMP0037 OLD)
endif()
if(HIP_RUNTIME STREQUAL "hcc")
add_custom_target(package
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
DEPENDS pkg_hip_base pkg_hip_hcc pkg_hip_nvcc pkg_hip_doc pkg_hip_samples)
elseif(HIP_RUNTIME STREQUAL "rocclr")
if(HIP_RUNTIME STREQUAL "rocclr")
add_custom_target(package
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
DEPENDS pkg_hip_base hip_on_rocclr pkg_hip_nvcc pkg_hip_doc pkg_hip_samples)
+2 -1
Просмотреть файл
@@ -95,9 +95,10 @@ git clone -b rocm-3.10.x https://github.com/ROCm-Developer-Tools/HIP.git
export HIP_DIR="$(readlink -f HIP)"
cd "$HIP_DIR"
mkdir -p build; cd build
cmake -DCMAKE_BUILD_TYPE=Release -DHIP_COMPILER=clang -DHIP_PLATFORM=rocclr -DCMAKE_PREFIX_PATH="$ROCclr_DIR/build;/opt/rocm/" -DCMAKE_INSTALL_PREFIX=</where/to/install/hip> ..
cmake -DCMAKE_PREFIX_PATH="$ROCclr_DIR/build;/opt/rocm/" -DCMAKE_INSTALL_PREFIX=</where/to/install/hip> ..
make -j
sudo make install
Note: If you don't specify CMAKE_INSTALL_PREFIX, hip-rocclr runtime will be installed to "/opt/rocm/hip".
```
## Default paths and environment variables
+6 -24
Просмотреть файл
@@ -8,7 +8,7 @@ use 5.006; use v5.10.1;
use Getopt::Long;
use Cwd;
# Return name of HIP compiler - either 'nvcc' or 'hcc'
# Return name of HIP compiler - either 'clang' or 'nvcc'
#
use Getopt::Long;
use File::Basename;
@@ -34,9 +34,9 @@ if ($p_help) {
print " --path, -p : print HIP_PATH (use env var if set, else determine from hipconfig path)\n";
print " --rocmpath, -R : print ROCM_PATH (use env var if set, else determine from hip path or /opt/rocm)\n";
print " --cpp_config, -C : print C++ compiler options\n";
print " --compiler, -c : print compiler (hcc or clang or nvcc)\n";
print " --compiler, -c : print compiler (clang or nvcc)\n";
print " --platform, -P : print platform (amd or nvidia)\n";
print " --runtime, -r : print runtime (hcc or rocclr)\n";
print " --runtime, -r : print runtime (rocclr or cuda)\n";
print " --hipclangpath, -l : print HIP_CLANG_PATH\n";
print " --full, -f : print full config\n";
print " --version, -v : print hip version\n";
@@ -86,7 +86,6 @@ if (-e "$HIP_PATH/../bin/rocm_agent_enumerator") {
$ROCM_PATH=$ENV{'ROCM_PATH'} // "/opt/rocm";
}
$CUDA_PATH=$ENV{'CUDA_PATH'} // '/usr/local/cuda';
$HCC_HOME=$ENV{'HCC_HOME'} // "$ROCM_PATH/hcc";
$HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa";
$HIP_CLANG_PATH=$ENV{'HIP_CLANG_PATH'} // "$ROCM_PATH/llvm/bin";
# HIP_ROCCLR_HOME is used by Windows builds
@@ -120,8 +119,6 @@ if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "rocclr" and !defined $HIP_ROCCLR_H
if (not defined $HIP_PLATFORM) {
if (can_run("$HIP_CLANG_PATH/clang++") or can_run("clang++")) {
$HIP_PLATFORM = "amd";
} elsif (can_run("$HCC_HOME/bin/hcc") or can_run("hcc")) {
$HIP_PLATFORM = "amd";
} elsif (can_run("$CUDA_PATH/bin/nvcc") or can_run("nvcc")) {
$HIP_PLATFORM = "nvidia";
$HIP_COMPILER = "nvcc";
@@ -132,9 +129,6 @@ if (not defined $HIP_PLATFORM) {
}
}
if ($HIP_COMPILER eq "hcc") {
$CPP_CONFIG = " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HCC_HOME/include -I$HSA_PATH/include";
}
if ($HIP_COMPILER eq "clang") {
# Windows does not have clang at linux default path
if (defined $HIP_ROCCLR_HOME and (-e "$HIP_ROCCLR_HOME/bin/clang" or -e "$HIP_ROCCLR_HOME/bin/clang.exe")) {
@@ -216,20 +210,6 @@ if (!$printed or $p_full) {
if ($HIP_PLATFORM eq "amd")
{
print "\n" ;
if ($HIP_COMPILER eq "hcc")
{
print "== hcc\n";
print ("HSA_PATH : $HSA_PATH\n");
print ("HCC_HOME : $HCC_HOME\n");
system("$HCC_HOME/bin/hcc --version");
system("$HCC_HOME/bin/llc --version");
print ("HCC-cxxflags : ");
system("$HCC_HOME/bin/hcc-config --cxxflags");
printf("\n");
print ("HCC-ldflags : ");
system("$HCC_HOME/bin/hcc-config --ldflags");
printf("\n");
}
if ($HIP_COMPILER eq "clang")
{
print "== hip-clang\n";
@@ -243,6 +223,8 @@ if (!$printed or $p_full) {
print ("hip-clang-ldflags : ");
system("$HIP_PATH/bin/hipcc --ldflags");
printf("\n");
} else {
print ("Unexpected HIP_COMPILER: $HIP_COMPILER\n");
}
}
if ($HIP_PLATFORM eq "nvidia") {
@@ -256,7 +238,7 @@ if (!$printed or $p_full) {
print "=== Environment Variables\n";
system("echo PATH=\$PATH");
system("env | egrep '^HIP|^HSA|^HCC|^CUDA|^LD_LIBRARY_PATH'");
system("env | egrep '^HIP|^HSA|^CUDA|^LD_LIBRARY_PATH'");
print "\n" ;
Исполняемый файл → Обычный файл
Просмотреть файл
-3
Просмотреть файл
@@ -13,8 +13,6 @@ set_target_properties(
CXX_EXTENSIONS OFF
RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
target_include_directories(lpl
PUBLIC
${PROJECT_SOURCE_DIR}/src
PRIVATE
$<TARGET_PROPERTY:amdrocclr_static,INTERFACE_INCLUDE_DIRECTORIES>)
@@ -34,7 +32,6 @@ set_target_properties(
CXX_EXTENSIONS OFF
RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
target_include_directories(ca SYSTEM PUBLIC ${HSA_PATH}/include)
target_include_directories(ca PUBLIC ${PROJECT_SOURCE_DIR}/src)
find_package(hsa-runtime64 REQUIRED CONFIG
PATHS
+1 -2
Просмотреть файл
@@ -2,8 +2,7 @@
#include "common.hpp"
#include "../src/code_object_bundle.inl"
#include "code_object_bundle.inl"
#include "clara/clara.hpp"
#include <algorithm>
+1 -1
Просмотреть файл
@@ -637,7 +637,7 @@ struct BoundFlagRefBase : BoundRefBase {
auto isFlag() const -> bool override { return true; }
auto setValue(std::string const& arg) -> ParserResult override {
bool flag;
bool flag = 0;
auto result = convertInto(arg, flag);
if (result) setFlag(flag);
return result;
Просмотреть файл
-45
Просмотреть файл
@@ -1,45 +0,0 @@
//===-- AMDGPUNoteType.h - AMDGPU ELF PT_NOTE section info-------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// Enums and constants for AMDGPU PT_NOTE sections.
///
//
//===----------------------------------------------------------------------===//
//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPTNOTE_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPTNOTE_H
namespace AMDGPU {
namespace ElfNote {
const char SectionName[] = ".note";
const char NoteName[] = "AMD";
// TODO: Move this enum to include/llvm/Support so it can be used in tools?
enum NoteType {
NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1,
NT_AMDGPU_HSA_HSAIL = 2,
NT_AMDGPU_HSA_ISA = 3,
NT_AMDGPU_HSA_PRODUCER = 4,
NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5,
NT_AMDGPU_HSA_EXTENSION = 6,
NT_AMDGPU_HSA_RUNTIME_METADATA_V_1 = 7, // deprecated since 12/14/16.
NT_AMDGPU_HSA_RUNTIME_METADATA_V_2 = 8,
NT_AMDGPU_HSA_RUNTIME_METADATA = NT_AMDGPU_HSA_RUNTIME_METADATA_V_2,
NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101,
NT_AMDGPU_HSA_HLDEBUG_TARGET = 102
};
} // namespace ElfNote
} // namespace AMDGPU
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUNOTETYPE_H
-290
Просмотреть файл
@@ -1,290 +0,0 @@
//===-- AMDGPURuntimeMetadata.h - AMDGPU Runtime Metadata -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
///
/// Enums and structure types used by runtime metadata.
///
/// Runtime requests certain information (metadata) about kernels to be able
/// to execute the kernels and answer the queries about the kernels.
/// The metadata is represented as a note element in the .note ELF section of a
/// binary (code object). The desc field of the note element is a YAML string
/// consisting of key-value pairs. Each key is a string. Each value can be
/// an integer, a string, or an YAML sequence. There are 3 levels of YAML maps.
/// At the beginning of the YAML string is the module level YAML map. A
/// kernel-level YAML map is in the amd.Kernels sequence. A
/// kernel-argument-level map is in the amd.Args sequence.
///
/// The format should be kept backward compatible. New enum values and bit
/// fields should be appended at the end. It is suggested to bump up the
/// revision number whenever the format changes and document the change
/// in the revision in this header.
///
//
//===----------------------------------------------------------------------===//
//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
#include <cstdint>
#include <vector>
#include <string>
namespace AMDGPU {
namespace RuntimeMD {
// Version and revision of runtime metadata
const unsigned char MDVersion = 2;
const unsigned char MDRevision = 1;
// Name of keys for runtime metadata.
namespace KeyName {
// Runtime metadata version
const char MDVersion[] = "amd.MDVersion";
// Instruction set architecture information
const char IsaInfo[] = "amd.IsaInfo";
// Wavefront size
const char IsaInfoWavefrontSize[] = "amd.IsaInfoWavefrontSize";
// Local memory size in bytes
const char IsaInfoLocalMemorySize[] = "amd.IsaInfoLocalMemorySize";
// Number of execution units per compute unit
const char IsaInfoEUsPerCU[] = "amd.IsaInfoEUsPerCU";
// Maximum number of waves per execution unit
const char IsaInfoMaxWavesPerEU[] = "amd.IsaInfoMaxWavesPerEU";
// Maximum flat work group size
const char IsaInfoMaxFlatWorkGroupSize[] = "amd.IsaInfoMaxFlatWorkGroupSize";
// SGPR allocation granularity
const char IsaInfoSGPRAllocGranule[] = "amd.IsaInfoSGPRAllocGranule";
// Total number of SGPRs
const char IsaInfoTotalNumSGPRs[] = "amd.IsaInfoTotalNumSGPRs";
// Addressable number of SGPRs
const char IsaInfoAddressableNumSGPRs[] = "amd.IsaInfoAddressableNumSGPRs";
// VGPR allocation granularity
const char IsaInfoVGPRAllocGranule[] = "amd.IsaInfoVGPRAllocGranule";
// Total number of VGPRs
const char IsaInfoTotalNumVGPRs[] = "amd.IsaInfoTotalNumVGPRs";
// Addressable number of VGPRs
const char IsaInfoAddressableNumVGPRs[] = "amd.IsaInfoAddressableNumVGPRs";
// Language
const char Language[] = "amd.Language";
// Language version
const char LanguageVersion[] = "amd.LanguageVersion";
// Kernels
const char Kernels[] = "amd.Kernels";
// Kernel name
const char KernelName[] = "amd.KernelName";
// Kernel arguments
const char Args[] = "amd.Args";
// Kernel argument size in bytes
const char ArgSize[] = "amd.ArgSize";
// Kernel argument alignment
const char ArgAlign[] = "amd.ArgAlign";
// Kernel argument type name
const char ArgTypeName[] = "amd.ArgTypeName";
// Kernel argument name
const char ArgName[] = "amd.ArgName";
// Kernel argument kind
const char ArgKind[] = "amd.ArgKind";
// Kernel argument value type
const char ArgValueType[] = "amd.ArgValueType";
// Kernel argument address qualifier
const char ArgAddrQual[] = "amd.ArgAddrQual";
// Kernel argument access qualifier
const char ArgAccQual[] = "amd.ArgAccQual";
// Kernel argument is const qualified
const char ArgIsConst[] = "amd.ArgIsConst";
// Kernel argument is restrict qualified
const char ArgIsRestrict[] = "amd.ArgIsRestrict";
// Kernel argument is volatile qualified
const char ArgIsVolatile[] = "amd.ArgIsVolatile";
// Kernel argument is pipe qualified
const char ArgIsPipe[] = "amd.ArgIsPipe";
// Required work group size
const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize";
// Work group size hint
const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint";
// Vector type hint
const char VecTypeHint[] = "amd.VecTypeHint";
// Kernel index for device enqueue
const char KernelIndex[] = "amd.KernelIndex";
// No partial work groups
const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups";
// Prinf function call information
const char PrintfInfo[] = "amd.PrintfInfo";
// The actual kernel argument access qualifier
const char ArgActualAcc[] = "amd.ArgActualAcc";
// Alignment of pointee type
const char ArgPointeeAlign[] = "amd.ArgPointeeAlign";
} // end namespace KeyName
namespace KernelArg {
enum Kind : uint8_t {
ByValue = 0,
GlobalBuffer = 1,
DynamicSharedPointer = 2,
Sampler = 3,
Image = 4,
Pipe = 5,
Queue = 6,
HiddenGlobalOffsetX = 7,
HiddenGlobalOffsetY = 8,
HiddenGlobalOffsetZ = 9,
HiddenNone = 10,
HiddenPrintfBuffer = 11,
HiddenDefaultQueue = 12,
HiddenCompletionAction = 13,
};
enum ValueType : uint16_t {
Struct = 0,
I8 = 1,
U8 = 2,
I16 = 3,
U16 = 4,
F16 = 5,
I32 = 6,
U32 = 7,
F32 = 8,
I64 = 9,
U64 = 10,
F64 = 11,
};
// Avoid using 'None' since it conflicts with a macro in X11 header file.
enum AccessQualifer : uint8_t {
AccNone = 0,
ReadOnly = 1,
WriteOnly = 2,
ReadWrite = 3,
};
enum AddressSpaceQualifer : uint8_t {
Private = 0,
Global = 1,
Constant = 2,
Local = 3,
Generic = 4,
Region = 5,
};
} // end namespace KernelArg
// Invalid values are used to indicate an optional key should not be emitted.
const uint8_t INVALID_ADDR_QUAL = 0xff;
const uint8_t INVALID_ACC_QUAL = 0xff;
const uint32_t INVALID_KERNEL_INDEX = ~0U;
namespace KernelArg {
// In-memory representation of kernel argument information.
struct Metadata {
uint32_t Size = 0;
uint32_t Align = 0;
uint32_t PointeeAlign = 0;
uint8_t Kind = 0;
uint16_t ValueType = 0;
std::string TypeName;
std::string Name;
uint8_t AddrQual = INVALID_ADDR_QUAL;
uint8_t AccQual = INVALID_ACC_QUAL;
uint8_t IsVolatile = 0;
uint8_t IsConst = 0;
uint8_t IsRestrict = 0;
uint8_t IsPipe = 0;
Metadata() = default;
};
} // end namespace KernelArg
namespace Kernel {
// In-memory representation of kernel information.
struct Metadata {
std::string Name;
std::string Language;
std::vector<uint8_t> LanguageVersion;
std::vector<uint32_t> ReqdWorkGroupSize;
std::vector<uint32_t> WorkGroupSizeHint;
std::string VecTypeHint;
uint32_t KernelIndex = INVALID_KERNEL_INDEX;
uint8_t NoPartialWorkGroups = 0;
std::vector<KernelArg::Metadata> Args;
Metadata() = default;
};
} // end namespace Kernel
namespace IsaInfo {
/// \brief In-memory representation of instruction set architecture
/// information.
struct Metadata {
/// \brief Wavefront size.
unsigned WavefrontSize = 0;
/// \brief Local memory size in bytes.
unsigned LocalMemorySize = 0;
/// \brief Number of execution units per compute unit.
unsigned EUsPerCU = 0;
/// \brief Maximum number of waves per execution unit.
unsigned MaxWavesPerEU = 0;
/// \brief Maximum flat work group size.
unsigned MaxFlatWorkGroupSize = 0;
/// \brief SGPR allocation granularity.
unsigned SGPRAllocGranule = 0;
/// \brief Total number of SGPRs.
unsigned TotalNumSGPRs = 0;
/// \brief Addressable number of SGPRs.
unsigned AddressableNumSGPRs = 0;
/// \brief VGPR allocation granularity.
unsigned VGPRAllocGranule = 0;
/// \brief Total number of VGPRs.
unsigned TotalNumVGPRs = 0;
/// \brief Addressable number of VGPRs.
unsigned AddressableNumVGPRs = 0;
Metadata() = default;
};
} // end namespace IsaInfo
namespace Program {
// In-memory representation of program information.
struct Metadata {
std::vector<uint8_t> MDVersionSeq;
IsaInfo::Metadata IsaInfo;
std::vector<std::string> PrintfInfo;
std::vector<Kernel::Metadata> Kernels;
explicit Metadata() = default;
// Construct from an YAML string.
explicit Metadata(const std::string& YAML);
// Convert to YAML string.
std::string toYAML();
// Convert from YAML string.
static Metadata fromYAML(const std::string& S);
};
} // end namespace Program
} // end namespace RuntimeMD
} // end namespace AMDGPU
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
-136
Просмотреть файл
@@ -1,136 +0,0 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef DEVICE_UTIL_H
#define DEVICE_UTIL_H
#include <hip/hcc_detail/hip_runtime.h>
/*
Heap size computation for malloc and free device functions.
*/
#define HIP_SQRT_2 1.41421356237
#define HIP_SQRT_PI 1.77245385091
#define __hip_erfinva3 -0.140543331
#define __hip_erfinva2 0.914624893
#define __hip_erfinva1 -1.645349621
#define __hip_erfinva0 0.886226899
#define __hip_erfinvb4 0.012229801
#define __hip_erfinvb3 -0.329097515
#define __hip_erfinvb2 1.442710462
#define __hip_erfinvb1 -2.118377725
#define __hip_erfinvb0 1
#define __hip_erfinvc3 1.641345311
#define __hip_erfinvc2 3.429567803
#define __hip_erfinvc1 -1.62490649
#define __hip_erfinvc0 -1.970840454
#define __hip_erfinvd2 1.637067800
#define __hip_erfinvd1 3.543889200
#define __hip_erfinvd0 1
#define HIP_PI 3.14159265358979323846
__device__ float __hip_erfinvf(float x);
__device__ double __hip_erfinv(double x);
__device__ float __hip_j0f(float x);
__device__ double __hip_j0(double x);
__device__ float __hip_j1f(float x);
__device__ double __hip_j1(double x);
__device__ float __hip_y0f(float x);
__device__ double __hip_y0(double x);
__device__ float __hip_y1f(float x);
__device__ double __hip_y1(double x);
__device__ float __hip_jnf(int n, float x);
__device__ double __hip_jn(int n, double x);
__device__ float __hip_ynf(int n, float x);
__device__ double __hip_yn(int n, double x);
__device__ float __hip_precise_cosf(float x);
__device__ float __hip_precise_exp10f(float x);
__device__ float __hip_precise_expf(float x);
__device__ float __hip_precise_frsqrt_rn(float x);
__device__ float __hip_precise_fsqrt_rd(float x);
__device__ float __hip_precise_fsqrt_rn(float x);
__device__ float __hip_precise_fsqrt_ru(float x);
__device__ float __hip_precise_fsqrt_rz(float x);
__device__ float __hip_precise_log10f(float x);
__device__ float __hip_precise_log2f(float x);
__device__ float __hip_precise_logf(float x);
__device__ float __hip_precise_powf(float base, float exponent);
__device__ void __hip_precise_sincosf(float x, float* s, float* c);
__device__ float __hip_precise_sinf(float x);
__device__ float __hip_precise_tanf(float x);
// Double Precision Math
__device__ double __hip_precise_dsqrt_rd(double x);
__device__ double __hip_precise_dsqrt_rn(double x);
__device__ double __hip_precise_dsqrt_ru(double x);
__device__ double __hip_precise_dsqrt_rz(double x);
// Float Fast Math
__device__ float __hip_fast_exp10f(float x);
__device__ float __hip_fast_expf(float x);
__device__ float __hip_fast_frsqrt_rn(float x);
__device__ float __hip_fast_fsqrt_rn(float x);
__device__ float __hip_fast_fsqrt_ru(float x);
__device__ float __hip_fast_fsqrt_rz(float x);
__device__ float __hip_fast_log10f(float x);
__device__ float __hip_fast_logf(float x);
__device__ float __hip_fast_powf(float base, float exponent);
__device__ void __hip_fast_sincosf(float x, float* s, float* c);
__device__ float __hip_fast_tanf(float x);
// Double Precision Math
__device__ double __hip_fast_dsqrt_rd(double x);
__device__ double __hip_fast_dsqrt_rn(double x);
__device__ double __hip_fast_dsqrt_ru(double x);
__device__ double __hip_fast_dsqrt_rz(double x);
float __hip_host_j0f(float x);
double __hip_host_j0(double x);
float __hip_host_j1f(float x);
double __hip_host_j1(double x);
float __hip_host_y0f(float x);
double __hip_host_y1(double x);
float __hip_host_y1f(float x);
double __hip_host_y1(double x);
float __hip_host_jnf(int n, float x);
double __hip_host_jn(int n, double x);
float __hip_host_ynf(int n, float x);
double __hip_host_yn(int n, double x);
#endif
-109
Просмотреть файл
@@ -1,109 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip_hcc_internal.h"
#include "trace_helper.h"
#include "env.h"
//---
// Read environment variables.
void ihipReadEnv_I(int* var_ptr, const char* var_name1, const char* var_name2,
const char* description) {
char* env = getenv(var_name1);
// Check second name if first not defined, used to allow HIP_ or CUDA_ env vars.
if ((env == NULL) && strcmp(var_name2, "0")) {
env = getenv(var_name2);
}
// Default is set when variable is initialized (at top of this file), so only override if we
// find an environment variable.
if (env) {
long int v = strtol(env, NULL, 0);
*var_ptr = (int)(v);
}
if (HIP_PRINT_ENV) {
printf("%-30s = %2d : %s\n", var_name1, *var_ptr, description);
}
}
void ihipReadEnv_S(std::string* var_ptr, const char* var_name1, const char* var_name2,
const char* description) {
char* env = getenv(var_name1);
// Check second name if first not defined, used to allow HIP_ or CUDA_ env vars.
if ((env == NULL) && strcmp(var_name2, "0")) {
env = getenv(var_name2);
}
if (env) {
*static_cast<std::string*>(var_ptr) = env;
}
if (HIP_PRINT_ENV) {
printf("%-30s = %s : %s\n", var_name1, var_ptr->c_str(), description);
}
}
void ihipReadEnv_Callback(void* var_ptr, const char* var_name1, const char* var_name2,
const char* description,
std::string (*setterCallback)(void* var_ptr, const char* env)) {
char* env = getenv(var_name1);
// Check second name if first not defined, used to allow HIP_ or CUDA_ env vars.
if ((env == NULL) && strcmp(var_name2, "0")) {
env = getenv(var_name2);
}
std::string var_string = "0";
if (env) {
var_string = setterCallback(var_ptr, env);
}
if (HIP_PRINT_ENV) {
printf("%-30s = %s : %s\n", var_name1, var_string.c_str(), description);
}
}
void tokenize(const std::string& s, char delim, std::vector<std::string>* tokens) {
std::stringstream ss;
ss.str(s);
std::string item;
while (getline(ss, item, delim)) {
item.erase(std::remove(item.begin(), item.end(), ' '), item.end()); // remove whitespace.
tokens->push_back(item);
}
}
void trim(std::string* s) {
// trim whitespace from beginning and end:
const char* t = "\t\n\r\f\v";
s->erase(0, s->find_first_not_of(t));
s->erase(s->find_last_not_of(t) + 1);
}
static void ltrim(std::string* s) {
// trim whitespace from beginning
const char* t = "\t\n\r\f\v";
s->erase(0, s->find_first_not_of(t));
}
-27
Просмотреть файл
@@ -1,27 +0,0 @@
#pragma once
extern void HipReadEnv();
#define READ_ENV_I(_build, _ENV_VAR, _ENV_VAR2, _description) \
ihipReadEnv_I(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);
#define READ_ENV_S(_build, _ENV_VAR, _ENV_VAR2, _description) \
ihipReadEnv_S(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);
#define READ_ENV_C(_build, _ENV_VAR, _ENV_VAR2, _description, _callback) \
ihipReadEnv_Callback(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description, _callback);
extern void ihipReadEnv_I(int* var_ptr, const char* var_name1, const char* var_name2,
const char* description);
extern void ihipReadEnv_S(std::string* var_ptr, const char* var_name1, const char* var_name2,
const char* description);
extern void ihipReadEnv_Callback(void* var_ptr, const char* var_name1, const char* var_name2,
const char* description,
std::string (*setterCallback)(void* var_ptr, const char* env));
// String functions:
extern void trim(std::string* s);
extern void tokenize(const std::string& s, char delim, std::vector<std::string>* tokens);
-60
Просмотреть файл
@@ -1,60 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hcc_detail/program_state.hpp"
#include "hip/hip_runtime_api.h"
// Internal header, do not percolate upwards.
#include "hip_hcc_internal.h"
#include "hc.hpp"
#include "trace_helper.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <iostream>
using namespace hc;
using namespace std;
namespace hip_impl
{
HIP_INTERNAL_EXPORTED_API hsa_agent_t target_agent(hipStream_t stream)
{
if (stream) {
return *static_cast<hsa_agent_t*>(
stream->locked_getAv()->get_hsa_agent());
}
GET_TLS();
if (ihipGetTlsDefaultCtx() && ihipGetTlsDefaultCtx()->getDevice()) {
return ihipGetDevice(
ihipGetTlsDefaultCtx()->getDevice()->_deviceId)->_hsaAgent;
}
else {
return *static_cast<hsa_agent_t*>(
accelerator{}.get_default_view().get_hsa_agent());
}
}
}
-29
Просмотреть файл
@@ -1,29 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hcc_detail/grid_launch_GGL.hpp"
#if __hcc_workweek__ >= 17481
#include "functional_grid_launch.inl"
#else
#include "macro_based_grid_launch.inl"
#endif
-70
Просмотреть файл
@@ -1,70 +0,0 @@
/*
Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <cstdint>
#include <algorithm>
// conversion routines between float and half precision
static inline std::uint32_t f32_as_u32(float f) { union { float f; std::uint32_t u; } v; v.f = f; return v.u; }
static inline float u32_as_f32(std::uint32_t u) { union { float f; std::uint32_t u; } v; v.u = u; return v.f; }
static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); }
// half to float, the f16 is in the low 16 bits of the input argument a
static inline float __convert_half_to_float(std::uint32_t a) noexcept {
std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U;
std::uint32_t v = f32_as_u32(u32_as_f32(u) * 0x1.0p+112f) + 0x38000000U;
u = (a & 0x7fff) != 0 ? v : u;
return u32_as_f32(u) * 0x1.0p-112f;
}
// float to half with nearest even rounding
// The lower 16 bits of the result is the bit pattern for the f16
static inline std::uint32_t __convert_float_to_half(float a) noexcept {
std::uint32_t u = f32_as_u32(a);
int e = static_cast<int>((u >> 23) & 0xff) - 127 + 15;
std::uint32_t m = ((u >> 11) & 0xffe) | ((u & 0xfff) != 0);
std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0);
std::uint32_t n = ((std::uint32_t)e << 12) | m;
std::uint32_t s = (u >> 16) & 0x8000;
int b = clamp_int(1-e, 0, 13);
std::uint32_t d = (0x1000 | m) >> b;
d |= (d << b) != (0x1000 | m);
std::uint32_t v = e < 1 ? d : n;
v = (v >> 2) + (((v & 0x7) == 3) | ((v & 0x7) > 5));
v = e > 30 ? 0x7c00 : v;
v = e == 143 ? i : v;
return s | v;
}
// On machines without fp16 instructions, clang lowers llvm.convert.from.fp16
// to call of this function.
extern "C" __attribute__((visibility("default")))
float __gnu_h2f_ieee(unsigned short h){
return __convert_half_to_float((std::uint32_t) h);
}
// On machines without fp16 instructions, clang lowers llvm.convert.to.fp16
// to call of this function.
extern "C" __attribute__((visibility("default")))
unsigned short __gnu_f2h_ieee(float f){
return (unsigned short)__convert_float_to_half(f);
}
-533
Просмотреть файл
@@ -1,533 +0,0 @@
/*
Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <unordered_map>
#include <string>
#include <fstream>
#include "hip/hip_runtime.h"
#include "hip_hcc_internal.h"
#include "hip_fatbin.h"
#include "trace_helper.h"
#include "program_state.inl"
#ifdef __GNUC__
#pragma GCC visibility push (default)
#endif
extern "C" std::vector<hipModule_t>*
__hipRegisterFatBinary(const void* data)
{
hip_impl::hip_init();
tprintf(DB_FB, "Enter __hipRegisterFatBinary(%p)\n", data);
const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast<const __CudaFatBinaryWrapper*>(data);
if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) {
return nullptr;
}
const __ClangOffloadBundleHeader* header = fbwrapper->binary;
std::string magic(reinterpret_cast<const char*>(header), sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC) - 1);
if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC)) {
return nullptr;
}
auto modules = new std::vector<hipModule_t>(g_deviceCnt);
if (!modules) {
return nullptr;
}
const __ClangOffloadBundleDesc* desc = &header->desc[0];
for (uint64_t i = 0; i < header->numBundles; ++i,
desc = reinterpret_cast<const __ClangOffloadBundleDesc*>(
reinterpret_cast<uintptr_t>(&desc->triple[0]) + desc->tripleSize)) {
std::string triple{&desc->triple[0], sizeof(AMDGCN_AMDHSA_TRIPLE) - 1};
if (triple.compare(AMDGCN_AMDHSA_TRIPLE))
continue;
std::string target{&desc->triple[sizeof(AMDGCN_AMDHSA_TRIPLE)],
desc->tripleSize - sizeof(AMDGCN_AMDHSA_TRIPLE)};
tprintf(DB_FB, "Found bundle for %s\n", target.c_str());
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
hsa_agent_t agent = g_allAgents[deviceId + 1];
char name[64] = {};
hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name);
if (target.compare(name)) {
continue;
}
ihipModule_t* module = new ihipModule_t;
if (!module) {
continue;
}
hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, nullptr,
&module->executable);
std::string image{reinterpret_cast<const char*>(
reinterpret_cast<uintptr_t>(header) + desc->offset), desc->size};
if (HIP_DUMP_CODE_OBJECT)
__hipDumpCodeObject(image);
module->executable = hip_impl::get_program_state().load_executable_no_copy(
reinterpret_cast<const char*>(header) + desc->offset, desc->size,
module->executable, agent);
if (module->executable.handle) {
hip_impl::program_state_impl::read_kernarg_metadata(image, module->kernargs);
modules->at(deviceId) = module;
tprintf(DB_FB, "Loaded code object for %s, args size=%ld\n", name, module->kernargs.size());
} else {
fprintf(stderr, "Failed to load code object for %s\n", name);
abort();
}
}
}
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
hsa_agent_t agent = g_allAgents[deviceId + 1];
char name[64] = {};
hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name);
if (!(*modules)[deviceId]) {
fprintf(stderr, "No device code bundle for %s\n", name);
abort();
}
}
tprintf(DB_FB, "__hipRegisterFatBinary succeeds and returns %p\n", modules);
return modules;
}
std::map<const void*, std::vector<hipFunction_t>> g_functions;
extern "C" void __hipRegisterFunction(
std::vector<hipModule_t>* modules,
const void* hostFunction,
char* deviceFunction,
const char* deviceName,
unsigned int threadLimit,
uint3* tid,
uint3* bid,
dim3* blockDim,
dim3* gridDim,
int* wSize)
{
HIP_INIT_API(NONE, modules, hostFunction, deviceFunction, deviceName);
std::vector<hipFunction_t> functions(g_deviceCnt);
assert(modules && modules->size() >= g_deviceCnt);
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
hipFunction_t function;
hsa_agent_t agent = g_allAgents[deviceId + 1];
if ((hipSuccess == hipModuleGetFunctionEx(&function, modules->at(deviceId), deviceName, &agent) ||
// With code-object-v3, we need to match the kernel descriptor symbol name
(hipSuccess == hipModuleGetFunctionEx(
&function, modules->at(deviceId),
(std::string(deviceName) + std::string(".kd")).c_str(),
&agent
))) && function != nullptr) {
functions[deviceId] = function;
}
else {
tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for"
" device %d\n", deviceName, deviceId);
}
}
g_functions.insert(std::make_pair(hostFunction, std::move(functions)));
}
static inline const char* hsa_strerror(hsa_status_t status) {
const char* str = nullptr;
if (hsa_status_string(status, &str) == HSA_STATUS_SUCCESS) {
return str;
}
return "Unknown error";
}
struct RegisteredVar {
public:
RegisteredVar(): size_(0), devicePtr_(nullptr) {}
~RegisteredVar() {}
static inline const char* hsa_strerror(hsa_status_t status) {
const char* str = nullptr;
if (hsa_status_string(status, &str) == HSA_STATUS_SUCCESS) {
return str;
}
return "Unknown error";
}
hipDeviceptr_t getdeviceptr() const { return devicePtr_; };
size_t getvarsize() const { return size_; };
size_t size_; // Size of the variable
hipDeviceptr_t devicePtr_; //Device Memory Address of the variable.
};
struct DeviceVar {
void* shadowVptr;
std::string hostVar;
size_t size;
std::vector<hipModule_t>* modules;
std::vector<RegisteredVar> rvars;
bool dyn_undef;
};
std::unordered_multimap<std::string, DeviceVar > g_vars;
//The logic follows PlatformState::getGlobalVar in ROCclr RT
static DeviceVar* findVar(std::string hostVar, int deviceId, hipModule_t hmod) {
DeviceVar* dvar = nullptr;
if (hmod != nullptr) {
// If module is provided, then get the var only from that module
auto var_range = g_vars.equal_range(hostVar);
for (auto it = var_range.first; it != var_range.second; ++it) {
if ((*it->second.modules)[deviceId] == hmod) {
dvar = &(it->second);
break;
}
}
} else {
// If var count is < 2, return the var
if (g_vars.count(hostVar) < 2) {
auto it = g_vars.find(hostVar);
dvar = ((it == g_vars.end()) ? nullptr : &(it->second));
} else {
// If var count is > 2, return the original var,
// if original var count != 1, return g_vars.end()/Invalid
size_t orig_global_count = 0;
auto var_range = g_vars.equal_range(hostVar);
for (auto it = var_range.first; it != var_range.second; ++it) {
// when dyn_undef is set, it is a shadow var
if (it->second.dyn_undef == false) {
++orig_global_count;
dvar = &(it->second);
}
}
dvar = ((orig_global_count == 1) ? dvar : nullptr);
}
}
return dvar;
}
hipError_t ihipGetGlobalVar(hipDeviceptr_t* dev_ptr, size_t* size_ptr,
const char* hostVar, hipModule_t hmod) {
GET_TLS();
auto ctx = ihipGetTlsDefaultCtx();
if (!ctx) return hipErrorInvalidValue;
auto device = ctx->getDevice();
if (!device) return hipErrorInvalidValue;
ihipDevice_t* currentDevice = ihipGetDevice(device->_deviceId);
if (!currentDevice) return hipErrorInvalidValue;
int deviceId = device->_deviceId;
DeviceVar* dvar = findVar(std::string(hostVar), deviceId, hmod);
if (dvar == nullptr) return hipErrorInvalidValue;
if (dvar->rvars[deviceId].getdeviceptr() == nullptr) return hipErrorInvalidValue;
*size_ptr = dvar->rvars[deviceId].getvarsize();
*dev_ptr = dvar->rvars[deviceId].getdeviceptr();
return hipSuccess;
}
static bool createGlobalVarObj(const hsa_executable_t& hsaExecutable, const hsa_agent_t& hasAgent,
const char* global_name, void** device_pptr, size_t* bytes) {
hsa_status_t status = HSA_STATUS_SUCCESS;
hsa_symbol_kind_t sym_type;
hsa_executable_symbol_t global_symbol;
std::string buildLog;
/* Find HSA Symbol by name */
status = hsa_executable_get_symbol_by_name(hsaExecutable, global_name, &hasAgent,
&global_symbol);
if (status != HSA_STATUS_SUCCESS) {
buildLog += "Error: Failed to find the Symbol by Name: ";
buildLog += hsa_strerror(status);
tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str());
return false;
}
/* Find HSA Symbol Type */
status = hsa_executable_symbol_get_info(global_symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE,
&sym_type);
if (status != HSA_STATUS_SUCCESS) {
buildLog += "Error: Failed to find the Symbol Type : ";
buildLog += hsa_strerror(status);
tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str());
return false;
}
/* Make sure symbol type is VARIABLE */
if (sym_type != HSA_SYMBOL_KIND_VARIABLE) {
buildLog += "Error: Symbol is not of type VARIABLE : ";
buildLog += hsa_strerror(status);
tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str());
return false;
}
/* Retrieve the size of the variable */
status = hsa_executable_symbol_get_info(global_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, bytes);
if (status != HSA_STATUS_SUCCESS) {
buildLog += "Error: Failed to retrieve the Symbol Size : ";
buildLog += hsa_strerror(status);
tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str());
return false;
}
/* Find HSA Symbol Address */
status = hsa_executable_symbol_get_info(global_symbol,
HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, device_pptr);
if (status != HSA_STATUS_SUCCESS) {
buildLog += "Error: Failed to find the Symbol Address : ";
buildLog += hsa_strerror(status);
tprintf(DB_FB, "createGlobalVarObj: %s\n", buildLog.c_str());
return false;
} else {
tprintf(DB_FB, "createGlobalVarObj: var %s : device=%p, size=%zu\n", global_name, *device_pptr, *bytes);
}
return true;
}
// Registers a device-side global variable.
// For each global variable in device code, there is a corresponding shadow
// global variable in host code. The shadow host variable is used to keep
// track of the value of the device side global variable between kernel
// executions.
// The basic logic is taken from ROCclr RT, but there is much difference.
extern "C" void __hipRegisterVar(
std::vector<hipModule_t>* modules, // The device modules containing code object
char* var, // The shadow variable in host code
char* hostVar, // Variable name in host code
const char* deviceVar, // Variable name in device code
int ext, // Whether this variable is external
int size, // Size of the variable
int constant, // Whether this variable is constant
int global) // Unknown, always 0
{
HIP_INIT_API(__hipRegisterVar, modules, var, hostVar, deviceVar, ext, size, constant, global);
DeviceVar dvar{var, std::string{ hostVar }, static_cast<size_t>(size), modules,
std::vector<RegisteredVar>{ g_deviceCnt }, false };
for (int deviceId = 0; deviceId < g_deviceCnt; deviceId++) {
auto device = ihipGetDevice(deviceId);
if(!device) {
continue;
}
hsa_executable_t& executable = (*modules)[deviceId]->executable;
hsa_agent_t& agent = g_allAgents[deviceId + 1];
size_t bytes = 0;
hipDeviceptr_t devicePtr = nullptr;
bool success = createGlobalVarObj(executable, agent, hostVar, &devicePtr, &bytes);
if(!success) {
return;
}
dvar.rvars[deviceId].devicePtr_ = devicePtr;
dvar.rvars[deviceId].size_ = bytes;
hc::AmPointerInfo ptrInfo(nullptr, devicePtr, devicePtr, bytes, device->_acc, true, false);
hc::am_memtracker_add(devicePtr, ptrInfo);
#if USE_APP_PTR_FOR_CTX
hc::am_memtracker_update(devicePtr, device->_deviceId, 0u, ihipGetTlsDefaultCtx());
#else
hc::am_memtracker_update(devicePtr, device->_deviceId, 0u);
#endif
}
g_vars.insert(std::make_pair(std::string(hostVar), dvar));
}
extern "C" void __hipUnregisterFatBinary(std::vector<hipModule_t>* modules)
{
std::for_each(modules->begin(), modules->end(), [](hipModule_t module){ delete module; });
delete modules;
}
hipError_t hipConfigureCall(
dim3 gridDim,
dim3 blockDim,
size_t sharedMem,
hipStream_t stream)
{
GET_TLS();
auto ctx = ihipGetTlsDefaultCtx();
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
crit->_execStack.push(ihipExec_t{gridDim, blockDim, sharedMem, stream});
return hipSuccess;
}
extern "C" hipError_t __hipPushCallConfiguration(
dim3 gridDim,
dim3 blockDim,
size_t sharedMem,
hipStream_t stream)
{
GET_TLS();
auto ctx = ihipGetTlsDefaultCtx();
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
crit->_execStack.push(ihipExec_t{gridDim, blockDim, sharedMem, stream});
return hipSuccess;
}
extern "C" hipError_t __hipPopCallConfiguration(
dim3 *gridDim,
dim3 *blockDim,
size_t *sharedMem,
hipStream_t *stream)
{
GET_TLS();
auto ctx = ihipGetTlsDefaultCtx();
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
ihipExec_t exec;
exec = std::move(crit->_execStack.top());
crit->_execStack.pop();
*gridDim = exec._gridDim;
*blockDim = exec._blockDim;
*sharedMem = exec._sharedMem;
*stream = exec._hStream;
return hipSuccess;
}
int getCurrentDeviceId()
{
GET_TLS();
int deviceId = 0;
auto ctx = ihipGetTlsDefaultCtx();
if(!ctx) return deviceId;
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
if(crit->_execStack.size() != 0)
{
auto &exec = crit->_execStack.top();
if (exec._hStream) {
deviceId = exec._hStream->getDevice()->_deviceId;
} else if (ctx->getDevice()) {
deviceId = ctx->getDevice()->_deviceId;
}
} else if (ctx->getDevice()) {
deviceId = ctx->getDevice()->_deviceId;
}
return deviceId;
}
hipFunction_t ihipGetDeviceFunction(const void *hostFunction)
{
int deviceId = getCurrentDeviceId();
auto it = g_functions.find(hostFunction);
if (it == g_functions.end() || !it->second[deviceId]) {
return nullptr;
}
return it->second[deviceId];
}
hipError_t hipSetupArgument(
const void *arg,
size_t size,
size_t offset)
{
HIP_INIT_API(hipSetupArgument, arg, size, offset);
auto ctx = ihipGetTlsDefaultCtx();
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
auto& arguments = crit->_execStack.top()._arguments;
if (arguments.size() < offset + size) {
arguments.resize(offset + size);
}
::memcpy(&arguments[offset], arg, size);
return hipSuccess;
}
hipError_t hipLaunchByPtr(const void *hostFunction)
{
HIP_INIT_API(hipLaunchByPtr, hostFunction);
ihipExec_t exec;
{
auto ctx = ihipGetTlsDefaultCtx();
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
exec = std::move(crit->_execStack.top());
crit->_execStack.pop();
}
int deviceId;
if (exec._hStream) {
deviceId = exec._hStream->getDevice()->_deviceId;
}
else if (ihipGetTlsDefaultCtx() && ihipGetTlsDefaultCtx()->getDevice()) {
deviceId = ihipGetTlsDefaultCtx()->getDevice()->_deviceId;
}
else {
deviceId = 0;
}
hipError_t e = hipSuccess;
decltype(g_functions)::iterator it;
if ((it = g_functions.find(hostFunction)) == g_functions.end() ||
!it->second[deviceId]) {
e = hipErrorUnknown;
fprintf(stderr, "hipLaunchByPtr cannot find kernel with stub address %p"
" for device %d!\n", hostFunction, deviceId);
abort();
} else {
size_t size = exec._arguments.size();
void *extra[] = {
HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec._arguments[0],
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
HIP_LAUNCH_PARAM_END
};
e = hipModuleLaunchKernel(it->second[deviceId],
exec._gridDim.x, exec._gridDim.y, exec._gridDim.z,
exec._blockDim.x, exec._blockDim.y, exec._blockDim.z,
exec._sharedMem, exec._hStream, nullptr, extra);
}
return ihipLogStatus(e);
}
#ifdef __GNUC__
#pragma GCC visibility pop
#endif
-332
Просмотреть файл
@@ -1,332 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
//---
// Driver initialization and reporting:
#include <stack>
#include "hip/hip_runtime.h"
#include "hip_hcc_internal.h"
#include "trace_helper.h"
void ihipCtxStackUpdate() {
GET_TLS();
if (tls->ctxStack.empty()) {
tls->ctxStack.push(ihipGetTlsDefaultCtx());
}
}
hipError_t hipInit(unsigned int flags) {
HIP_INIT_API(hipInit, flags);
hipError_t e = hipSuccess;
// Flags must be 0
if (flags != 0) {
e = hipErrorInvalidValue;
}
return ihipLogStatus(e);
}
hipError_t hipCtxCreate(hipCtx_t* ctx, unsigned int flags, hipDevice_t device) {
HIP_INIT_API(hipCtxCreate, ctx, flags, device); // FIXME - review if we want to init
hipError_t e = hipSuccess;
auto deviceHandle = ihipGetDevice(device);
{
// Obtain mutex access to the device critical data, release by destructor
LockedAccessor_DeviceCrit_t deviceCrit(deviceHandle->criticalData());
auto ictx = new ihipCtx_t(deviceHandle, g_deviceCnt, flags);
*ctx = ictx;
ihipSetTlsDefaultCtx(*ctx);
tls->ctxStack.push(*ctx);
tls->getPrimaryCtx = false;
deviceCrit->addContext(ictx);
}
return ihipLogStatus(e);
}
hipError_t hipDeviceGet(hipDevice_t* device, int deviceId) {
HIP_INIT_API(hipDeviceGet, device, deviceId); // FIXME - review if we want to init
auto deviceHandle = ihipGetDevice(deviceId);
hipError_t e = hipSuccess;
if (deviceHandle == NULL) {
e = hipErrorInvalidDevice;
} else {
*device = deviceId;
}
return ihipLogStatus(e);
};
hipError_t hipDriverGetVersion(int* driverVersion) {
HIP_INIT_API(hipDriverGetVersion, driverVersion);
hipError_t e = hipSuccess;
if (driverVersion) {
*driverVersion = 4;
} else {
e = hipErrorInvalidValue;
}
return ihipLogStatus(e);
}
hipError_t hipRuntimeGetVersion(int* runtimeVersion) {
HIP_INIT_API(hipRuntimeGetVersion, runtimeVersion);
hipError_t e = hipSuccess;
if (runtimeVersion) {
*runtimeVersion = HIP_VERSION_PATCH;
} else {
e = hipErrorInvalidValue;
}
return ihipLogStatus(e);
}
hipError_t hipCtxDestroy(hipCtx_t ctx) {
HIP_INIT_API(hipCtxDestroy, ctx);
hipError_t e = hipSuccess;
ihipCtx_t* currentCtx = ihipGetTlsDefaultCtx();
ihipCtx_t* primaryCtx = ((ihipDevice_t*)ctx->getDevice())->_primaryCtx;
if (primaryCtx == ctx) {
e = hipErrorInvalidValue;
} else {
if (currentCtx == ctx) {
// need to destroy the ctx associated with calling thread
tls->ctxStack.pop();
}
{
auto deviceHandle = ctx->getWriteableDevice();
deviceHandle->locked_removeContext(ctx);
ctx->locked_reset();
}
delete ctx; // As per CUDA docs , attempting to access ctx from those threads which has
// this ctx as current, will result in the error HIP_ERROR_CONTEXT_IS_DESTROYED.
}
return ihipLogStatus(e);
}
hipError_t hipCtxPopCurrent(hipCtx_t* ctx) {
HIP_INIT_API(hipCtxPopCurrent, ctx);
hipError_t e = hipSuccess;
ihipCtx_t* currentCtx = ihipGetTlsDefaultCtx();
auto deviceHandle = currentCtx->getDevice();
*ctx = currentCtx;
if (!tls->ctxStack.empty()) {
tls->ctxStack.pop();
}
if (!tls->ctxStack.empty()) {
currentCtx = tls->ctxStack.top();
} else {
currentCtx = deviceHandle->_primaryCtx;
}
ihipSetTlsDefaultCtx(currentCtx); // TOD0 - Shall check for NULL?
return ihipLogStatus(e);
}
hipError_t hipCtxPushCurrent(hipCtx_t ctx) {
HIP_INIT_API(hipCtxPushCurrent, ctx);
hipError_t e = hipSuccess;
if (ctx != NULL) { // TODO- is this check needed?
ihipSetTlsDefaultCtx(ctx);
tls->ctxStack.push(ctx);
tls->getPrimaryCtx = false;
} else {
e = hipErrorInvalidContext;
}
return ihipLogStatus(e);
}
hipError_t hipCtxGetCurrent(hipCtx_t* ctx) {
HIP_INIT_API(hipCtxGetCurrent, ctx);
hipError_t e = hipSuccess;
if ((tls->getPrimaryCtx) || tls->ctxStack.empty()) {
*ctx = ihipGetTlsDefaultCtx();
} else {
*ctx = tls->ctxStack.top();
}
return ihipLogStatus(e);
}
hipError_t hipCtxSetCurrent(hipCtx_t ctx) {
HIP_INIT_API(hipCtxSetCurrent, ctx);
hipError_t e = hipSuccess;
if (ctx == NULL) {
tls->ctxStack.pop();
} else {
ihipSetTlsDefaultCtx(ctx);
tls->ctxStack.push(ctx);
tls->getPrimaryCtx = false;
}
return ihipLogStatus(e);
}
hipError_t hipCtxGetDevice(hipDevice_t* device) {
HIP_INIT_API(hipCtxGetDevice, device);
hipError_t e = hipSuccess;
ihipCtx_t* ctx = ihipGetTlsDefaultCtx();
if (ctx == nullptr) {
e = hipErrorInvalidContext;
// TODO *device = nullptr;
} else {
auto deviceHandle = ctx->getDevice();
*device = deviceHandle->_deviceId;
}
return ihipLogStatus(e);
}
hipError_t hipCtxGetApiVersion(hipCtx_t ctx, int* apiVersion) {
HIP_INIT_API(hipCtxGetApiVersion, apiVersion);
if (apiVersion) {
*apiVersion = 4;
}
return ihipLogStatus(hipSuccess);
}
hipError_t hipCtxGetCacheConfig(hipFuncCache_t* cacheConfig) {
HIP_INIT_API(hipCtxGetCacheConfig, cacheConfig);
*cacheConfig = hipFuncCachePreferNone;
return ihipLogStatus(hipSuccess);
}
hipError_t hipCtxSetCacheConfig(hipFuncCache_t cacheConfig) {
HIP_INIT_API(hipCtxSetCacheConfig, cacheConfig);
// Nop, AMD does not support variable cache configs.
return ihipLogStatus(hipSuccess);
}
hipError_t hipCtxSetSharedMemConfig(hipSharedMemConfig config) {
HIP_INIT_API(hipCtxSetSharedMemConfig, config);
// Nop, AMD does not support variable shared mem configs.
return ihipLogStatus(hipSuccess);
}
hipError_t hipCtxGetSharedMemConfig(hipSharedMemConfig* pConfig) {
HIP_INIT_API(hipCtxGetSharedMemConfig, pConfig);
*pConfig = hipSharedMemBankSizeFourByte;
return ihipLogStatus(hipSuccess);
}
hipError_t hipCtxSynchronize(void) {
HIP_INIT_API(hipCtxSynchronize, 1);
return ihipLogStatus(ihipSynchronize(tls)); // TODO Shall check validity of ctx?
}
hipError_t hipCtxGetFlags(unsigned int* flags) {
HIP_INIT_API(hipCtxGetFlags, flags);
hipError_t e = hipSuccess;
ihipCtx_t* tempCtx;
tempCtx = ihipGetTlsDefaultCtx();
*flags = tempCtx->_ctxFlags;
return ihipLogStatus(e);
}
hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int* active) {
HIP_INIT_API(hipDevicePrimaryCtxGetState, dev, flags, active);
hipError_t e = hipSuccess;
auto deviceHandle = ihipGetDevice(dev);
if (deviceHandle == NULL) {
e = hipErrorInvalidDevice;
}
ihipCtx_t* tempCtx;
tempCtx = ihipGetTlsDefaultCtx();
ihipCtx_t* primaryCtx = deviceHandle->_primaryCtx;
if (tempCtx == primaryCtx) {
*active = 1;
*flags = tempCtx->_ctxFlags;
} else {
*active = 0;
*flags = primaryCtx->_ctxFlags;
}
return ihipLogStatus(e);
}
hipError_t hipDevicePrimaryCtxRelease(hipDevice_t dev) {
HIP_INIT_API(hipDevicePrimaryCtxRelease, dev);
hipError_t e = hipSuccess;
auto deviceHandle = ihipGetDevice(dev);
if (deviceHandle == NULL) {
e = hipErrorInvalidDevice;
}
return ihipLogStatus(e);
}
hipError_t hipDevicePrimaryCtxRetain(hipCtx_t* pctx, hipDevice_t dev) {
HIP_INIT_API(hipDevicePrimaryCtxRetain, pctx, dev);
hipError_t e = hipSuccess;
auto deviceHandle = ihipGetDevice(dev);
if (deviceHandle == NULL) {
e = hipErrorInvalidDevice;
}
*pctx = deviceHandle->_primaryCtx;
return ihipLogStatus(e);
}
hipError_t hipDevicePrimaryCtxReset(hipDevice_t dev) {
HIP_INIT_API(hipDevicePrimaryCtxReset, dev);
hipError_t e = hipSuccess;
auto deviceHandle = ihipGetDevice(dev);
if (deviceHandle == NULL) {
e = hipErrorInvalidDevice;
}
ihipCtx_t* primaryCtx = deviceHandle->_primaryCtx;
primaryCtx->locked_reset();
return ihipLogStatus(e);
}
hipError_t hipDevicePrimaryCtxSetFlags(hipDevice_t dev, unsigned int flags) {
HIP_INIT_API(hipDevicePrimaryCtxSetFlags, dev, flags);
hipError_t e = hipSuccess;
auto deviceHandle = ihipGetDevice(dev);
if (deviceHandle == NULL) {
e = hipErrorInvalidDevice;
} else {
e = hipErrorContextAlreadyInUse;
}
return ihipLogStatus(e);
}
-648
Просмотреть файл
@@ -1,648 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include "hip_hcc_internal.h"
#include "trace_helper.h"
#include "device_util.h"
//-------------------------------------------------------------------------------------------------
// Devices
//-------------------------------------------------------------------------------------------------
// TODO - does this initialize HIP runtime?
hipError_t hipGetDevice(int* deviceId) {
HIP_INIT_API(hipGetDevice, deviceId);
hipError_t e = hipSuccess;
if (deviceId == nullptr)
return ihipLogStatus(hipErrorInvalidValue);
auto ctx = ihipGetTlsDefaultCtx();
if (ctx == nullptr) {
e = hipErrorInvalidDevice; // TODO, check error code.
*deviceId = -1;
} else {
*deviceId = ctx->getDevice()->_deviceId;
}
return ihipLogStatus(e);
}
// TODO - does this initialize HIP runtime?
hipError_t ihipGetDeviceCount(int* count) {
hipError_t e = hipSuccess;
if (count != nullptr) {
*count = g_deviceCnt;
if (*count > 0) {
e = hipSuccess;
} else {
e = hipErrorNoDevice;
}
} else {
e = hipErrorInvalidValue;
}
return e;
}
hipError_t hipGetDeviceCount(int* count) {
HIP_INIT_API(hipGetDeviceCount, count);
return ihipLogStatus(ihipGetDeviceCount(count));
}
hipError_t hipDeviceSetCacheConfig(hipFuncCache_t cacheConfig) {
HIP_INIT_API(hipDeviceSetCacheConfig, cacheConfig);
// Nop, AMD does not support variable cache configs.
return ihipLogStatus(hipSuccess);
}
hipError_t hipDeviceGetCacheConfig(hipFuncCache_t* cacheConfig) {
HIP_INIT_API(hipDeviceGetCacheConfig, cacheConfig);
if (cacheConfig == nullptr) {
return ihipLogStatus(hipErrorInvalidValue);
}
*cacheConfig = hipFuncCachePreferNone;
return ihipLogStatus(hipSuccess);
}
hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) {
HIP_INIT_API(hipDeviceGetLimit, pValue, limit);
if (pValue == nullptr) {
return ihipLogStatus(hipErrorInvalidValue);
}
#if __HIP_ENABLE_DEVICE_MALLOC__
if (limit == hipLimitMallocHeapSize) {
*pValue = (size_t)__HIP_SIZE_OF_HEAP;
return ihipLogStatus(hipSuccess);
}
#endif
return ihipLogStatus(hipErrorUnsupportedLimit);
}
hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) {
HIP_INIT_API(hipFuncSetCacheConfig, cacheConfig);
// Nop, AMD does not support variable cache configs.
return ihipLogStatus(hipSuccess);
}
hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) {
HIP_INIT_API(hipDeviceSetSharedMemConfig, config);
// Nop, AMD does not support variable shared mem configs.
return ihipLogStatus(hipSuccess);
}
hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig* pConfig) {
HIP_INIT_API(hipDeviceGetSharedMemConfig, pConfig);
*pConfig = hipSharedMemBankSizeFourByte;
return ihipLogStatus(hipSuccess);
}
hipError_t hipSetDevice(int deviceId) {
HIP_INIT_API(hipSetDevice, deviceId);
if ((deviceId < 0) || (deviceId >= g_deviceCnt)) {
return ihipLogStatus(hipErrorInvalidDevice);
} else {
ihipSetTlsDefaultCtx(ihipGetPrimaryCtx(deviceId));
tls->getPrimaryCtx = true;
return ihipLogStatus(hipSuccess);
}
}
hipError_t hipDeviceSynchronize(void) {
HIP_INIT_SPECIAL_API(hipDeviceSynchronize, TRACE_SYNC);
return ihipLogStatus(ihipSynchronize(tls));
}
hipError_t hipDeviceReset(void) {
HIP_INIT_API(hipDeviceReset, );
auto* ctx = ihipGetTlsDefaultCtx();
// TODO-HCC
// This function currently does a user-level cleanup of known resources.
// It could benefit from KFD support to perform a more "nuclear" clean that would include any
// associated kernel resources and page table entries.
#if 0
if (ctx) {
// Release ctx resources (streams and memory):
ctx->locked_reset();
}
#endif
if (ctx) {
ihipDevice_t* deviceHandle = ctx->getWriteableDevice();
deviceHandle->locked_reset();
}
return ihipLogStatus(hipSuccess);
}
hipError_t ihipDeviceSetState(TlsData *tls) {
hipError_t e = hipErrorInvalidContext;
auto* ctx = ihipGetTlsDefaultCtx();
if (ctx) {
ihipDevice_t* deviceHandle = ctx->getWriteableDevice();
if (deviceHandle->_state == 0) {
deviceHandle->_state = 1;
}
e = hipSuccess;
}
return e;
}
hipError_t ihipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
hipError_t e = hipSuccess;
if (pi == nullptr) {
return hipErrorInvalidValue;
}
auto* hipDevice = ihipGetDevice(device);
hipDeviceProp_t* prop = &hipDevice->_props;
if (hipDevice) {
switch (attr) {
case hipDeviceAttributeMaxThreadsPerBlock:
*pi = prop->maxThreadsPerBlock;
break;
case hipDeviceAttributeMaxBlockDimX:
*pi = prop->maxThreadsDim[0];
break;
case hipDeviceAttributeMaxBlockDimY:
*pi = prop->maxThreadsDim[1];
break;
case hipDeviceAttributeMaxBlockDimZ:
*pi = prop->maxThreadsDim[2];
break;
case hipDeviceAttributeMaxGridDimX:
*pi = prop->maxGridSize[0];
break;
case hipDeviceAttributeMaxGridDimY:
*pi = prop->maxGridSize[1];
break;
case hipDeviceAttributeMaxGridDimZ:
*pi = prop->maxGridSize[2];
break;
case hipDeviceAttributeMaxSharedMemoryPerBlock:
*pi = prop->sharedMemPerBlock;
break;
case hipDeviceAttributeTotalConstantMemory:
*pi = prop->totalConstMem;
break;
case hipDeviceAttributeWarpSize:
*pi = prop->warpSize;
break;
case hipDeviceAttributeMaxRegistersPerBlock:
*pi = prop->regsPerBlock;
break;
case hipDeviceAttributeClockRate:
*pi = prop->clockRate;
break;
case hipDeviceAttributeMemoryClockRate:
*pi = prop->memoryClockRate;
break;
case hipDeviceAttributeMemoryBusWidth:
*pi = prop->memoryBusWidth;
break;
case hipDeviceAttributeMultiprocessorCount:
*pi = prop->multiProcessorCount;
break;
case hipDeviceAttributeComputeMode:
*pi = prop->computeMode;
break;
case hipDeviceAttributeL2CacheSize:
*pi = prop->l2CacheSize;
break;
case hipDeviceAttributeMaxThreadsPerMultiProcessor:
*pi = prop->maxThreadsPerMultiProcessor;
break;
case hipDeviceAttributeComputeCapabilityMajor:
*pi = prop->major;
break;
case hipDeviceAttributeComputeCapabilityMinor:
*pi = prop->minor;
break;
case hipDeviceAttributePciBusId:
*pi = prop->pciBusID;
break;
case hipDeviceAttributeConcurrentKernels:
*pi = prop->concurrentKernels;
break;
case hipDeviceAttributePciDeviceId:
*pi = prop->pciDeviceID;
break;
case hipDeviceAttributeMaxSharedMemoryPerMultiprocessor:
*pi = prop->maxSharedMemoryPerMultiProcessor;
break;
case hipDeviceAttributeIsMultiGpuBoard:
*pi = prop->isMultiGpuBoard;
break;
case hipDeviceAttributeIntegrated:
*pi = prop->integrated;
break;
case hipDeviceAttributeMaxTexture1DWidth:
*pi = prop->maxTexture1D;
break;
case hipDeviceAttributeMaxTexture2DWidth:
*pi = prop->maxTexture2D[0];
break;
case hipDeviceAttributeMaxTexture2DHeight:
*pi = prop->maxTexture2D[1];
break;
case hipDeviceAttributeMaxTexture3DWidth:
*pi = prop->maxTexture3D[0];
break;
case hipDeviceAttributeMaxTexture3DHeight:
*pi = prop->maxTexture3D[1];
break;
case hipDeviceAttributeMaxTexture3DDepth:
*pi = prop->maxTexture3D[2];
break;
case hipDeviceAttributeHdpMemFlushCntl:
{
uint32_t** hdp = reinterpret_cast<uint32_t**>(pi);
*hdp = prop->hdpMemFlushCntl;
}
break;
case hipDeviceAttributeHdpRegFlushCntl:
{
uint32_t** hdp = reinterpret_cast<uint32_t**>(pi);
*hdp = prop->hdpRegFlushCntl;
}
break;
case hipDeviceAttributeCooperativeLaunch:
*pi = prop->cooperativeLaunch;
break;
case hipDeviceAttributeCooperativeMultiDeviceLaunch:
*pi = prop->cooperativeMultiDeviceLaunch;
break;
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc:
*pi = prop->cooperativeMultiDeviceUnmatchedFunc;
break;
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim:
*pi = prop->cooperativeMultiDeviceUnmatchedGridDim;
break;
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim:
*pi = prop->cooperativeMultiDeviceUnmatchedBlockDim;
break;
case hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem:
*pi = prop->cooperativeMultiDeviceUnmatchedSharedMem;
break;
case hipDeviceAttributeMaxPitch:
*pi = prop->memPitch;
break;
case hipDeviceAttributeTextureAlignment:
*pi = prop->textureAlignment;
break;
case hipDeviceAttributeTexturePitchAlignment:
*pi = prop->texturePitchAlignment;
break;
case hipDeviceAttributeKernelExecTimeout:
*pi = prop->kernelExecTimeoutEnabled;
break;
case hipDeviceAttributeCanMapHostMemory:
*pi = prop->canMapHostMemory;
break;
case hipDeviceAttributeEccEnabled:
*pi = prop->ECCEnabled;
break;
default:
e = hipErrorInvalidValue;
break;
}
} else {
e = hipErrorInvalidDevice;
}
return e;
}
hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) {
HIP_INIT_API(hipDeviceGetAttribute, pi, attr, device);
if ((device < 0) || (device >= g_deviceCnt)) {
return ihipLogStatus(hipErrorInvalidDevice);
}
return ihipLogStatus(ihipDeviceGetAttribute(pi, attr, device));
}
hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, int device) {
hipError_t e;
if (props != nullptr) {
auto* hipDevice = ihipGetDevice(device);
if (hipDevice) {
// copy saved props
*props = hipDevice->_props;
e = hipSuccess;
} else {
e = hipErrorInvalidDevice;
}
} else {
e = hipErrorInvalidDevice;
}
return e;
}
hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) {
HIP_INIT_API(hipGetDeviceProperties, props, device);
if ((device < 0) || (device >= g_deviceCnt)) {
return ihipLogStatus(hipErrorInvalidDevice);
}
return ihipLogStatus(ihipGetDeviceProperties(props, device));
}
hipError_t hipSetDeviceFlags(unsigned int flags) {
HIP_INIT_API(hipSetDeviceFlags, flags);
hipError_t e = hipSuccess;
auto* ctx = ihipGetTlsDefaultCtx();
// TODO : does this really OR in the flags or replaces previous flags:
// TODO : Review error handling behavior for this function, it often returns
// ErrorSetOnActiveProcess
if (ctx) {
auto* deviceHandle = ctx->getDevice();
if (deviceHandle->_state == 0) {
ctx->_ctxFlags = ctx->_ctxFlags | flags;
if (flags & hipDeviceScheduleMask) {
switch (hipDeviceScheduleMask) {
case hipDeviceScheduleAuto:
case hipDeviceScheduleSpin:
case hipDeviceScheduleYield:
case hipDeviceScheduleBlockingSync:
e = hipSuccess;
break;
default:
e = hipSuccess; // TODO - should this be error? Map to Auto?
// e = hipErrorInvalidValue;
break;
}
}
unsigned supportedFlags =
hipDeviceScheduleMask | hipDeviceMapHost | hipDeviceLmemResizeToMax;
if (flags & (~supportedFlags)) {
e = hipErrorInvalidValue;
}
} else {
e = hipErrorSetOnActiveProcess;
}
} else {
e = hipErrorInvalidDevice;
}
return ihipLogStatus(e);
};
hipError_t hipDeviceComputeCapability(int* major, int* minor, hipDevice_t device) {
HIP_INIT_API(hipDeviceComputeCapability, major, minor, device);
hipError_t e = hipSuccess;
if ((device < 0) || (device >= g_deviceCnt)) {
e = hipErrorInvalidDevice;
} else {
e = ihipDeviceGetAttribute(major, hipDeviceAttributeComputeCapabilityMajor, device);
e = ihipDeviceGetAttribute(minor, hipDeviceAttributeComputeCapabilityMinor, device);
}
return ihipLogStatus(e);
}
hipError_t hipDeviceGetName(char* name, int len, hipDevice_t device) {
// Cast to void* here to avoid printing garbage in debug modes.
HIP_INIT_API(hipDeviceGetName, (void*)name, len, device);
hipError_t e = hipSuccess;
if ((device < 0) || (device >= g_deviceCnt)) {
e = hipErrorInvalidDevice;
} else {
auto deviceHandle = ihipGetDevice(device);
int nameLen = strlen(deviceHandle->_props.name);
if (nameLen <= len) memcpy(name, deviceHandle->_props.name, nameLen);
}
return ihipLogStatus(e);
}
hipError_t hipDeviceGetPCIBusId(char* pciBusId, int len, int device) {
// Cast to void* here to avoid printing garbage in debug modes.
HIP_INIT_API(hipDeviceGetPCIBusId, (void*)pciBusId, len, device);
hipError_t e = hipErrorInvalidValue;
if ((device < 0) || (device >= g_deviceCnt)) {
e = hipErrorInvalidDevice;
} else {
if ((pciBusId != nullptr) && (len > 0)) {
auto deviceHandle = ihipGetDevice(device);
int retVal =
snprintf(pciBusId, len, "%04x:%02x:%02x.0", deviceHandle->_props.pciDomainID,
deviceHandle->_props.pciBusID, deviceHandle->_props.pciDeviceID);
if (retVal > 0 && retVal < len) {
e = hipSuccess;
}
}
}
return ihipLogStatus(e);
}
hipError_t hipDeviceTotalMem(size_t* bytes, hipDevice_t device) {
HIP_INIT_API(hipDeviceTotalMem, bytes, device);
hipError_t e = hipSuccess;
if ((device < 0) || (device >= g_deviceCnt)) {
e = hipErrorInvalidDevice;
} else {
auto deviceHandle = ihipGetDevice(device);
*bytes = deviceHandle->_props.totalGlobalMem;
}
return ihipLogStatus(e);
}
hipError_t hipDeviceGetByPCIBusId(int* device, const char* pciBusId) {
HIP_INIT_API(hipDeviceGetByPCIBusId, device, pciBusId);
hipDeviceProp_t tempProp;
int deviceCount = 0;
hipError_t e = hipErrorInvalidValue;
if ((device != nullptr) && (pciBusId != nullptr)) {
int pciBusID = -1;
int pciDeviceID = -1;
int pciDomainID = -1;
int len = 0;
len = sscanf(pciBusId, "%04x:%02x:%02x", &pciDomainID, &pciBusID, &pciDeviceID);
if (len == 3) {
ihipGetDeviceCount(&deviceCount);
for (int i = 0; i < deviceCount; i++) {
ihipGetDeviceProperties(&tempProp, i);
if (tempProp.pciBusID == pciBusID) {
*device = i;
e = hipSuccess;
break;
}
}
}
}
return ihipLogStatus(e);
}
hipError_t hipChooseDevice(int* device, const hipDeviceProp_t* prop) {
HIP_INIT_API(hipChooseDevice, device, prop);
hipDeviceProp_t tempProp;
hipError_t e = hipSuccess;
if ((device == NULL) || (prop == NULL)) {
e = hipErrorInvalidValue;
}
if (e == hipSuccess) {
int deviceCount;
int inPropCount = 0;
int matchedPropCount = 0;
ihipGetDeviceCount(&deviceCount);
*device = 0;
for (int i = 0; i < deviceCount; i++) {
ihipGetDeviceProperties(&tempProp, i);
if (prop->major != 0) {
inPropCount++;
if (tempProp.major >= prop->major) {
matchedPropCount++;
}
if (prop->minor != 0) {
inPropCount++;
if (tempProp.minor >= prop->minor) {
matchedPropCount++;
}
}
}
if (prop->totalGlobalMem != 0) {
inPropCount++;
if (tempProp.totalGlobalMem >= prop->totalGlobalMem) {
matchedPropCount++;
}
}
if (prop->sharedMemPerBlock != 0) {
inPropCount++;
if (tempProp.sharedMemPerBlock >= prop->sharedMemPerBlock) {
matchedPropCount++;
}
}
if (prop->maxThreadsPerBlock != 0) {
inPropCount++;
if (tempProp.maxThreadsPerBlock >= prop->maxThreadsPerBlock) {
matchedPropCount++;
}
}
if (prop->totalConstMem != 0) {
inPropCount++;
if (tempProp.totalConstMem >= prop->totalConstMem) {
matchedPropCount++;
}
}
if (prop->multiProcessorCount != 0) {
inPropCount++;
if (tempProp.multiProcessorCount >= prop->multiProcessorCount) {
matchedPropCount++;
}
}
if (prop->maxThreadsPerMultiProcessor != 0) {
inPropCount++;
if (tempProp.maxThreadsPerMultiProcessor >= prop->maxThreadsPerMultiProcessor) {
matchedPropCount++;
}
}
if (prop->memoryClockRate != 0) {
inPropCount++;
if (tempProp.memoryClockRate >= prop->memoryClockRate) {
matchedPropCount++;
}
}
if (inPropCount == matchedPropCount) {
*device = i;
}
#if 0
else{
e= hipErrorInvalidValue;
}
#endif
}
}
return ihipLogStatus(e);
}
#define HSA_ERROR_CHECK(hsa_error_code) \
if ((hsa_error_code != HSA_STATUS_SUCCESS) && (hsa_error_code != HSA_STATUS_INFO_BREAK)) { \
return ihipLogStatus(hipErrorRuntimeOther); \
}
hipError_t hipExtGetLinkTypeAndHopCount(int device1, int device2, uint32_t* linktype, uint32_t* hopcount) {
HIP_INIT_API(hipExtGetLinkTypeAndHopCount, device1, device2, linktype, hopcount);
if ((device1 < 0) || (device1 >= g_deviceCnt) || (device2 < 0) || (device2 >= g_deviceCnt)) {
return ihipLogStatus(hipErrorInvalidDevice);
} else {
auto device1Handle = ihipGetDevice(device1);
auto device2Handle = ihipGetDevice(device2);
const auto& find_pool = [](hsa_amd_memory_pool_t pool, void* data) {
bool allowed;
hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &allowed);
if (allowed) {
hsa_amd_segment_t segment;
hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
if (HSA_AMD_SEGMENT_GLOBAL != segment) return HSA_STATUS_SUCCESS;
uint32_t flags;
hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flags);
if (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED) {
*((hsa_amd_memory_pool_t*)data) = pool;
return HSA_STATUS_INFO_BREAK;
}
}
return HSA_STATUS_SUCCESS;
};
hsa_status_t err;
hsa_amd_memory_pool_t pool;
err = hsa_amd_agent_iterate_memory_pools(device2Handle->_hsaAgent, find_pool, (void*)&pool);
HSA_ERROR_CHECK(err);
hsa_amd_memory_pool_link_info_t link_info;
err = hsa_amd_agent_memory_pool_get_info(device1Handle->_hsaAgent, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO, &link_info);
HSA_ERROR_CHECK(err);
*linktype = link_info.link_type;
if (link_info.numa_distance < 30)
*hopcount = 1;
else
*hopcount = 2;
return ihipLogStatus(hipSuccess);
}
}
-61
Просмотреть файл
@@ -1,61 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include "hip_hcc_internal.h"
#include "trace_helper.h"
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
// Error Handling
//---
hipError_t hipGetLastError() {
HIP_INIT_API(hipGetLastError);
// Return last error, but then reset the state:
hipError_t e = ihipLogStatus(tls->lastHipError);
tls->lastHipError = hipSuccess;
return e;
}
hipError_t hipPeekAtLastError() {
HIP_INIT_API(hipPeekAtLastError);
// peek at last error, but don't reset it.
return ihipLogStatus(tls->lastHipError);
}
const char* hipGetErrorName(hipError_t hip_error) {
HIP_INIT_API(hipGetErrorName, hip_error);
return ihipErrorString(hip_error);
}
const char* hipGetErrorString(hipError_t hip_error) {
HIP_INIT_API(hipGetErrorString, hip_error);
// TODO - return a message explaining the error.
// TODO - This should be set up to return the same string reported in the the doxygen comments,
// somehow.
return hipGetErrorName(hip_error);
}
-483
Просмотреть файл
@@ -1,483 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include "hip_hcc_internal.h"
#include "trace_helper.h"
#include <errno.h> // errno, ENOENT
#include <fcntl.h> // O_RDWR, O_CREATE
#include <sys/mman.h> // shm_open, shm_unlink, mmap, munmap, PROT_READ, PROT_WRITE, MAP_SHARED, MAP_FAILED
#include <unistd.h> // ftruncate, close
namespace {
inline
const char* hsa_to_string(hsa_status_t err) noexcept
{
const char* r{};
if (hsa_status_string(err, &r) == HSA_STATUS_SUCCESS) return r;
return "Unknown.";
}
template<std::size_t m, std::size_t n>
inline
void throwing_result_check(hsa_status_t res, const char (&file)[m],
const char (&function)[n], int line) {
if (res == HSA_STATUS_SUCCESS) return;
throw std::runtime_error{"Failed in file " + (file +
(", in function \"" + (function +
("\", on line " + std::to_string(line))))) +
", with error: " + hsa_to_string(res)};
}
template<std::size_t m, std::size_t n>
inline
void throwing_retval_check(int good, int retval, const char (&file)[m],
const char (&function)[n], int line) {
if (retval == good) return;
throw std::runtime_error{"Failed in file " + (file +
(", in function \"" + (function +
("\", on line " + std::to_string(line))))) +
", with error: " + strerror(retval)};
}
template<std::size_t m, std::size_t n, std::size_t o>
inline
void throwing_msg_check(bool bad, const char (&msg)[o],
const char (&file)[m],
const char (&function)[n], int line) {
if (!bad) return;
throw std::runtime_error{"Failed in file " + (file +
(", in function \"" + (function +
("\", on line " + std::to_string(line))))) +
", with error: " + msg};
}
template<std::size_t m, std::size_t n>
inline
void throwing_errno_check(bool bad, const char (&file)[m],
const char (&function)[n], int line) {
if (!bad) return;
throw std::runtime_error{"Failed in file " + (file +
(", in function \"" + (function +
("\", on line " + std::to_string(line))))) +
", with error: " + strerror(errno)};
}
} // Unnamed namespace.
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
// Events
//---
ihipEvent_t::ihipEvent_t(unsigned flags) : _criticalData(this) {
_flags = flags;
GET_TLS();
auto ctx = ihipGetTlsDefaultCtx();
_deviceId = ctx == nullptr ? -1 : ctx->getDevice()->_deviceId;
};
// Attach to an existing completion future:
void ihipEvent_t::attachToCompletionFuture(const hc::completion_future* cf, hipStream_t stream,
ihipEventType_t eventType) {
LockedAccessor_EventCrit_t crit(_criticalData);
crit->_eventData.marker(*cf);
crit->_eventData._type = eventType;
crit->_eventData._stream = stream;
crit->_eventData._state = hipEventStatusRecording;
}
static void createIpcEventShmemIfNeeded(ihipEventData_t &ecd) {
if (!ecd._ipc_name.empty()) return;
// create random shmem name
char name_template[] = "/tmp/eventXXXXXX";
int temp_fd = mkstemp(name_template);
throwing_errno_check(-1 == temp_fd, __FILE__, __func__, __LINE__);
// copy shmem name into event data, reformat to use a single slash
ecd._ipc_name = name_template;
ecd._ipc_name.replace(0, 5, "/hip_");
// open shmem
ecd._ipc_fd = shm_open(ecd._ipc_name.c_str(), O_RDWR | O_CREAT, 0777);
throwing_errno_check(ecd._ipc_fd < 0, __FILE__, __func__, __LINE__);
// size it
throwing_retval_check(0, ftruncate(ecd._ipc_fd, sizeof(ihipIpcEventShmem_t)), __FILE__, __func__, __LINE__);
// mmap it
ecd._ipc_shmem = (ihipIpcEventShmem_t*)mmap(0, sizeof(ihipIpcEventShmem_t), PROT_READ | PROT_WRITE, MAP_SHARED, ecd._ipc_fd, 0);
throwing_errno_check(NULL == ecd._ipc_shmem, __FILE__, __func__, __LINE__);
// initialize shared state
ecd._ipc_shmem->owners = 1;
ecd._ipc_shmem->read_index = -1;
ecd._ipc_shmem->write_index = 0;
for (int i=0; i < IPC_SIGNALS_PER_EVENT; i++) {
ecd._ipc_shmem->signal[i] = 0;
}
// remove temp file
throwing_errno_check(-1 == close(temp_fd), __FILE__, __func__, __LINE__);
throwing_errno_check(-1 == unlink(name_template), __FILE__, __func__, __LINE__);
}
static std::pair<hipEventStatus_t, uint64_t> refreshEventStatus(ihipEventData_t &ecd) {
if (ecd._state == hipEventStatusRecording && ecd.marker().is_ready()) {
if ((ecd._type == hipEventTypeIndependent) ||
(ecd._type == hipEventTypeStopCommand)) {
ecd._timestamp = ecd.marker().get_end_tick();
} else if (ecd._type == hipEventTypeStartCommand) {
ecd._timestamp = ecd.marker().get_begin_tick();
} else {
ecd._timestamp = 0;
assert(0); // TODO - move to debug assert
}
ecd._state = hipEventStatusComplete;
return std::pair<hipEventStatus_t, uint64_t>(ecd._state,
ecd._timestamp);
}
// Not complete path here:
return std::pair<hipEventStatus_t, uint64_t>(ecd._state, ecd._timestamp);
}
hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags) {
hipError_t e = hipSuccess;
unsigned supportedFlags = hipEventDefault | hipEventBlockingSync | hipEventDisableTiming |
hipEventReleaseToDevice | hipEventReleaseToSystem |
hipEventInterprocess;
const unsigned releaseFlags = (hipEventReleaseToDevice | hipEventReleaseToSystem);
const bool illegalFlags =
(flags & ~supportedFlags) || // can't set any unsupported flags.
(flags & releaseFlags) == releaseFlags; // can't set both release flags
if (event && !illegalFlags) {
*event = new ihipEvent_t(flags);
} else {
e = hipErrorInvalidValue;
}
return e;
}
hipError_t hipEventCreateWithFlags(hipEvent_t* event, unsigned flags) {
HIP_INIT_API(hipEventCreateWithFlags, event, flags);
return ihipLogStatus(ihipEventCreate(event, flags));
}
hipError_t hipEventCreate(hipEvent_t* event) {
HIP_INIT_API(hipEventCreate, event);
return ihipLogStatus(ihipEventCreate(event, 0));
}
hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) {
HIP_INIT_SPECIAL_API(hipEventRecord, TRACE_SYNC, event, stream);
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
stream = ihipSyncAndResolveStream(stream);
LockedAccessor_EventCrit_t eCrit(event->criticalData());
auto &ecd{eCrit->_eventData};
if (ecd._state == hipEventStatusUnitialized) return ihipLogStatus(hipErrorInvalidHandle);
if (HIP_SYNC_NULL_STREAM && stream->isDefaultStream()) {
// TODO-HIP_SYNC_NULL_STREAM : can remove this code when HIP_SYNC_NULL_STREAM = 0
// If default stream , then wait on all queues.
ihipCtx_t* ctx = ihipGetTlsDefaultCtx();
ctx->locked_syncDefaultStream(true, true);
ecd.marker(hc::completion_future()); // reset event
ecd._stream = stream;
ecd._timestamp = hc::get_system_ticks();
ecd._state = hipEventStatusComplete;
// TODO handle IPC case?
}
else {
// Record the event in the stream:
ecd.marker(stream->locked_recordEvent(event));
ecd._stream = stream;
ecd._timestamp = 0;
ecd._state = hipEventStatusRecording;
if (event->_flags & hipEventInterprocess) {
createIpcEventShmemIfNeeded(ecd);
int write_index = ecd._ipc_shmem->write_index++; // fetch add
int offset = write_index % IPC_SIGNALS_PER_EVENT;
// While event still valid and still locked, spin.
while (ecd._ipc_shmem->signal[offset] != 0) {
// TODO backoff
}
// Lock signal.
ecd._ipc_shmem->signal[offset] = 1;
// forward signal state from local signal to IPC signal via host callback
// create callback that can be passed to hsa_amd_signal_async_handler
// this function decrements the IPC signal by 1 to indicate completion
std::atomic<int> *signal = &ecd._ipc_shmem->signal[offset];
auto t{new std::function<void()>{[=]() {
signal->store(0);
}}};
// register above callback with HSA runtime to be called when local signal
// is decremented from 1 to 0 by CP
auto local_signal = *reinterpret_cast<hsa_signal_t*>(eCrit->_eventData.marker().get_native_handle());
hsa_amd_signal_async_handler(local_signal, HSA_SIGNAL_CONDITION_LT, 1,
[](hsa_signal_value_t x, void* p) {
(*static_cast<decltype(t)>(p))();
delete static_cast<decltype(t)>(p);
return false;
}, t);
// Update read index to indicate new signal.
int expected = write_index-1;
while (!ecd._ipc_shmem->read_index.compare_exchange_weak(expected, write_index)) {
throwing_msg_check(
expected >= write_index,
"IPC event record update read index failure",
__FILE__, __func__, __LINE__);
expected = write_index-1;
}
}
}
return ihipLogStatus(hipSuccess);
}
hipError_t hipEventDestroy(hipEvent_t event) {
HIP_INIT_API(hipEventDestroy, event);
if (event) {
{
LockedAccessor_EventCrit_t crit(event->criticalData());
auto &ecd{crit->_eventData};
if (ecd._ipc_shmem) {
int owners = --ecd._ipc_shmem->owners;
throwing_errno_check(-1 == munmap(ecd._ipc_shmem, sizeof(ihipIpcEventShmem_t)), __FILE__, __func__, __LINE__);
throwing_errno_check(-1 == close(ecd._ipc_fd), __FILE__, __func__, __LINE__);
if (0 == owners)
throwing_errno_check(-1 == shm_unlink(ecd._ipc_name.c_str()), __FILE__, __func__, __LINE__);
}
}
delete event;
return ihipLogStatus(hipSuccess);
} else {
return ihipLogStatus(hipErrorInvalidHandle);
}
}
hipError_t hipEventSynchronize(hipEvent_t event) {
HIP_INIT_SPECIAL_API(hipEventSynchronize, TRACE_SYNC, event);
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
if (!(event->_flags & hipEventReleaseToSystem)) {
tprintf(DB_WARN,
"hipEventSynchronize on event without system-scope fence ; consider creating with "
"hipEventReleaseToSystem\n");
}
auto ecd = event->locked_copyCrit();
if (event->_flags & hipEventInterprocess) {
// this is an IPC event
int previous_read_index = ecd._ipc_shmem->read_index;
if (previous_read_index >= 0) {
// we have at least one recorded event, so proceed
int offset = previous_read_index % IPC_SIGNALS_PER_EVENT;
// While event still valid and still locked, spin.
while (ecd._ipc_shmem->read_index < previous_read_index+IPC_SIGNALS_PER_EVENT && ecd._ipc_shmem->signal[offset] != 0) {
// TODO backoff
}
}
return ihipLogStatus(hipSuccess);
}
if (ecd._state == hipEventStatusUnitialized) {
return ihipLogStatus(hipErrorInvalidHandle);
} else if (ecd._state == hipEventStatusCreated) {
// Created but not actually recorded on any device:
return ihipLogStatus(hipSuccess);
} else if (HIP_SYNC_NULL_STREAM && (ecd._stream->isDefaultStream())) {
auto* ctx = ihipGetTlsDefaultCtx();
// TODO-HIP_SYNC_NULL_STREAM - can remove this code
ctx->locked_syncDefaultStream(true, true);
return ihipLogStatus(hipSuccess);
} else {
ecd.marker().wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked
: hc::hcWaitModeActive);
return ihipLogStatus(hipSuccess);
}
}
hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) {
HIP_INIT_API(hipEventElapsedTime, ms, start, stop);
if (ms == nullptr) return ihipLogStatus(hipErrorInvalidValue);
if ((start == nullptr) || (stop == nullptr) ||
(start->_deviceId != stop->_deviceId))
return ihipLogStatus(hipErrorInvalidHandle);
*ms = 0.0f;
auto startEcd = start->locked_copyCrit();
auto stopEcd = stop->locked_copyCrit();
if ((start->_flags & hipEventDisableTiming) ||
(startEcd._state == hipEventStatusUnitialized) ||
(startEcd._state == hipEventStatusCreated) ||
(stop->_flags & hipEventDisableTiming) ||
(stopEcd._state == hipEventStatusUnitialized) ||
(stopEcd._state == hipEventStatusCreated)) {
// Both events must be at least recorded else return hipErrorInvalidHandle
return ihipLogStatus(hipErrorInvalidHandle);
}
// Refresh status, if still recording...
auto startStatus = refreshEventStatus(startEcd); // pair < state, timestamp >
auto stopStatus = refreshEventStatus(stopEcd); // pair < state, timestamp >
if ((startStatus.first == hipEventStatusComplete) &&
(stopStatus.first == hipEventStatusComplete)) {
// Common case, we have good information for both events. 'second' is the timestamp:
int64_t tickDiff = (stopStatus.second - startStatus.second);
uint64_t freqHz;
hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &freqHz);
if (freqHz) {
*ms = ((double)(tickDiff) / (double)(freqHz)) * 1000.0f;
return ihipLogStatus(hipSuccess);
} else {
*ms = 0.0f;
return ihipLogStatus(hipErrorInvalidValue);
}
} else if ((startStatus.first == hipEventStatusRecording) ||
(stopStatus.first == hipEventStatusRecording)) {
return ihipLogStatus(hipErrorNotReady);
} else {
assert(0); // TODO should we return hipErrorUnknown ?
}
return ihipLogStatus(hipSuccess);
}
hipError_t hipEventQuery(hipEvent_t event) {
HIP_INIT_SPECIAL_API(hipEventQuery, TRACE_QUERY, event);
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
if (!(event->_flags & hipEventReleaseToSystem)) {
tprintf(DB_WARN,
"hipEventQuery on event without system-scope fence ; consider creating with "
"hipEventReleaseToSystem\n");
}
auto ecd = event->locked_copyCrit();
// this event is either from an ipc handle, or the owner of a local ipc event
if (event->_flags & hipEventInterprocess) {
if (ecd._ipc_shmem) {
int previous_read_index = ecd._ipc_shmem->read_index;
int offset = previous_read_index % IPC_SIGNALS_PER_EVENT;
if (ecd._ipc_shmem->read_index < previous_read_index+IPC_SIGNALS_PER_EVENT && ecd._ipc_shmem->signal[offset] != 0) {
return ihipLogStatus(hipErrorNotReady);
}
else {
return ihipLogStatus(hipSuccess);
}
}
}
// normal event
else {
if (ecd._state == hipEventStatusRecording && !ecd.marker().is_ready()) {
return ihipLogStatus(hipErrorNotReady);
}
}
return ihipLogStatus(hipSuccess);
}
hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event)
{
HIP_INIT_API(hipIpcGetEventHandle, handle, event);
#if USE_IPC && ATOMIC_INT_LOCK_FREE == 2
if (!handle) return ihipLogStatus(hipErrorInvalidHandle);
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
if (!(event->_flags & hipEventInterprocess)) return ihipLogStatus(hipErrorInvalidHandle);
if (!(event->_flags & hipEventDisableTiming)) return ihipLogStatus(hipErrorInvalidHandle);
LockedAccessor_EventCrit_t crit(event->criticalData());
auto &ecd{crit->_eventData};
createIpcEventShmemIfNeeded(ecd);
// copy name into handle
ihipIpcEventHandle_t* iHandle = (ihipIpcEventHandle_t*)handle;
memset(iHandle->shmem_name, 0, HIP_IPC_HANDLE_SIZE);
ecd._ipc_name.copy(iHandle->shmem_name, std::string::npos);
return ihipLogStatus(hipSuccess);
#else
return ihipLogStatus(hipErrorNotSupported);
#endif
}
hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle)
{
HIP_INIT_API(hipIpcOpenEventHandle, event, &handle);
#if USE_IPC && ATOMIC_INT_LOCK_FREE == 2
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
// create a new event with timing disabled, per spec
auto hip_status = ihipEventCreate(event, hipEventDisableTiming | hipEventInterprocess);
if (hip_status != hipSuccess) return ihipLogStatus(hip_status);
LockedAccessor_EventCrit_t crit((*event)->criticalData());
auto &ecd{crit->_eventData};
ihipIpcEventHandle_t* iHandle = (ihipIpcEventHandle_t*)&handle;
ecd._ipc_name = iHandle->shmem_name;
// open shmem
ecd._ipc_fd = shm_open(ecd._ipc_name.c_str(), O_RDWR, 0777);
throwing_errno_check(ecd._ipc_fd < 0, __FILE__, __func__, __LINE__);
// mmap it
ecd._ipc_shmem = (ihipIpcEventShmem_t*)mmap(0, sizeof(ihipIpcEventShmem_t), PROT_READ | PROT_WRITE, MAP_SHARED, ecd._ipc_fd, 0);
throwing_errno_check(NULL == ecd._ipc_shmem, __FILE__, __func__, __LINE__);
// update shared state
ecd._ipc_shmem->owners += 1;
return ihipLogStatus(hipSuccess);
#else
return ihipLogStatus(hipErrorNotSupported);
#endif
}
-91
Просмотреть файл
@@ -1,91 +0,0 @@
/*
Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <string>
#include <fstream>
#include "hip_fatbin.h"
#include "hip/hip_runtime.h"
#include "hip_hcc_internal.h"
#include "trace_helper.h"
void __hipDumpCodeObject(const std::string& image) {
char fname[30];
static std::atomic<int> index;
sprintf(fname, "__hip_dump_code_object%04d.o", index++);
tprintf(DB_FB, "Dump code object %s\n", fname);
std::ofstream ofs;
ofs.open(fname, std::ios::binary);
ofs << image;
ofs.close();
}
// Returns a pointer to the code object in the fatbin. The pointer should not
// be freed.
const void* __hipExtractCodeObjectFromFatBinary(const void* data,
const char* agent_name)
{
hip_impl::hip_init();
tprintf(DB_FB, "Enter __hipExtractCodeObjectFromFatBinary(%p, \"%s\")\n",
data, agent_name);
const __ClangOffloadBundleHeader* header
= reinterpret_cast<const __ClangOffloadBundleHeader*>(data);
std::string magic(reinterpret_cast<const char*>(header),
sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC) - 1);
if (magic.compare(CLANG_OFFLOAD_BUNDLER_MAGIC)) {
return nullptr;
}
const __ClangOffloadBundleDesc* desc = &header->desc[0];
for (uint64_t i = 0; i < header->numBundles; ++i,
desc = reinterpret_cast<const __ClangOffloadBundleDesc*>(
reinterpret_cast<uintptr_t>(&desc->triple[0]) + desc->tripleSize)) {
std::string triple{&desc->triple[0], sizeof(AMDGCN_AMDHSA_TRIPLE) - 1};
if (triple.compare(AMDGCN_AMDHSA_TRIPLE))
continue;
std::string target{&desc->triple[sizeof(AMDGCN_AMDHSA_TRIPLE)],
desc->tripleSize - sizeof(AMDGCN_AMDHSA_TRIPLE)};
tprintf(DB_FB, "Found hip-clang bundle for %s\n", target.c_str());
if (target.compare(agent_name)) {
continue;
}
auto *codeobj = reinterpret_cast<const char*>(
reinterpret_cast<uintptr_t>(header) + desc->offset);
if (HIP_DUMP_CODE_OBJECT)
__hipDumpCodeObject(std::string{codeobj, desc->size});
tprintf(DB_FB, "__hipExtractCodeObjectFromFatBinary succeeds and returns %p\n",
codeobj);
return codeobj;
}
// hipcc --genco for HCC generates fat binaries with different triple strings.
// It will reach here and return a null pointer. The fat binary itself will
// be handled in a different place.
tprintf(DB_FB, "No hip-clang device code bundle for %s\n", agent_name);
return nullptr;
}
-58
Просмотреть файл
@@ -1,58 +0,0 @@
/*
Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef HIP_SRC_HIP_FATBIN_H
#define HIP_SRC_HIP_FATBIN_H
#include "hip/hip_runtime.h"
#include "hip_hcc_internal.h"
// hip-clang fatbin format
constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"
#define CLANG_OFFLOAD_BUNDLER_MAGIC "__CLANG_OFFLOAD_BUNDLE__"
#define AMDGCN_AMDHSA_TRIPLE "hip-amdgcn-amd-amdhsa"
struct __ClangOffloadBundleDesc {
uint64_t offset;
uint64_t size;
uint64_t tripleSize;
const char triple[1];
};
struct __ClangOffloadBundleHeader {
const char magic[sizeof(CLANG_OFFLOAD_BUNDLER_MAGIC) - 1];
uint64_t numBundles;
__ClangOffloadBundleDesc desc[1];
};
struct __CudaFatBinaryWrapper {
unsigned int magic;
unsigned int version;
__ClangOffloadBundleHeader* binary;
void* unused;
};
const void* __hipExtractCodeObjectFromFatBinary(const void* data,
const char* agent_name);
void __hipDumpCodeObject(const std::string& image);
#endif // HIP_SRC_HIP_FATBIN_H
Разница между файлами не показана из-за своего большого размера Загрузить разницу
Разница между файлами не показана из-за своего большого размера Загрузить разницу
-53
Просмотреть файл
@@ -1,53 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "hip/hip_runtime.h"
#include "hip_prof_api.h"
// HIP API callback/activity
api_callbacks_table_t callbacks_table;
extern std::string& FunctionSymbol(const hipFunction_t f);
const char* hipKernelNameRef(const hipFunction_t f) { return FunctionSymbol(f).c_str(); }
hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg) {
return callbacks_table.set_callback(id, reinterpret_cast<api_callbacks_table_t::fun_t>(fun), arg) ?
hipSuccess : hipErrorInvalidValue;
}
hipError_t hipRemoveApiCallback(uint32_t id) {
return callbacks_table.set_callback(id, NULL, NULL) ? hipSuccess : hipErrorInvalidValue;
}
hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg) {
return callbacks_table.set_activity(id, reinterpret_cast<api_callbacks_table_t::act_t>(fun), arg) ?
hipSuccess : hipErrorInvalidValue;
}
hipError_t hipRemoveActivityCallback(uint32_t id) {
return callbacks_table.set_activity(id, NULL, NULL) ? hipSuccess : hipErrorInvalidValue;
}
const char* hipApiName(uint32_t id) {
return hip_api_name(id);
}
Разница между файлами не показана из-за своего большого размера Загрузить разницу
Разница между файлами не показана из-за своего большого размера Загрузить разницу
-231
Просмотреть файл
@@ -1,231 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hc_am.hpp>
#include "hip/hip_runtime.h"
#include "hip_hcc_internal.h"
#include "trace_helper.h"
// Peer access functions.
// There are two flavors:
// - one where contexts are specified with hipCtx_t type.
// - one where contexts are specified with integer deviceIds, that are mapped to the primary
// context for that device.
// The implementation contains a set of internal ihip* functions which operate on contexts. Then
// the public APIs are thin wrappers which call into this internal implementations.
// TODO - actually not yet - currently the integer deviceId flavors just call the context APIs. need
// to fix.
hipError_t ihipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t peerCtx) {
hipError_t err = hipSuccess;
if(canAccessPeer == NULL) {
err = hipErrorInvalidValue;
}
else if ((thisCtx != NULL) && (peerCtx != NULL)) {
if (thisCtx == peerCtx) {
*canAccessPeer = 0;
tprintf(DB_MEM, "Can't be peer to self. (this=%s, peer=%s)\n",
thisCtx->toString().c_str(), peerCtx->toString().c_str());
} else if (HIP_FORCE_P2P_HOST & 0x2) {
*canAccessPeer = false;
tprintf(DB_MEM,
"HIP_FORCE_P2P_HOST denies peer access this=%s peer=%s canAccessPeer=%d\n",
thisCtx->toString().c_str(), peerCtx->toString().c_str(), *canAccessPeer);
} else {
*canAccessPeer = peerCtx->getDevice()->_acc.get_is_peer(thisCtx->getDevice()->_acc);
tprintf(DB_MEM, "deviceCanAccessPeer this=%s peer=%s canAccessPeer=%d\n",
thisCtx->toString().c_str(), peerCtx->toString().c_str(), *canAccessPeer);
}
} else {
*canAccessPeer = 0;
err = hipErrorInvalidDevice;
}
return err;
}
/**
* HCC returns 0 in *canAccessPeer ; Need to update this function when RT supports P2P
*/
//---
hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t peerCtx) {
HIP_INIT_API(NONE, canAccessPeer, thisCtx, peerCtx);
return ihipLogStatus(ihipDeviceCanAccessPeer(canAccessPeer, thisCtx, peerCtx));
}
//---
// Disable visibility of this device into memory allocated on peer device.
// Remove this device from peer device peerlist.
hipError_t ihipDisablePeerAccess(TlsData* tls, hipCtx_t peerCtx) {
hipError_t err = hipSuccess;
auto thisCtx = ihipGetTlsDefaultCtx();
if ((thisCtx != NULL) && (peerCtx != NULL)) {
bool canAccessPeer = peerCtx->getDevice()->_acc.get_is_peer(thisCtx->getDevice()->_acc);
if (!canAccessPeer) {
err = hipErrorInvalidDevice; // P2P not allowed between these devices.
} else if (thisCtx == peerCtx) {
err = hipErrorInvalidDevice; // Can't disable peer access to self.
} else {
LockedAccessor_CtxCrit_t peerCrit(peerCtx->criticalData());
bool changed = peerCrit->removePeerWatcher(peerCtx, thisCtx);
if (changed) {
tprintf(DB_MEM, "device %s disable access to memory allocated on peer:%s\n",
thisCtx->toString().c_str(), peerCtx->toString().c_str());
// Update the peers for all memory already saved in the tracker:
am_memtracker_update_peers(peerCtx->getDevice()->_acc, peerCrit->peerCnt(),
peerCrit->peerAgents());
} else {
err = hipErrorPeerAccessNotEnabled; // never enabled P2P access.
}
}
} else {
err = hipErrorInvalidDevice;
}
return err;
};
//---
// Allow the current device to see all memory allocated on peerCtx.
// This should add this device to the peer-device peer list.
hipError_t ihipEnablePeerAccess(TlsData* tls, hipCtx_t peerCtx, unsigned int flags) {
hipError_t err = hipSuccess;
if (flags != 0) {
err = hipErrorInvalidValue;
} else {
auto thisCtx = ihipGetTlsDefaultCtx();
if (thisCtx == peerCtx) {
err = hipErrorInvalidDevice; // Can't enable peer access to self.
} else if ((thisCtx != NULL) && (peerCtx != NULL)) {
int canAccess = 0;
if ((hipSuccess != ihipDeviceCanAccessPeer(&canAccess,thisCtx,peerCtx)) || (canAccess == 0)){
tprintf(DB_MEM, "device=%s can't access peer=%s\n",thisCtx->toString().c_str(), peerCtx->toString().c_str());
err = hipErrorInvalidDevice;
} else {
LockedAccessor_CtxCrit_t peerCrit(peerCtx->criticalData());
// Add thisCtx to peerCtx's access list so that new allocations on peer will be made
// visible to this device:
bool isNewPeer = peerCrit->addPeerWatcher(peerCtx, thisCtx);
if (isNewPeer) {
tprintf(DB_MEM, "device=%s can now see all memory allocated on peer=%s\n",
thisCtx->toString().c_str(), peerCtx->toString().c_str());
am_memtracker_update_peers(peerCtx->getDevice()->_acc, peerCrit->peerCnt(),
peerCrit->peerAgents());
} else {
err = hipErrorPeerAccessAlreadyEnabled;
}
}
} else {
err = hipErrorInvalidDevice;
}
}
return err;
}
//---
hipError_t hipMemcpyPeer(void* dst, hipCtx_t dstCtx, const void* src, hipCtx_t srcCtx,
size_t sizeBytes) {
HIP_INIT_API(NONE, dst, dstCtx, src, srcCtx, sizeBytes);
// TODO - move to ihip memory copy implementaion.
// HCC has a unified memory architecture so device specifiers are not required.
return ihipLogStatus(hipMemcpy(dst, src, sizeBytes, hipMemcpyDefault));
};
//---
hipError_t hipMemcpyPeerAsync(void* dst, hipCtx_t dstDevice, const void* src, hipCtx_t srcDevice,
size_t sizeBytes, hipStream_t stream) {
HIP_INIT_API(NONE, dst, dstDevice, src, srcDevice, sizeBytes, stream);
// TODO - move to ihip memory copy implementaion.
// HCC has a unified memory architecture so device specifiers are not required.
return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDefault, stream));
};
//=============================================================================
// These are the flavors that accept integer deviceIDs.
// Implementations map these to primary contexts and call the internal functions above.
//=============================================================================
hipError_t hipDeviceCanAccessPeer(int* canAccessPeer, int deviceId, int peerDeviceId) {
HIP_INIT_API(hipDeviceCanAccessPeer, canAccessPeer, deviceId, peerDeviceId);
return ihipLogStatus(ihipDeviceCanAccessPeer(canAccessPeer, ihipGetPrimaryCtx(deviceId),
ihipGetPrimaryCtx(peerDeviceId)));
}
hipError_t hipDeviceDisablePeerAccess(int peerDeviceId) {
HIP_INIT_API(hipDeviceDisablePeerAccess, peerDeviceId);
return ihipLogStatus(ihipDisablePeerAccess(tls, ihipGetPrimaryCtx(peerDeviceId)));
}
hipError_t hipDeviceEnablePeerAccess(int peerDeviceId, unsigned int flags) {
HIP_INIT_API(hipDeviceEnablePeerAccess, peerDeviceId, flags);
return ihipLogStatus(ihipEnablePeerAccess(tls, ihipGetPrimaryCtx(peerDeviceId), flags));
}
hipError_t hipMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice,
size_t sizeBytes) {
HIP_INIT_API(hipMemcpyPeer, dst, dstDevice, src, srcDevice, sizeBytes);
return ihipLogStatus(hipMemcpyPeer(dst, ihipGetPrimaryCtx(dstDevice), src,
ihipGetPrimaryCtx(srcDevice), sizeBytes));
}
hipError_t hipMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice,
size_t sizeBytes, hipStream_t stream) {
HIP_INIT_API(hipMemcpyPeerAsync, dst, dstDevice, src, srcDevice, sizeBytes, stream);
return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDefault, stream));
}
hipError_t hipCtxEnablePeerAccess(hipCtx_t peerCtx, unsigned int flags) {
HIP_INIT_API(hipCtxEnablePeerAccess, peerCtx, flags);
return ihipLogStatus(ihipEnablePeerAccess(tls, peerCtx, flags));
}
hipError_t hipCtxDisablePeerAccess(hipCtx_t peerCtx) {
HIP_INIT_API(hipCtxDisablePeerAccess, peerCtx);
return ihipLogStatus(ihipDisablePeerAccess(tls, peerCtx));
}
-200
Просмотреть файл
@@ -1,200 +0,0 @@
// automatically generated sources
#ifndef _HIP_PROF_API_H
#define _HIP_PROF_API_H
#include <atomic>
#include <iostream>
#include <mutex>
#include "hip/hcc_detail/hip_prof_str.h"
template <typename Record, typename Fun, typename Act>
class api_callbacks_table_templ {
public:
typedef std::recursive_mutex mutex_t;
typedef Record record_t;
typedef Fun fun_t;
typedef Act act_t;
// HIP API callbacks table
struct hip_cb_table_entry_t {
volatile std::atomic<bool> sync;
volatile std::atomic<uint32_t> sem;
act_t act;
void* a_arg;
fun_t fun;
void* arg;
};
struct hip_cb_table_t {
hip_cb_table_entry_t arr[HIP_API_ID_NUMBER];
};
api_callbacks_table_templ() {
memset(&callbacks_table_, 0, sizeof(callbacks_table_));
}
bool set_activity(uint32_t id, act_t fun, void* arg) {
std::lock_guard<mutex_t> lock(mutex_);
bool ret = true;
if (id < HIP_API_ID_NUMBER) {
cb_sync(id);
callbacks_table_.arr[id].act = fun;
callbacks_table_.arr[id].a_arg = arg;
cb_release(id);
} else {
ret = false;
}
return ret;
}
bool set_callback(uint32_t id, fun_t fun, void* arg) {
std::lock_guard<mutex_t> lock(mutex_);
bool ret = true;
if (id < HIP_API_ID_NUMBER) {
cb_sync(id);
callbacks_table_.arr[id].fun = fun;
callbacks_table_.arr[id].arg = arg;
cb_release(id);
} else {
ret = false;
}
return ret;
}
inline hip_cb_table_entry_t& entry(const uint32_t& id) {
return callbacks_table_.arr[id];
}
inline void sem_sync(const uint32_t& id) {
sem_increment(id);
if (entry(id).sync.load() == true) sync_wait(id);
}
inline void sem_release(const uint32_t& id) {
sem_decrement(id);
}
private:
inline void cb_sync(const uint32_t& id) {
entry(id).sync.store(true);
while (entry(id).sem.load() != 0) {}
}
inline void cb_release(const uint32_t& id) {
entry(id).sync.store(false);
}
inline void sem_increment(const uint32_t& id) {
const uint32_t prev = entry(id).sem.fetch_add(1);
if (prev == UINT32_MAX) {
std::cerr << "sem overflow id = " << id << std::endl << std::flush;
abort();
}
}
inline void sem_decrement(const uint32_t& id) {
const uint32_t prev = entry(id).sem.fetch_sub(1);
if (prev == 0) {
std::cerr << "sem corrupted id = " << id << std::endl << std::flush;
abort();
}
}
void sync_wait(const uint32_t& id) {
sem_decrement(id);
while (entry(id).sync.load() == true) {}
sem_increment(id);
}
mutex_t mutex_;
hip_cb_table_t callbacks_table_;
};
#if USE_PROF_API
#include <prof_protocol.h>
static const uint32_t HIP_DOMAIN_ID = ACTIVITY_DOMAIN_HIP_API;
typedef activity_record_t hip_api_record_t;
typedef activity_rtapi_callback_t hip_api_callback_t;
typedef activity_sync_callback_t hip_act_callback_t;
// HIP API callbacks spawner object macro
#define HIP_CB_SPAWNER_OBJECT(CB_ID) \
hip_api_data_t api_data{}; \
INIT_CB_ARGS_DATA(CB_ID, api_data); \
api_callbacks_spawner_t<HIP_API_ID_##CB_ID> __api_tracer(HIP_API_ID_##CB_ID, api_data);
typedef api_callbacks_table_templ<hip_api_record_t,
hip_api_callback_t,
hip_act_callback_t> api_callbacks_table_t;
extern api_callbacks_table_t callbacks_table;
template <int cid_>
class api_callbacks_spawner_t {
public:
api_callbacks_spawner_t(const hip_api_id_t& cid, hip_api_data_t& api_data) :
api_data_(api_data),
record_({})
{
if (cid_ >= HIP_API_ID_NUMBER) {
fprintf(stderr, "HIP %s bad id %d\n", __FUNCTION__, cid_);
abort();
}
callbacks_table.sem_sync(cid_);
act = entry(cid_).act;
a_arg = entry(cid_).a_arg;
fun = entry(cid_).fun;
arg = entry(cid_).arg;
api_data_.phase = 0;
if (act != NULL) act(cid_, &record_, &api_data_, a_arg);
if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, &api_data_, arg);
}
~api_callbacks_spawner_t() {
api_data_.phase = 1;
if (act != NULL) act(cid_, &record_, &api_data_, a_arg);
if (fun != NULL) fun(HIP_DOMAIN_ID, cid_, &api_data_, arg);
callbacks_table.sem_release(cid_);
}
private:
inline api_callbacks_table_t::hip_cb_table_entry_t& entry(const uint32_t& id) {
return callbacks_table.entry(id);
}
hip_api_data_t& api_data_;
hip_api_record_t record_;
hip_act_callback_t act;
void* a_arg;
hip_api_callback_t fun;
void* arg;
};
template <>
class api_callbacks_spawner_t<HIP_API_ID_NUMBER> {
public:
api_callbacks_spawner_t(const hip_api_id_t& cid, hip_api_data_t& api_data) {}
};
#else
#define HIP_CB_SPAWNER_OBJECT(x) do {} while(0)
class api_callbacks_table_t {
public:
typedef void* act_t;
typedef void* fun_t;
bool set_activity(uint32_t id, act_t fun, void* arg) { return false; }
bool set_callback(uint32_t id, fun_t fun, void* arg) { return false; }
};
#endif
#endif // _HIP_PROF_API_H
-296
Просмотреть файл
@@ -1,296 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <thread>
#include <mutex>
#include "hip/hip_runtime.h"
#include "hip_hcc_internal.h"
#include "trace_helper.h"
//-------------------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------------------
// Stream
//
#if defined(__HCC__) && (__hcc_major__ < 3) && (__hcc_minor__ < 3)
enum queue_priority
{
priority_high = 0,
priority_normal = 0,
priority_low = 0
};
#else
enum queue_priority
{
priority_high = Kalmar::priority_high,
priority_normal = Kalmar::priority_normal,
priority_low = Kalmar::priority_low
};
#endif
//---
hipError_t ihipStreamCreate(TlsData *tls, hipStream_t* stream, unsigned int flags, int priority) {
ihipCtx_t* ctx = ihipGetTlsDefaultCtx();
hipError_t e = hipSuccess;
if (ctx) {
if (HIP_FORCE_NULL_STREAM) {
*stream = 0;
} else if( NULL == stream ){
e = hipErrorInvalidValue;
} else {
hc::accelerator acc = ctx->getWriteableDevice()->_acc;
// TODO - se try-catch loop to detect memory exception?
//
// Note this is an execute_any_order queue,
// CUDA stream behavior is that all kernels submitted will automatically
// wait for prev to complete, this behaviour will be mainatined by
// hipModuleLaunchKernel. execute_any_order will help
// hipExtModuleLaunchKernel , which uses a special flag
{
// Obtain mutex access to the device critical data, release by destructor
LockedAccessor_CtxCrit_t ctxCrit(ctx->criticalData());
#if defined(__HCC__) && (__hcc_major__ < 3) && (__hcc_minor__ < 3)
auto istream = new ihipStream_t(ctx, acc.create_view(), flags);
#else
auto istream = new ihipStream_t(ctx, acc.create_view(Kalmar::execute_any_order, Kalmar::queuing_mode_automatic, (Kalmar::queue_priority)priority), flags);
#endif
ctxCrit->addStream(istream);
*stream = istream;
}
tprintf(DB_SYNC, "hipStreamCreate, %s\n", ToString(*stream).c_str());
}
} else {
e = hipErrorInvalidDevice;
}
return e;
}
//---
hipError_t hipStreamCreateWithFlags(hipStream_t* stream, unsigned int flags) {
HIP_INIT_API(hipStreamCreateWithFlags, stream, flags);
if(flags == hipStreamDefault || flags == hipStreamNonBlocking)
return ihipLogStatus(ihipStreamCreate(tls, stream, flags, priority_normal));
else
return ihipLogStatus(hipErrorInvalidValue);
}
//---
hipError_t hipStreamCreate(hipStream_t* stream) {
HIP_INIT_API(hipStreamCreate, stream);
return ihipLogStatus(ihipStreamCreate(tls, stream, hipStreamDefault, priority_normal));
}
//---
hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) {
HIP_INIT_API(hipStreamCreateWithPriority, stream, flags, priority);
// clamp priority to range [priority_high:priority_low]
priority = (priority < priority_high ? priority_high : (priority > priority_low ? priority_low : priority));
return ihipLogStatus(ihipStreamCreate(tls, stream, flags, priority));
}
//---
hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) {
HIP_INIT_API(hipDeviceGetStreamPriorityRange, leastPriority, greatestPriority);
if (leastPriority != NULL) *leastPriority = priority_low;
if (greatestPriority != NULL) *greatestPriority = priority_high;
return ihipLogStatus(hipSuccess);
}
hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) {
HIP_INIT_SPECIAL_API(hipStreamWaitEvent, TRACE_SYNC, stream, event, flags);
if (!event) return ihipLogStatus(hipErrorInvalidHandle);
auto ecd = event->locked_copyCrit();
if (event->_flags & hipEventInterprocess) {
// this is an IPC event
if (ecd._ipc_shmem->read_index >= 0) {
// we have at least one recorded event, so proceed
stream->locked_streamWaitEvent(ecd);
}
}
else {
if ((ecd._state != hipEventStatusUnitialized) && (ecd._state != hipEventStatusCreated)) {
if (HIP_SYNC_STREAM_WAIT || (HIP_SYNC_NULL_STREAM && (stream == 0))) {
ecd.marker().wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked
: hc::hcWaitModeActive);
} else {
stream = ihipSyncAndResolveStream(stream);
// This will use create_blocking_marker to wait on the specified queue.
stream->locked_streamWaitEvent(ecd);
}
}
}
return ihipLogStatus(hipSuccess);
};
//---
hipError_t hipStreamQuery(hipStream_t stream) {
HIP_INIT_SPECIAL_API(hipStreamQuery, TRACE_QUERY, stream);
// Use default stream if 0 specified:
if (stream == hipStreamNull) {
ihipCtx_t* device = ihipGetTlsDefaultCtx();
stream = device->_defaultStream;
}
bool isEmpty = 0;
{
LockedAccessor_StreamCrit_t crit(stream->_criticalData);
isEmpty = crit->_av.get_is_empty();
}
hipError_t e = isEmpty ? hipSuccess : hipErrorNotReady;
return ihipLogStatus(e);
}
//---
hipError_t hipStreamSynchronize(hipStream_t stream) {
HIP_INIT_SPECIAL_API(hipStreamSynchronize, TRACE_SYNC, stream);
return ihipLogStatus(ihipStreamSynchronize(tls, stream));
}
//---
/**
* @return #hipSuccess, #hipErrorInvalidHandle
*/
hipError_t hipStreamDestroy(hipStream_t stream) {
HIP_INIT_API(hipStreamDestroy, stream);
hipError_t e = hipSuccess;
//--- Drain the stream:
if (stream == NULL) {
if (!HIP_FORCE_NULL_STREAM) {
e = hipErrorInvalidHandle;
}
} else {
stream->locked_wait();
ihipCtx_t* ctx = stream->getCtx();
if (ctx) {
ctx->locked_removeStream(stream);
delete stream;
} else {
e = hipErrorInvalidHandle;
}
}
return ihipLogStatus(e);
}
//---
hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags) {
HIP_INIT_API(hipStreamGetFlags, stream, flags);
if (flags == NULL) {
return ihipLogStatus(hipErrorInvalidValue);
} else if (stream == hipStreamNull) {
return ihipLogStatus(hipErrorInvalidHandle);
} else {
*flags = stream->_flags;
return ihipLogStatus(hipSuccess);
}
}
//--
hipError_t hipStreamGetPriority(hipStream_t stream, int* priority) {
HIP_INIT_API(hipStreamGetPriority, stream, priority);
if (priority == NULL) {
return ihipLogStatus(hipErrorInvalidValue);
} else if (stream == hipStreamNull) {
return ihipLogStatus(hipErrorInvalidHandle);
} else {
#if defined(__HCC__) && (__hcc_major__ < 3) && (__hcc_minor__ < 3)
*priority = 0;
#else
LockedAccessor_StreamCrit_t crit(stream->criticalData());
*priority = crit->_av.get_queue_priority();
#endif
return ihipLogStatus(hipSuccess);
}
}
//---
hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData,
unsigned int flags) {
HIP_INIT_API(hipStreamAddCallback, stream, callback, userData, flags);
auto stream_original{stream};
stream = ihipSyncAndResolveStream(stream);
if (!stream) return hipErrorInvalidValue;
LockedAccessor_StreamCrit_t cs{stream->criticalData()};
// create first marker
auto cf = cs->_av.create_marker(hc::no_scope);
// get its signal
auto signal = *reinterpret_cast<hsa_signal_t*>(cf.get_native_handle());
// increment its signal value
hsa_signal_add_relaxed(signal, 1);
// create callback that can be passed to hsa_amd_signal_async_handler
// this function will call the user's callback, then sets first packet's signal to 0 to indicate completion
auto t{new std::function<void()>{[=]() {
callback(stream_original, hipSuccess, userData);
hsa_signal_store_relaxed(signal, 0);
}}};
// register above callback with HSA runtime to be called when first packet's signal
// is decremented from 2 to 1 by CP (or it is already at 1)
hsa_amd_signal_async_handler(signal, HSA_SIGNAL_CONDITION_EQ, 1,
[](hsa_signal_value_t x, void* p) {
(*static_cast<decltype(t)>(p))();
delete static_cast<decltype(t)>(p);
return false;
}, t);
// create additional marker that blocks on the first one
cs->_av.create_blocking_marker(cf, hc::no_scope);
return ihipLogStatus(hipSuccess);
}
-87
Просмотреть файл
@@ -1,87 +0,0 @@
/*
Copyright (c) 2018 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <map>
#include <string.h>
#include "hip/hip_runtime.h"
#include "hip_hcc_internal.h"
#include "trace_helper.h"
#include "hip_surface.h"
static std::map<hipSurfaceObject_t, hipSurface*> surfaceHash;
void saveSurfaceInfo(const hipSurface* pSurface, const hipResourceDesc* pResDesc) {
if (pResDesc != nullptr) {
memcpy((void*)&(pSurface->resDesc), (void*)pResDesc, sizeof(hipResourceDesc));
}
}
// Surface Object APIs
hipError_t hipCreateSurfaceObject(hipSurfaceObject_t* pSurfObject,
const hipResourceDesc* pResDesc) {
HIP_INIT_API(hipCreateSurfaceObject, pSurfObject, pResDesc);
hipError_t hip_status = hipSuccess;
auto ctx = ihipGetTlsDefaultCtx();
if (ctx) {
hipSurface* pSurface = (hipSurface*)malloc(sizeof(hipSurface));
if (pSurface != nullptr) {
memset(pSurface, 0, sizeof(hipSurface));
saveSurfaceInfo(pSurface, pResDesc);
}
switch (pResDesc->resType) {
case hipResourceTypeArray:
pSurface->array = pResDesc->res.array.array;
break;
default:
break;
}
unsigned int* surfObj;
hipMalloc((void**)&surfObj, sizeof(hipArray));
hipMemcpy(surfObj, (void*)pResDesc->res.array.array, sizeof(hipArray),
hipMemcpyHostToDevice);
*pSurfObject = (hipSurfaceObject_t)surfObj;
surfaceHash[*pSurfObject] = pSurface;
}
return ihipLogStatus(hip_status);
}
hipError_t hipDestroySurfaceObject(hipSurfaceObject_t surfaceObject) {
HIP_INIT_API(hipDestroySurfaceObject, surfaceObject);
hipError_t hip_status = hipSuccess;
auto ctx = ihipGetTlsDefaultCtx();
if (ctx) {
hipSurface* pSurface = surfaceHash[surfaceObject];
if (pSurface != nullptr) {
free(pSurface);
surfaceHash.erase(surfaceObject);
}
}
return ihipLogStatus(hip_status);
}
-32
Просмотреть файл
@@ -1,32 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_SURFACE_H
#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_SURFACE_H
#include <hip/hcc_detail/hip_surface_types.h>
struct hipSurface {
hipArray* array;
hipResourceDesc resDesc;
};
#endif
-851
Просмотреть файл
@@ -1,851 +0,0 @@
#include <map>
#include <string.h>
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include "hip/hip_runtime.h"
#include "hip_hcc_internal.h"
#include "trace_helper.h"
#include "hip_texture.h"
static std::map<hipTextureObject_t, hipTexture*> textureHash;
void saveTextureInfo(const hipTexture* pTexture, const hipResourceDesc* pResDesc,
const hipTextureDesc* pTexDesc, const hipResourceViewDesc* pResViewDesc) {
if (pResDesc != nullptr) {
memcpy((void*)&(pTexture->resDesc), (void*)pResDesc, sizeof(hipResourceDesc));
}
if (pTexDesc != nullptr) {
memcpy((void*)&(pTexture->texDesc), (void*)pTexDesc, sizeof(hipTextureDesc));
}
if (pResViewDesc != nullptr) {
memcpy((void*)&(pTexture->resViewDesc), (void*)pResViewDesc, sizeof(hipResourceViewDesc));
}
}
void getDrvChannelOrderAndType(const enum hipArray_Format Format, enum hipTextureReadMode readMode, unsigned int NumChannels,
hsa_ext_image_channel_order_t* channelOrder,
hsa_ext_image_channel_type_t* channelType) {
switch (Format) {
case HIP_AD_FORMAT_UNSIGNED_INT8:
*channelType = readMode == hipReadModeNormalizedFloat
? HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8
: HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
break;
case HIP_AD_FORMAT_UNSIGNED_INT16:
*channelType = readMode == hipReadModeNormalizedFloat
? HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16
: HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16;
break;
case HIP_AD_FORMAT_UNSIGNED_INT32:
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32;
break;
case HIP_AD_FORMAT_SIGNED_INT8:
*channelType = readMode == hipReadModeNormalizedFloat
? HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8
: HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8;
break;
case HIP_AD_FORMAT_SIGNED_INT16:
*channelType = readMode == hipReadModeNormalizedFloat
? HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16
: HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16;
break;
case HIP_AD_FORMAT_SIGNED_INT32:
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32;
break;
case HIP_AD_FORMAT_HALF:
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT;
break;
case HIP_AD_FORMAT_FLOAT:
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT;
break;
default:
break;
}
if (NumChannels == 4) {
*channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
} else if (NumChannels == 2) {
*channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RG;
} else if (NumChannels == 1) {
*channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_R;
}
}
void getChannelOrderAndType(const hipChannelFormatDesc& desc, enum hipTextureReadMode readMode,
hsa_ext_image_channel_order_t* channelOrder,
hsa_ext_image_channel_type_t* channelType) {
if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w != 0) {
*channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
} else if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w == 0) {
*channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RGB;
} else if (desc.x != 0 && desc.y != 0 && desc.z == 0 && desc.w == 0) {
*channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RG;
} else if (desc.x != 0 && desc.y == 0 && desc.z == 0 && desc.w == 0) {
*channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_R;
} else {
}
switch (desc.f) {
case hipChannelFormatKindUnsigned:
switch (desc.x) {
case 32:
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32;
break;
case 16:
*channelType = readMode == hipReadModeNormalizedFloat
? HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16
: HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16;
break;
case 8:
*channelType = readMode == hipReadModeNormalizedFloat
? HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8
: HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
break;
default:
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32;
}
break;
case hipChannelFormatKindSigned:
switch (desc.x) {
case 32:
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32;
break;
case 16:
*channelType = readMode == hipReadModeNormalizedFloat
? HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16
: HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16;
break;
case 8:
*channelType = readMode == hipReadModeNormalizedFloat
? HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8
: HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8;
break;
default:
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32;
}
break;
case hipChannelFormatKindFloat:
switch (desc.x) {
case 32:
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT;
break;
case 16:
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT;
break;
case 8:
break;
default:
*channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT;
}
break;
case hipChannelFormatKindNone:
default:
break;
}
}
void fillSamplerDescriptor(hsa_ext_sampler_descriptor_t& samplerDescriptor,
enum hipTextureAddressMode addressMode,
enum hipTextureFilterMode filterMode, int normalizedCoords) {
if (normalizedCoords) {
samplerDescriptor.coordinate_mode = HSA_EXT_SAMPLER_COORDINATE_MODE_NORMALIZED;
} else {
samplerDescriptor.coordinate_mode = HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED;
}
switch (filterMode) {
case hipFilterModePoint:
samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_NEAREST;
break;
case hipFilterModeLinear:
samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_LINEAR;
break;
}
switch (addressMode) {
case hipAddressModeWrap:
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT;
break;
case hipAddressModeClamp:
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE;
break;
case hipAddressModeMirror:
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT;
break;
case hipAddressModeBorder:
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER;
break;
}
}
bool getHipTextureObject(hipTextureObject_t* pTexObject, hsa_ext_image_t& image,
hsa_ext_sampler_t sampler) {
unsigned int* texSRD;
hipMalloc((void**)&texSRD, HIP_TEXTURE_OBJECT_SIZE_DWORD * 4);
hipMemcpy(texSRD, (void*)image.handle, HIP_IMAGE_OBJECT_SIZE_DWORD * 4,
hipMemcpyDeviceToDevice);
hipMemcpy(texSRD + HIP_SAMPLER_OBJECT_OFFSET_DWORD, (void*)sampler.handle,
HIP_SAMPLER_OBJECT_SIZE_DWORD * 4, hipMemcpyDeviceToDevice);
*pTexObject = (hipTextureObject_t)texSRD;
#ifdef DEBUG
unsigned int* srd = (unsigned int*)malloc(HIP_TEXTURE_OBJECT_SIZE_DWORD * 4);
hipMemcpy(srd, texSRD, HIP_TEXTURE_OBJECT_SIZE_DWORD * 4, hipMemcpyDeviceToHost);
printf("New SRD: \n");
for (int i = 0; i < HIP_TEXTURE_OBJECT_SIZE_DWORD; i++) {
printf("SRD[%d]: %x\n", i, srd[i]);
}
printf("\n");
#endif
return true;
}
// Texture Object APIs
hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResourceDesc* pResDesc,
const hipTextureDesc* pTexDesc,
const hipResourceViewDesc* pResViewDesc) {
HIP_INIT_API(hipCreateTextureObject, pTexObject, pResDesc, pTexDesc, pResViewDesc);
hipError_t hip_status = hipSuccess;
auto ctx = ihipGetTlsDefaultCtx();
if (ctx) {
hc::accelerator acc = ctx->getDevice()->_acc;
auto device = ctx->getWriteableDevice();
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
hipTexture* pTexture = (hipTexture*)malloc(sizeof(hipTexture));
if (pTexture != nullptr) {
memset(pTexture, 0, sizeof(hipTexture));
saveTextureInfo(pTexture, pResDesc, pTexDesc, pResViewDesc);
}
hsa_ext_image_descriptor_t imageDescriptor;
hsa_ext_image_channel_order_t channelOrder;
hsa_ext_image_channel_type_t channelType;
void* devPtr = nullptr;
size_t pitch = 0;
switch (pResDesc->resType) {
case hipResourceTypeArray:
devPtr = pResDesc->res.array.array->data;
imageDescriptor.width = pResDesc->res.array.array->width;
imageDescriptor.height = pResDesc->res.array.array->height;
switch (pResDesc->res.array.array->type) {
case hipArrayLayered:
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2DA;
imageDescriptor.depth = 0;
imageDescriptor.array_size = pResDesc->res.array.array->depth;
break;
case hipArrayCubemap:
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_3D;
imageDescriptor.depth = pResDesc->res.array.array->depth;
imageDescriptor.array_size = 0;
break;
case hipArraySurfaceLoadStore:
case hipArrayTextureGather:
case hipArrayDefault:
default:
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
imageDescriptor.depth = 0;
imageDescriptor.array_size = 0;
break;
}
getChannelOrderAndType(pResDesc->res.array.array->desc, pTexDesc->readMode,
&channelOrder, &channelType);
break;
case hipResourceTypeMipmappedArray:
devPtr = pResDesc->res.mipmap.mipmap->data;
imageDescriptor.width = pResDesc->res.mipmap.mipmap->width;
imageDescriptor.height = pResDesc->res.mipmap.mipmap->height;
imageDescriptor.depth = pResDesc->res.mipmap.mipmap->depth;
imageDescriptor.array_size = 0;
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
getChannelOrderAndType(pResDesc->res.mipmap.mipmap->desc, pTexDesc->readMode,
&channelOrder, &channelType);
break;
case hipResourceTypeLinear:
devPtr = pResDesc->res.linear.devPtr;
imageDescriptor.width = pResDesc->res.linear.sizeInBytes/((pResDesc->res.linear.desc.x + pResDesc->res.linear.desc.y + pResDesc->res.linear.desc.z + pResDesc->res.linear.desc.w)/8);
imageDescriptor.height = 1;
imageDescriptor.depth = 0;
imageDescriptor.array_size = 0;
imageDescriptor.geometry =
HSA_EXT_IMAGE_GEOMETRY_1D; // ? HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR
getChannelOrderAndType(pResDesc->res.linear.desc, pTexDesc->readMode, &channelOrder,
&channelType);
break;
case hipResourceTypePitch2D:
devPtr = pResDesc->res.pitch2D.devPtr;
imageDescriptor.width = pResDesc->res.pitch2D.width;
imageDescriptor.height = pResDesc->res.pitch2D.height;
imageDescriptor.depth = 0;
imageDescriptor.array_size = 0;
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
pitch = pResDesc->res.pitch2D.pitchInBytes;
getChannelOrderAndType(pResDesc->res.pitch2D.desc, pTexDesc->readMode,
&channelOrder, &channelType);
break;
default:
break;
}
imageDescriptor.format.channel_order = channelOrder;
imageDescriptor.format.channel_type = channelType;
hsa_ext_sampler_descriptor_t samplerDescriptor;
fillSamplerDescriptor(samplerDescriptor, pTexDesc->addressMode[0], pTexDesc->filterMode,
pTexDesc->normalizedCoords);
if(hipResourceTypeLinear == pResDesc->resType) {
samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_NEAREST;
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER;
} else if(!pTexDesc->normalizedCoords) {
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE;
}
hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;
if(hipResourceTypePitch2D != pResDesc->resType)
pitch = getElementSize(channelOrder, channelType) * alignUp(imageDescriptor.width, IMAGE_PITCH_ALIGNMENT);
if (HSA_STATUS_SUCCESS != hsa_ext_image_create_with_layout(
*agent, &imageDescriptor, devPtr, permission,
HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, pitch, 0, &(pTexture->image)) ||
HSA_STATUS_SUCCESS !=
hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) {
free(pTexture);
return ihipLogStatus(hipErrorRuntimeOther);
}
getHipTextureObject(pTexObject, pTexture->image, pTexture->sampler);
textureHash[*pTexObject] = pTexture;
}
return ihipLogStatus(hip_status);
}
hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) {
HIP_INIT_API(hipDestroyTextureObject, textureObject);
hipError_t hip_status = hipSuccess;
auto ctx = ihipGetTlsDefaultCtx();
if (ctx) {
hc::accelerator acc = ctx->getDevice()->_acc;
auto device = ctx->getWriteableDevice();
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
hipTexture* pTexture = textureHash[textureObject];
if (pTexture != nullptr) {
hsa_ext_image_destroy(*agent, pTexture->image);
hsa_ext_sampler_destroy(*agent, pTexture->sampler);
free(pTexture);
textureHash.erase(textureObject);
}
}
return ihipLogStatus(hip_status);
}
hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc,
hipTextureObject_t textureObject) {
HIP_INIT_API(hipGetTextureObjectResourceDesc, pResDesc, textureObject);
hipError_t hip_status = hipSuccess;
auto ctx = ihipGetTlsDefaultCtx();
if (ctx) {
hipTexture* pTexture = textureHash[textureObject];
if (pTexture != nullptr && pResDesc != nullptr) {
memcpy((void*)pResDesc, (void*)&(pTexture->resDesc), sizeof(hipResourceDesc));
}
}
return ihipLogStatus(hip_status);
}
hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc,
hipTextureObject_t textureObject) {
HIP_INIT_API(hipGetTextureObjectResourceViewDesc, pResViewDesc, textureObject);
hipError_t hip_status = hipSuccess;
auto ctx = ihipGetTlsDefaultCtx();
if (ctx) {
hipTexture* pTexture = textureHash[textureObject];
if (pTexture != nullptr && pResViewDesc != nullptr) {
memcpy((void*)pResViewDesc, (void*)&(pTexture->resViewDesc),
sizeof(hipResourceViewDesc));
}
}
return ihipLogStatus(hip_status);
}
hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc,
hipTextureObject_t textureObject) {
HIP_INIT_API(hipGetTextureObjectTextureDesc, pTexDesc, textureObject);
hipError_t hip_status = hipSuccess;
auto ctx = ihipGetTlsDefaultCtx();
if (ctx) {
hipTexture* pTexture = textureHash[textureObject];
if (pTexture != nullptr && pTexDesc != nullptr) {
memcpy((void*)pTexDesc, (void*)&(pTexture->texDesc), sizeof(hipTextureDesc));
}
}
return ihipLogStatus(hip_status);
}
// Texture Reference APIs
hipError_t ihipBindTextureImpl(TlsData *tls_, int dim, enum hipTextureReadMode readMode, size_t* offset,
const void* devPtr, const struct hipChannelFormatDesc* desc,
size_t size, textureReference* tex) {
TlsData *tls = (tls_ == nullptr) ? tls_get_ptr() : tls_;
hipError_t hip_status = hipSuccess;
enum hipTextureAddressMode addressMode = tex->addressMode[0];
enum hipTextureFilterMode filterMode = tex->filterMode;
int normalizedCoords = tex->normalized;
hipTextureObject_t& textureObject = tex->textureObject;
if(offset != nullptr)
*offset = 0;
auto ctx = ihipGetTlsDefaultCtx();
if (ctx) {
hc::accelerator acc = ctx->getDevice()->_acc;
auto device = ctx->getWriteableDevice();
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
hipTexture* pTexture = (hipTexture*)malloc(sizeof(hipTexture));
if (pTexture != nullptr) {
memset(pTexture, 0, sizeof(hipTexture));
}
hsa_ext_image_descriptor_t imageDescriptor;
assert(dim == hipTextureType1D);
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_1D;
imageDescriptor.width = size;
imageDescriptor.height = 1;
imageDescriptor.depth = 1;
imageDescriptor.array_size = 0;
hsa_ext_image_channel_order_t channelOrder;
hsa_ext_image_channel_type_t channelType;
if (NULL == desc) {
getDrvChannelOrderAndType(tex->format, readMode, tex->numChannels, &channelOrder, &channelType);
} else {
getChannelOrderAndType(*desc, readMode, &channelOrder, &channelType);
}
imageDescriptor.format.channel_order = channelOrder;
imageDescriptor.format.channel_type = channelType;
hsa_ext_sampler_descriptor_t samplerDescriptor;
samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_NEAREST;
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER;
if (normalizedCoords) {
samplerDescriptor.coordinate_mode = HSA_EXT_SAMPLER_COORDINATE_MODE_NORMALIZED;
} else {
samplerDescriptor.coordinate_mode = HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED;
}
hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;
size_t rowPitch = getElementSize(channelOrder, channelType) * alignUp(size, IMAGE_PITCH_ALIGNMENT);
if (HSA_STATUS_SUCCESS != hsa_ext_image_create_with_layout(
*agent, &imageDescriptor, devPtr, permission,
HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, rowPitch, 0, &(pTexture->image)) ||
HSA_STATUS_SUCCESS !=
hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) {
free(pTexture);
return hipErrorRuntimeOther;
}
getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler);
pTexture->devPtr = (void*) devPtr;
textureHash[textureObject] = pTexture;
}
return hip_status;
}
hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr,
const hipChannelFormatDesc* desc, size_t size) {
HIP_INIT_API(hipBindTexture, offset, tex, devPtr, desc, size);
hipError_t hip_status = hipSuccess;
// TODO: hipReadModeElementType is default.
hip_status = ihipBindTextureImpl(tls, hipTextureType1D, hipReadModeElementType, offset, devPtr, desc,
size, tex);
return ihipLogStatus(hip_status);
}
hipError_t ihipBindTexture2DImpl(TlsData *tls, int dim, enum hipTextureReadMode readMode, size_t* offset,
const void* devPtr, const struct hipChannelFormatDesc* desc,
size_t width, size_t height, textureReference* tex, size_t pitch) {
hipError_t hip_status = hipSuccess;
enum hipTextureAddressMode addressMode = tex->addressMode[0];
enum hipTextureFilterMode filterMode = tex->filterMode;
int normalizedCoords = tex->normalized;
hipTextureObject_t& textureObject = tex->textureObject;
if(offset != nullptr)
*offset = 0;
auto ctx = ihipGetTlsDefaultCtx();
if (ctx) {
hc::accelerator acc = ctx->getDevice()->_acc;
auto device = ctx->getWriteableDevice();
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
hipTexture* pTexture = (hipTexture*)malloc(sizeof(hipTexture));
if (pTexture != nullptr) {
memset(pTexture, 0, sizeof(hipTexture));
}
hsa_ext_image_descriptor_t imageDescriptor;
assert(dim == hipTextureType2D);
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
imageDescriptor.width = width;
imageDescriptor.height = height;
imageDescriptor.depth = 1;
imageDescriptor.array_size = 0;
hsa_ext_image_channel_order_t channelOrder;
hsa_ext_image_channel_type_t channelType;
if (NULL == desc) {
getDrvChannelOrderAndType(tex->format, readMode, tex->numChannels, &channelOrder, &channelType);
} else {
getChannelOrderAndType(*desc, readMode, &channelOrder, &channelType);
}
imageDescriptor.format.channel_order = channelOrder;
imageDescriptor.format.channel_type = channelType;
hsa_ext_sampler_descriptor_t samplerDescriptor;
fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords);
if(!normalizedCoords) {
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE;
}
hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;
if( 0 == pitch)
pitch = getElementSize(channelOrder, channelType) * alignUp(width, IMAGE_PITCH_ALIGNMENT);
if (HSA_STATUS_SUCCESS != hsa_ext_image_create_with_layout(
*agent, &imageDescriptor, devPtr, permission,
HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, pitch, 0, &(pTexture->image)) ||
HSA_STATUS_SUCCESS !=
hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) {
free(pTexture);
return hipErrorRuntimeOther;
}
getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler);
pTexture->devPtr = (void*) devPtr;
textureHash[textureObject] = pTexture;
}
return hip_status;
}
hipError_t hipBindTexture2D(size_t* offset, textureReference* tex, const void* devPtr,
const hipChannelFormatDesc* desc, size_t width, size_t height,
size_t pitch) {
HIP_INIT_API(hipBindTexture2D, offset, tex, devPtr, desc, width, height, pitch);
hipError_t hip_status = hipSuccess;
//TODO: Fix when HSA accepts user defined pitch
if(pitch % 64) pitch =0;
hip_status = ihipBindTexture2DImpl(tls, hipTextureType2D, hipReadModeElementType, offset, devPtr,
desc, width, height, tex, pitch);
return ihipLogStatus(hip_status);
}
hipError_t ihipBindTextureToArrayImpl(TlsData *tls_, int dim, enum hipTextureReadMode readMode,
hipArray_const_t array,
const struct hipChannelFormatDesc& desc,
textureReference* tex) {
TlsData *tls = (tls_ == nullptr) ? tls_get_ptr() : tls_;
hipError_t hip_status = hipSuccess;
enum hipTextureAddressMode addressMode = tex->addressMode[0];
enum hipTextureFilterMode filterMode = tex->filterMode;
int normalizedCoords = tex->normalized;
hipTextureObject_t& textureObject = tex->textureObject;
auto ctx = ihipGetTlsDefaultCtx();
if (ctx) {
hc::accelerator acc = ctx->getDevice()->_acc;
auto device = ctx->getWriteableDevice();
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
hipTexture* pTexture = (hipTexture*)malloc(sizeof(hipTexture));
if (pTexture != nullptr) {
memset(pTexture, 0, sizeof(hipTexture));
}
hsa_ext_image_descriptor_t imageDescriptor;
imageDescriptor.width = array->width;
imageDescriptor.height = array->height;
imageDescriptor.depth = array->depth;
imageDescriptor.array_size = 0;
switch (dim) {
case hipTextureType1D:
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_1D;
imageDescriptor.height = 1;
imageDescriptor.depth = 1;
break;
case hipTextureType2D:
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
imageDescriptor.depth = 1;
break;
case hipTextureType3D:
case hipTextureTypeCubemap:
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_3D;
break;
case hipTextureType1DLayered:
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_1DA;
imageDescriptor.height = 1;
imageDescriptor.array_size = array->height;
break;
case hipTextureType2DLayered:
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2DA;
imageDescriptor.depth = 1;
imageDescriptor.array_size = array->depth;
break;
case hipTextureTypeCubemapLayered:
default:
break;
}
hsa_ext_image_channel_order_t channelOrder;
hsa_ext_image_channel_type_t channelType;
if (array->isDrv) {
getDrvChannelOrderAndType(array->Format, readMode, array->NumChannels,
&channelOrder, &channelType);
} else {
getChannelOrderAndType(desc, readMode, &channelOrder, &channelType);
}
imageDescriptor.format.channel_order = channelOrder;
imageDescriptor.format.channel_type = channelType;
hsa_ext_sampler_descriptor_t samplerDescriptor;
fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords);
if(!normalizedCoords) {
samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE;
}
hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;
size_t rowPitch = getElementSize(channelOrder, channelType) * alignUp(imageDescriptor.width, IMAGE_PITCH_ALIGNMENT);
if (HSA_STATUS_SUCCESS != hsa_ext_image_create_with_layout(
*agent, &imageDescriptor, array->data, permission,
HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, rowPitch, 0, &(pTexture->image)) ||
HSA_STATUS_SUCCESS !=
hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) {
return hipErrorRuntimeOther;
}
getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler);
pTexture->devPtr = (void*) array;
textureHash[textureObject] = pTexture;
}
return hip_status;
}
hipError_t hipBindTextureToArray(textureReference* tex, hipArray_const_t array,
const hipChannelFormatDesc* desc) {
HIP_INIT_API(hipBindTextureToArray, tex, array, desc);
hipError_t hip_status = hipSuccess;
// TODO: hipReadModeElementType is default.
hip_status =
ihipBindTextureToArrayImpl(tls, array->textureType, hipReadModeElementType, array, *desc, tex);
return ihipLogStatus(hip_status);
}
hipError_t hipBindTextureToMipmappedArray(textureReference* tex,
hipMipmappedArray_const_t mipmappedArray,
const hipChannelFormatDesc* desc) {
HIP_INIT_API(hipBindTextureToMipmappedArray, tex, mipmappedArray, desc);
hipError_t hip_status = hipSuccess;
return ihipLogStatus(hip_status);
}
hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject) {
hipError_t hip_status = hipSuccess;
TlsData* tls=tls_get_ptr();
auto ctx = ihipGetTlsDefaultCtx();
if (ctx) {
hc::accelerator acc = ctx->getDevice()->_acc;
auto device = ctx->getWriteableDevice();
hsa_agent_t* agent = static_cast<hsa_agent_t*>(acc.get_hsa_agent());
hipTexture* pTexture = textureHash[textureObject];
if (pTexture != nullptr) {
hsa_ext_image_destroy(*agent, pTexture->image);
hsa_ext_sampler_destroy(*agent, pTexture->sampler);
free(pTexture);
textureHash.erase(textureObject);
}
}
return hip_status;
}
hipError_t hipUnbindTexture(const textureReference* tex) {
HIP_INIT_API(hipUnbindTexture, tex);
hipError_t hip_status = hipSuccess;
hip_status = ihipUnbindTextureImpl(tex->textureObject);
return ihipLogStatus(hip_status);
}
hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) {
HIP_INIT_API(hipGetChannelDesc, desc, array);
hipError_t hip_status = hipSuccess;
auto ctx = ihipGetTlsDefaultCtx();
if (ctx) {
*desc = array->desc;
}
return ihipLogStatus(hip_status);
}
hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* tex) {
HIP_INIT_API(hipGetTextureAlignmentOffset, offset, tex);
hipError_t hip_status = hipSuccess;
auto ctx = ihipGetTlsDefaultCtx();
if (ctx) {
if(offset != nullptr)
*offset = 0;
}
return ihipLogStatus(hip_status);
}
hipError_t hipGetTextureReference(const textureReference** tex, const void* symbol) {
HIP_INIT_API(hipGetTextureReference, tex, symbol);
hipError_t hip_status = hipSuccess;
auto ctx = ihipGetTlsDefaultCtx();
if (ctx) {
}
return ihipLogStatus(hip_status);
}
hipError_t hipTexRefSetFormat(textureReference* tex, hipArray_Format fmt, int NumPackedComponents) {
HIP_INIT_API(hipTexRefSetFormat, tex, fmt, NumPackedComponents);
hipError_t hip_status = hipSuccess;
tex->format = fmt;
tex->numChannels = NumPackedComponents;
return ihipLogStatus(hip_status);
}
hipError_t hipTexRefSetFlags(textureReference* tex, unsigned int flags) {
HIP_INIT_API(hipTexRefSetFlags, tex, flags);
hipError_t hip_status = hipSuccess;
if(flags == HIP_TRSF_READ_AS_INTEGER)
tex->readMode = hipReadModeElementType;
else if(flags == HIP_TRSF_NORMALIZED_COORDINATES)
tex->normalized = flags;
return ihipLogStatus(hip_status);
}
hipError_t hipTexRefSetFilterMode(textureReference* tex, hipTextureFilterMode fm) {
HIP_INIT_API(hipTexRefSetFilterMode, tex, fm);
hipError_t hip_status = hipSuccess;
tex->filterMode = fm;
return ihipLogStatus(hip_status);
}
hipError_t hipTexRefSetAddressMode(textureReference* tex, int dim, hipTextureAddressMode am) {
HIP_INIT_API(hipTexRefSetAddressMode, tex, dim, am);
hipError_t hip_status = hipSuccess;
tex->addressMode[dim] = am;
return ihipLogStatus(hip_status);
}
hipError_t hipTexRefGetAddressMode(hipTextureAddressMode* am, textureReference tex, int dim) {
HIP_INIT_API(hipTexRefGetAddressMode,am, &tex, dim);
if ((am == nullptr) || (dim >= 3))
return ihipLogStatus(hipErrorInvalidValue);
*am = tex.addressMode[dim];
return ihipLogStatus(hipSuccess);
}
hipError_t hipTexRefSetArray(textureReference* tex, hipArray_const_t array, unsigned int flags) {
HIP_INIT_API(hipTexRefSetArray, tex, array, flags);
hipError_t hip_status = hipSuccess;
hip_status = ihipBindTextureToArrayImpl(tls, array->textureType, tex->readMode, array,
array->desc, tex);
return ihipLogStatus(hip_status);
}
hipError_t hipTexRefGetArray(hipArray_t* array, textureReference tex) {
HIP_INIT_API(hipTexRefGetArray, array, &tex);
if (array == nullptr)
return ihipLogStatus(hipErrorInvalidValue);
hipTexture* pTexture = textureHash[tex.textureObject];
if((pTexture == nullptr) || (hipResourceTypeArray != pTexture->resDesc.resType))
return ihipLogStatus(hipErrorInvalidImage);
if (pTexture->devPtr == nullptr)
return ihipLogStatus(hipErrorUnknown);
*array = reinterpret_cast<hipArray_t>(pTexture->devPtr);
return ihipLogStatus(hipSuccess);
}
hipError_t hipTexRefSetAddress(size_t* offset, textureReference* tex, hipDeviceptr_t devPtr,
size_t size) {
HIP_INIT_API(hipTexRefSetAddress, offset, tex, devPtr, size);
hipError_t hip_status = hipSuccess;
// TODO: hipReadModeElementType is default.
hip_status = ihipBindTextureImpl(tls, hipTextureType1D, tex->readMode, offset, devPtr, NULL,
size, tex);
return ihipLogStatus(hip_status);
}
hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, textureReference tex) {
HIP_INIT_API(hipTexRefGetAddress,dev_ptr, &tex);
if (dev_ptr == nullptr)
return ihipLogStatus(hipErrorInvalidValue);
hipTexture* pTexture = textureHash[tex.textureObject];
if (pTexture == nullptr)
return ihipLogStatus(hipErrorInvalidImage);
if (pTexture->devPtr == nullptr)
return ihipLogStatus(hipErrorUnknown);
*dev_ptr = reinterpret_cast<hipDeviceptr_t>(pTexture->devPtr);
return ihipLogStatus(hipSuccess);
}
hipError_t hipTexRefSetAddress2D(textureReference* tex, const HIP_ARRAY_DESCRIPTOR* desc,
hipDeviceptr_t devPtr, size_t pitch) {
HIP_INIT_API(hipTexRefSetAddress2D, tex, desc, devPtr, pitch);
size_t offset;
hipError_t hip_status = hipSuccess;
// TODO: hipReadModeElementType is default.
//TODO: Fix when HSA accepts user defined pitch
if(pitch % 64) pitch =0;
hip_status = ihipBindTexture2DImpl(tls, hipTextureType2D, tex->readMode, &offset, devPtr,
NULL, desc->Width, desc->Height, tex, pitch);
return ihipLogStatus(hip_status);
}
-37
Просмотреть файл
@@ -1,37 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef HIP_INCLUDE_HCC_DETAIL_HIP_TEXTURE_H
#define HIP_INCLUDE_HCC_DETAIL_HIP_TEXTURE_H
#include <hip/hcc_detail/texture_types.h>
struct hipTexture {
hipResourceDesc resDesc;
hipTextureDesc texDesc;
hipResourceViewDesc resViewDesc;
hsa_ext_image_t image;
hsa_ext_sampler_t sampler;
void* devPtr;
};
#endif
-38
Просмотреть файл
@@ -1,38 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef HIP_INCLUDE_HCC_DETAIL_HIP_UTIL_H
#define HIP_INCLUDE_HCC_DETAIL_HIP_UTIL_H
#include <assert.h>
#include <stdint.h>
#include <iostream>
#include <sstream>
#include <list>
#include <sys/types.h>
#include <unistd.h>
#include <deque>
#include <vector>
#include <algorithm>
#endif
-634
Просмотреть файл
@@ -1,634 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "../include/hip/hiprtc.h"
#include "code_object_bundle.inl"
#include "../include/hip/hcc_detail/elfio/elfio.hpp"
#include "../include/hip/hcc_detail/program_state.hpp"
#include "../lpl_ca/pstreams/pstream.h"
#include <hsa/hsa.h>
#include <cxxabi.h>
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <future>
#include <iterator>
#include <memory>
#include <mutex>
#include <stdexcept>
#include <string>
#include <sstream>
#include <unordered_map>
#include <utility>
#include <vector>
#include <iostream>
#include <sys/stat.h>
extern "C" const char* hiprtcGetErrorString(hiprtcResult x)
{
switch (x) {
case HIPRTC_SUCCESS:
return "HIPRTC_SUCCESS";
case HIPRTC_ERROR_OUT_OF_MEMORY:
return "HIPRTC_ERROR_OUT_OF_MEMORY";
case HIPRTC_ERROR_PROGRAM_CREATION_FAILURE:
return "HIPRTC_ERROR_PROGRAM_CREATION_FAILURE";
case HIPRTC_ERROR_INVALID_INPUT:
return "HIPRTC_ERROR_INVALID_INPUT";
case HIPRTC_ERROR_INVALID_PROGRAM:
return "HIPRTC_ERROR_INVALID_PROGRAM";
case HIPRTC_ERROR_INVALID_OPTION:
return "HIPRTC_ERROR_INVALID_OPTION";
case HIPRTC_ERROR_COMPILATION:
return "HIPRTC_ERROR_COMPILATION";
case HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE:
return "HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE";
case HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION:
return "HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION";
case HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION:
return "HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION";
case HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID:
return "HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID";
case HIPRTC_ERROR_INTERNAL_ERROR:
return "HIPRTC_ERROR_INTERNAL_ERROR";
default: throw std::logic_error{"Invalid HIPRTC result."};
};
}
namespace hip_impl {
inline bool create_directory(const std::string& path) {
mode_t mode = 0755;
int ret = mkdir(path.c_str(), mode);
if (ret == 0) return true;
return false;
}
inline bool fileExists (const std::string& name) {
struct stat buffer;
return (stat (name.c_str(), &buffer) == 0);
}
} // namespace hip_impl
namespace
{
char* demangle(const char* x)
{
if (!x) return nullptr;
int s{};
char* tmp = abi::__cxa_demangle(x, nullptr, nullptr, &s);
if (s != 0) return nullptr;
return tmp;
}
} // Unnamed namespace.
namespace
{
struct Symbol {
std::string name;
ELFIO::Elf64_Addr value = 0;
ELFIO::Elf_Xword size = 0;
ELFIO::Elf_Half sect_idx = 0;
std::uint8_t bind = 0;
std::uint8_t type = 0;
std::uint8_t other = 0;
};
inline
Symbol read_symbol(const ELFIO::symbol_section_accessor& section,
unsigned int idx) {
assert(idx < section.get_symbols_num());
Symbol r;
section.get_symbol(
idx, r.name, r.value, r.size, r.bind, r.type, r.sect_idx, r.other);
return r;
}
} // Unnamed namespace.
struct _hiprtcProgram {
// DATA - STATICS
static std::vector<std::unique_ptr<_hiprtcProgram>> programs;
static std::mutex mtx;
// DATA
std::vector<std::pair<std::string, std::string>> headers;
std::vector<std::pair<std::string, std::string>> names;
std::vector<std::string> loweredNames;
std::vector<char> elf;
std::string source;
std::string name;
std::string log;
bool compiled;
// STATICS
static
hiprtcResult destroy(_hiprtcProgram* p)
{
using namespace std;
lock_guard<mutex> lck{mtx};
const auto it{find_if(programs.cbegin(), programs.cend(),
[=](const unique_ptr<_hiprtcProgram>& x) {
return x.get() == p;
})};
if (it == programs.cend()) return HIPRTC_ERROR_INVALID_PROGRAM;
return HIPRTC_SUCCESS;
}
static
std::string handleMangledName(std::string name)
{
using namespace std;
char* demangled = demangle(name.c_str());
name.assign(demangled == nullptr ? "" : demangled);
free(demangled);
if (name.empty()) return name;
if (name.find("void ") == 0) name.erase(0, strlen("void "));
auto dx{name.find_first_of("(<")};
if (dx == string::npos) return name;
if (name[dx] == '<') {
auto cnt{1u};
do {
++dx;
cnt += (name[dx] == '<') ? 1 : ((name[dx] == '>') ? -1 : 0);
} while (cnt);
name.erase(++dx);
}
else name.erase(dx);
return name;
}
static
_hiprtcProgram* make(std::string s, std::string n,
std::vector<std::pair<std::string, std::string>> h)
{
using namespace std;
unique_ptr<_hiprtcProgram> tmp{new _hiprtcProgram{move(h), {}, {}, {},
move(s), move(n), {},
false}};
lock_guard<mutex> lck{mtx};
programs.push_back(move(tmp));
return programs.back().get();
}
static
bool isValid(_hiprtcProgram* p) noexcept
{
return std::find_if(programs.cbegin(), programs.cend(),
[=](const std::unique_ptr<_hiprtcProgram>& x) {
return x.get() == p;
}) != programs.cend();
}
// MANIPULATORS
bool compile(const std::vector<std::string>& args)
{
using namespace ELFIO;
using namespace redi;
using namespace std;
ipstream compile{args.front(), args, pstreambuf::pstderr};
constexpr const auto tmp_size{1024u};
char tmp[tmp_size]{};
while (!compile.eof()) {
log.append(tmp, tmp + compile.readsome(tmp, tmp_size));
}
compile.close();
if (compile.rdbuf()->exited() &&
compile.rdbuf()->status() != EXIT_SUCCESS) return false;
elfio reader;
if (!reader.load(args.back())) return false;
const auto it{find_if(reader.sections.begin(), reader.sections.end(),
[](const section* x) {
return (x->get_name() == ".hip_fatbin") || (x->get_name() == ".kernel");
})};
if (it == reader.sections.end()) return false;
hip_impl::Bundled_code_header h{(*it)->get_data()};
if (bundles(h).empty()) return false;
elf.assign(bundles(h).back().blob.cbegin(),
bundles(h).back().blob.cend());
return true;
}
bool readLoweredNames()
{
using namespace ELFIO;
using namespace hip_impl;
using namespace std;
if (names.empty()) return true;
istringstream blob{string{elf.cbegin(), elf.cend()}};
elfio reader;
if (!reader.load(blob)) return false;
const auto it{find_if(reader.sections.begin(), reader.sections.end(),
[](const section* x) {
return x->get_type() == SHT_SYMTAB;
})};
ELFIO::symbol_section_accessor symbols{reader, *it};
auto n{symbols.get_symbols_num()};
if (n < loweredNames.size()) return false;
while (n--) {
const auto tmp{read_symbol(symbols, n)};
auto it{find_if(names.cbegin(), names.cend(),
[&](const pair<string, string>& x) {
return x.second == tmp.name;
})};
if (it == names.cend()) {
const auto name{handleMangledName(tmp.name)};
if (name.empty()) continue;
it = find_if(names.cbegin(), names.cend(),
[&](const pair<string, string>& x) {
return x.second == name;
});
if (it == names.cend()) continue;
}
loweredNames[distance(names.cbegin(), it)] = tmp.name;
}
return true;
}
void replaceExtension(std::string& fileName, const std::string &ext) const {
auto res = fileName.rfind('.');
auto sloc = fileName.rfind('/'); // slash location
if (res != std::string::npos && (res > sloc || sloc == std::string::npos)) {
fileName.replace(fileName.begin() + res, fileName.end(), ext);
} else {
fileName += ext;
}
}
// ACCESSORS
std::string writeTemporaryFiles(
const std::string& programFolder) const
{
using namespace std;
vector<future<void>> fut{headers.size()};
transform(headers.cbegin(), headers.cend(), begin(fut),
[&](const pair<string, string>& x) {
return async([&]() {
ofstream h{programFolder + '/' + x.first};
h.write(x.second.data(), x.second.size());
});
});
auto tmp{(programFolder + '/' + name)};
replaceExtension(tmp, ".cpp");
ofstream{tmp}.write(source.data(), source.size());
return tmp;
}
};
std::vector<std::unique_ptr<_hiprtcProgram>> _hiprtcProgram::programs{};
std::mutex _hiprtcProgram::mtx{};
namespace
{
inline
bool isValidProgram(const hiprtcProgram p)
{
if (!p) return false;
std::lock_guard<std::mutex> lck{_hiprtcProgram::mtx};
return _hiprtcProgram::isValid(p);
}
} // Unnamed namespace.
extern "C" hiprtcResult hiprtcAddNameExpression(hiprtcProgram p, const char* n)
{
if (!n) return HIPRTC_ERROR_INVALID_INPUT;
if (!isValidProgram(p)) return HIPRTC_ERROR_INVALID_PROGRAM;
if (p->compiled) return HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION;
const auto id{p->names.size()};
p->names.emplace_back(n, n);
p->loweredNames.emplace_back();
if (p->names.back().second.back() == ')') {
p->names.back().second.pop_back();
p->names.back().second.erase(0, p->names.back().second.find('('));
}
if (p->names.back().second.front() == '&') {
p->names.back().second.erase(0, 1);
}
const auto var{"__hiprtc_" + std::to_string(id)};
p->source.append("\nextern \"C\" constexpr auto " + var + " = " + n + ';');
return HIPRTC_SUCCESS;
}
namespace
{
class Unique_temporary_path {
// DATA
std::string path_{};
public:
// CREATORS
Unique_temporary_path() : path_{std::tmpnam(nullptr)}
{
while (hip_impl::fileExists(path_)) {
path_ = std::tmpnam(nullptr);
}
}
Unique_temporary_path(const Unique_temporary_path&) = default;
Unique_temporary_path(Unique_temporary_path&&) = default;
~Unique_temporary_path() noexcept
{
std::string s("rm -r " + path_);
system(s.c_str());
}
// MANIPULATORS
Unique_temporary_path& operator=(
const Unique_temporary_path&) = default;
Unique_temporary_path& operator=(Unique_temporary_path&&) = default;
// ACCESSORS
const std::string& path() const noexcept
{
return path_;
}
};
} // Unnamed namespace.
namespace
{
const std::string& defaultTarget()
{
using namespace std;
static string r{"gfx900"};
static once_flag f{};
call_once(f, []() {
static hsa_agent_t a{};
hsa_iterate_agents([](hsa_agent_t x, void*) {
hsa_device_type_t t{};
hsa_agent_get_info(x, HSA_AGENT_INFO_DEVICE, &t);
if (t != HSA_DEVICE_TYPE_GPU) return HSA_STATUS_SUCCESS;
a = x;
return HSA_STATUS_INFO_BREAK;
}, nullptr);
if (!a.handle) return;
hsa_agent_iterate_isas(a, [](hsa_isa_t x, void*){
uint32_t n{};
hsa_isa_get_info_alt(x, HSA_ISA_INFO_NAME_LENGTH, &n);
if (n == 0) return HSA_STATUS_SUCCESS;
r.resize(n);
hsa_isa_get_info_alt(x, HSA_ISA_INFO_NAME, &r[0]);
r.erase(0, r.find("gfx"));
return HSA_STATUS_INFO_BREAK;
}, nullptr);
});
return r;
}
inline
void handleTarget(std::vector<std::string>& args)
{
using namespace std;
bool hasTarget{false};
for (auto&& x : args) {
const auto dx{x.find("--gpu-architecture")};
const auto dy{(dx == string::npos) ? x.find("-arch")
: string::npos};
if (dx == dy) continue;
x.replace(0, x.find('=', min(dx, dy)), "--amdgpu-target");
hasTarget = true;
break;
}
if (!hasTarget) args.push_back("--amdgpu-target=" + defaultTarget());
}
} // Unnamed namespace.
extern "C" hiprtcResult hiprtcCompileProgram(hiprtcProgram p, int n, const char** o)
{
using namespace std;
if (n && !o) return HIPRTC_ERROR_INVALID_INPUT;
if (!isValidProgram(p)) return HIPRTC_ERROR_INVALID_PROGRAM;
if (p->compiled) return HIPRTC_ERROR_COMPILATION;
static const string hipcc{
getenv("HIP_PATH") ? (getenv("HIP_PATH") + string{"/bin/hipcc"})
: "/opt/rocm/bin/hipcc"};
if (!hip_impl::fileExists(hipcc)) {
return HIPRTC_ERROR_INTERNAL_ERROR;
}
Unique_temporary_path tmp{};
hip_impl::create_directory(tmp.path());
const auto src{p->writeTemporaryFiles(tmp.path())};
vector<string> args{hipcc, "-fPIC -shared"};
if (n) args.insert(args.cend(), o, o + n);
handleTarget(args);
args.emplace_back(src);
args.emplace_back("-o");
args.emplace_back(tmp.path() + '/' + "hiprtc.out");
if (!p->compile(args)) return HIPRTC_ERROR_INTERNAL_ERROR;
if (!p->readLoweredNames()) return HIPRTC_ERROR_INTERNAL_ERROR;
p->compiled = true;
return HIPRTC_SUCCESS;
}
extern "C" hiprtcResult hiprtcCreateProgram(hiprtcProgram* p, const char* src,
const char* name, int n, const char** hdrs,
const char** incs)
{
using namespace std;
if (!p) return HIPRTC_ERROR_INVALID_PROGRAM;
if (n < 0) return HIPRTC_ERROR_INVALID_INPUT;
if (n && (!hdrs || !incs)) return HIPRTC_ERROR_INVALID_INPUT;
vector<pair<string, string>> h;
for (auto i = 0; i != n; ++i) h.emplace_back(incs[i], hdrs[i]);
*p = _hiprtcProgram::make(src, name ? name : "default_name", move(h));
return HIPRTC_SUCCESS;
}
extern "C" hiprtcResult hiprtcDestroyProgram(hiprtcProgram* p)
{
if (!p) return HIPRTC_SUCCESS;
return _hiprtcProgram::destroy(*p);
}
extern "C" hiprtcResult hiprtcGetLoweredName(hiprtcProgram p, const char* n,
const char** ln)
{
using namespace std;
if (!n || !ln) return HIPRTC_ERROR_INVALID_INPUT;
if (!isValidProgram(p)) return HIPRTC_ERROR_INVALID_PROGRAM;
if (!p->compiled) return HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION;
const auto it{find_if(p->names.cbegin(), p->names.cend(),
[=](const pair<string, string>& x) {
return x.first == n;
})};
if (it == p->names.cend()) return HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID;
*ln = p->loweredNames[distance(p->names.cbegin(), it)].c_str();
return HIPRTC_SUCCESS;
}
extern "C" hiprtcResult hiprtcGetProgramLog(hiprtcProgram p, char* l)
{
if (!l) return HIPRTC_ERROR_INVALID_INPUT;
if (!isValidProgram(p)) return HIPRTC_ERROR_INVALID_PROGRAM;
if (!p->compiled) return HIPRTC_ERROR_INVALID_PROGRAM;
l = std::copy_n(p->log.data(), p->log.size(), l);
*l = '\0';
return HIPRTC_SUCCESS;
}
extern "C" hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram p, std::size_t* sz)
{
if (!sz) return HIPRTC_ERROR_INVALID_INPUT;
if (!isValidProgram(p)) return HIPRTC_ERROR_INVALID_PROGRAM;
if (!p->compiled) return HIPRTC_ERROR_INVALID_PROGRAM;
*sz = p->log.empty() ? 0 : p->log.size() + 1;
return HIPRTC_SUCCESS;
}
extern "C" hiprtcResult hiprtcGetCode(hiprtcProgram p, char* c)
{
if (!c) return HIPRTC_ERROR_INVALID_INPUT;
if (!isValidProgram(p)) return HIPRTC_ERROR_INVALID_PROGRAM;
if (!p->compiled) return HIPRTC_ERROR_INVALID_PROGRAM;
std::copy_n(p->elf.data(), p->elf.size(), c);
return HIPRTC_SUCCESS;
}
extern "C" hiprtcResult hiprtcGetCodeSize(hiprtcProgram p, std::size_t* sz)
{
if (!sz) return HIPRTC_ERROR_INVALID_INPUT;
if (!isValidProgram(p)) return HIPRTC_ERROR_INVALID_PROGRAM;
if (!p->compiled) return HIPRTC_ERROR_INVALID_PROGRAM;
*sz = p->elf.size();
return HIPRTC_SUCCESS;
}
extern "C" hiprtcResult hiprtcVersion(int* major, int* minor)
{
if (major == nullptr || minor == nullptr) {
return HIPRTC_ERROR_INVALID_INPUT;
}
*major = 9;
*minor = 0;
return HIPRTC_SUCCESS;
}
-97
Просмотреть файл
@@ -1,97 +0,0 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// Internal header, do not percolate upwards.
#include "hip_hcc_internal.h"
#include "hc.hpp"
#include "trace_helper.h"
#include <iostream>
#include <sstream>
namespace hip_impl
{
hc::accelerator_view lock_stream_hip_(
hipStream_t& stream, void*& locked_stream)
{ // This allocated but does not take ownership of locked_stream. If it is
// not deleted elsewhere it will leak.
using L = decltype(stream->lockopen_preKernelCommand());
HIP_INIT();
stream = ihipSyncAndResolveStream(stream);
locked_stream = new L{stream->lockopen_preKernelCommand()};
return (*static_cast<L*>(locked_stream))->_av;
}
void print_prelaunch_trace_(
const char* kernel_name,
dim3 num_blocks,
dim3 dim_blocks,
int group_mem_bytes,
hipStream_t stream)
{
if ((HIP_TRACE_API & (1 << TRACE_KCMD)) ||
HIP_PROFILE_API ||
(COMPILE_HIP_DB && (HIP_TRACE_API & (1<<TRACE_ALL)))) {
std::stringstream os;
os << tls_tidInfo.tid() << "." << tls_tidInfo.apiSeqNum()
<< " hipLaunchKernel '" << kernel_name << "'"
<< " gridDim:" << num_blocks
<< " groupDim:" << dim_blocks
<< " sharedMem:+" << group_mem_bytes
<< " " << *stream;
if (HIP_PROFILE_API == 0x1) {
std::string shortAtpString("hipLaunchKernel:");
shortAtpString += kernel_name;
MARKER_BEGIN(shortAtpString.c_str(), "HIP");
} else if (HIP_PROFILE_API == 0x2) {
MARKER_BEGIN(os.str().c_str(), "HIP");
}
if (COMPILE_HIP_DB && HIP_TRACE_API) {
std::string fullStr;
GET_TLS();
recordApiTrace(tls, &fullStr, os.str());
}
}
}
void unlock_stream_hip_(
hipStream_t stream,
void* locked_stream,
const char* kernel_name,
hc::accelerator_view* acc_v)
{ // Precondition: acc_v is the accelerator_view associated with stream
// which is guarded by locked_stream;
// locked_stream is deletable.
using L = decltype(stream->lockopen_preKernelCommand());
stream->lockclose_postKernelCommand(kernel_name, acc_v);
delete static_cast<L*>(locked_stream);
if(HIP_PROFILE_API) {
MARKER_END();
}
}
}
-101
Просмотреть файл
@@ -1,101 +0,0 @@
#include "../include/hip/hcc_detail/program_state.hpp"
// contains implementation of program_state_impl
#include "program_state.inl"
#include <hsa/hsa.h>
#include <cstdint>
#include <stdexcept>
#include <unordered_map>
#include <vector>
namespace hip_impl {
kernarg::kernarg() : impl(new kernarg_impl) {
}
kernarg::kernarg(kernarg&& k) : impl(k.impl) {
k.impl = nullptr;
}
kernarg::~kernarg() {
if (impl)
delete(impl);
}
std::uint8_t* kernarg::data() {
return impl->v.data();
}
std::size_t kernarg::size() {
return impl->v.size();
}
void kernarg::reserve(std::size_t c) {
impl->v.reserve(c);
}
void kernarg::resize(std::size_t c) {
impl->v.resize(c);
}
std::size_t kernargs_size_align::kernargs_size_align::size(std::size_t n) const{
return (*reinterpret_cast<const std::vector<std::pair<std::size_t, std::size_t>>*>(handle))[n].first;
}
std::size_t kernargs_size_align::alignment(std::size_t n) const{
return (*reinterpret_cast<const std::vector<std::pair<std::size_t, std::size_t>>*>(handle))[n].second;
}
program_state::program_state() : impl(new program_state_impl) {
if (!impl) hip_throw(std::runtime_error {
"Unknown error when constructing program state."});
}
program_state::~program_state() {
delete(impl);
}
void* program_state::global_addr_by_name(const char* name) {
const auto it = impl->get_globals().find(name);
if (it == impl->get_globals().end())
return nullptr;
else
return it->second.first;
}
hsa_executable_t program_state::load_executable(const char* data,
const size_t data_size,
hsa_executable_t executable,
hsa_agent_t agent) {
return impl->load_executable(data, data_size, true, executable, agent);
}
hsa_executable_t program_state::load_executable_no_copy(const char* data,
const size_t data_size,
hsa_executable_t executable,
hsa_agent_t agent) {
return impl->load_executable(data, data_size, false, executable, agent);
}
hipFunction_t program_state::kernel_descriptor(std::uintptr_t function_address,
hsa_agent_t agent) {
auto& kd = impl->kernel_descriptor(function_address, agent);
return kd;
}
kernargs_size_align program_state::get_kernargs_size_align(std::uintptr_t kernel) {
kernargs_size_align t;
t.handle = reinterpret_cast<const void*>(&impl->kernargs_size_align(kernel));
return t;
}
std::mutex executables_cache_mutex;
std::vector<hsa_executable_t>& executables_cache(
std::string elf, hsa_isa_t isa, hsa_agent_t agent) {
static std::unordered_map<std::string,
std::unordered_map<hsa_isa_t,
std::unordered_map<hsa_agent_t, std::vector<hsa_executable_t>>>> cache;
return cache[elf][isa][agent];
}
};
Разница между файлами не показана из-за своего большого размера Загрузить разницу
-125
Просмотреть файл
@@ -1,125 +0,0 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
//#pragma once
#ifndef TRACE_HELPER_H
#define TRACE_HELPER_H
#include <iostream>
#include <iomanip>
#include <sstream>
#include <string>
//---
// Helper functions to convert HIP function arguments into strings.
// Handles POD data types as well as enumerations (ie hipMemcpyKind).
// The implementation uses C++11 variadic templates and template specialization.
// The hipMemcpyKind example below is a good example that shows how to implement conversion for a
// new HSA type.
// Handy macro to convert an enumeration to a stringified version of same:
#define CASE_STR(x) \
case x: \
return #x;
// Building block functions:
template <typename T>
inline std::string ToHexString(T v) {
std::ostringstream ss;
ss << "0x" << std::hex << v;
return ss.str();
};
//---
// Template overloads for ToString to handle specific types
// This is the default which works for most types:
template <typename T>
inline std::string ToString(T v) {
std::ostringstream ss;
ss << v;
return ss.str();
};
// hipEvent_t specialization. TODO - maybe add an event ID for debug?
template <>
inline std::string ToString(hipEvent_t v) {
std::ostringstream ss;
ss << v;
return ss.str();
};
// hipIpcEventHandle_t specialization. TODO
template <>
inline std::string ToString(hipIpcEventHandle_t v) {
return std::string{};
};
// hipStream_t
template <>
inline std::string ToString(hipStream_t v) {
std::ostringstream ss;
if (v == NULL) {
ss << "stream:<null>";
} else {
ss << *v;
}
return ss.str();
};
// hipMemcpyKind specialization
template <>
inline std::string ToString(hipMemcpyKind v) {
switch (v) {
CASE_STR(hipMemcpyHostToHost);
CASE_STR(hipMemcpyHostToDevice);
CASE_STR(hipMemcpyDeviceToHost);
CASE_STR(hipMemcpyDeviceToDevice);
CASE_STR(hipMemcpyDefault);
default:
return ToHexString(v);
};
};
template <>
inline std::string ToString(hipError_t v) {
return ihipErrorString(v);
};
// Catch empty arguments case
inline std::string ToString() { return (""); }
//---
// C++11 variadic template - peels off first argument, converts to string, and calls itself again to
// peel the next arg. Strings are automatically separated by comma+space.
template <typename T, typename... Args>
inline std::string ToString(T first, Args... args) {
return ToString(first) + ", " + ToString(args...);
}
#endif