diff --git a/projects/hip/CMakeLists.txt b/projects/hip/CMakeLists.txt index 94ed2a7562..eee1a14a8a 100644 --- a/projects/hip/CMakeLists.txt +++ b/projects/hip/CMakeLists.txt @@ -142,7 +142,7 @@ add_to_config(_buildInfo COMPILE_HIP_ATP_MARKER) # Build steps ############################# # Rebuild cmake cache updates .hipInfo and .hipVersion -add_custom_target(update_build_and_version_info ALL COMMAND make rebuild_cache) +add_custom_target(update_build_and_version_info COMMAND make rebuild_cache) # Build clang hipify if enabled add_subdirectory(hipify-clang) @@ -372,14 +372,14 @@ endif() # Testing steps ############################# # Target: test -set(HIP_PATH ${CMAKE_INSTALL_PREFIX}) +set(HIP_ROOT_DIR ${CMAKE_INSTALL_PREFIX}) set(HIP_SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}) -execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_SRC_PATH}/cmake" "${HIP_PATH}/cmake" RESULT_VARIABLE RUN_HIT ERROR_QUIET) +execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_SRC_PATH}/cmake" "${HIP_ROOT_DIR}/cmake" RESULT_VARIABLE RUN_HIT ERROR_QUIET) if(${RUN_HIT} EQUAL 0) - execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_SRC_PATH}/bin" "${HIP_PATH}/bin" RESULT_VARIABLE RUN_HIT ERROR_QUIET) + execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_SRC_PATH}/bin" "${HIP_ROOT_DIR}/bin" RESULT_VARIABLE RUN_HIT ERROR_QUIET) endif() if(${RUN_HIT} EQUAL 0) - set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) + set(CMAKE_MODULE_PATH "${HIP_ROOT_DIR}/cmake" ${CMAKE_MODULE_PATH}) include(${HIP_SRC_PATH}/tests/hit/HIT.cmake) # Add tests diff --git a/projects/hip/docs/markdown/hip_bugs.md b/projects/hip/docs/markdown/hip_bugs.md index 73133843bc..9452fae2fd 100644 --- a/projects/hip/docs/markdown/hip_bugs.md +++ b/projects/hip/docs/markdown/hip_bugs.md @@ -1,5 +1,4 @@ -# HIP Bugs - +# HIP Bugs - [Errors related to undefined reference to `__hcLaunchKernel__***__grid_launch_parm**`](#errors-related-to-undefined-reference-to-__hclaunchkernel____grid_launch_parm) @@ -41,60 +40,86 @@ For example, `Foo` in the code snippets below contains an array-typed member var ``` struct Foo { + float _data; // table is an array, which makes foo int table[3]; }; ``` -An workaround is to provide a custom serializer on CPU side, and append the contents of the array as kernel arguments: +A workaround is to provide a custom serializer on host side which appends the contents of the array as kernel arguments, and a custome deserializaer on the device path to reconstruct the array inside the GPU kernels. +The deserializer can not be a function template, and should have scalar-typed parameters of the number equals to the length of the array-typed member variable. For example: ``` struct Foo { - int table[3]; + float _data; + int _table[3]; + - // user-provided CPU serializer - // must append the contents of the array member as kernel arguments #ifdef __HCC__ + // user-provided CPU serializer + // Append the contents of the array member as kernel arguments __attribute__((annotate(“serialize”))) void __cxxamp_serialize(Kalmar::Serialize &s) const { + s.Append(sizeof(float), &_data); for (int i = 0; i < 3; ++i) - s.Append(sizeof(int), &table[i]); + s.Append(sizeof(int), &_table[i]); } -#endif -}; -``` -Then, provide a custom deserializer on GPU side, to help reconstruct the array within GPU kernels. Notice that the deserializer can not be a function template, and should have scalar-typed parameters of the number equals to the length of the array-typed member variable. For example: - -``` -struct Foo { - int table[3]; // user-provided GPU deserializer // table has 3 int elements, so deserializer must have 3 int parameters. -#ifdef __HCC__ __attribute__((annotate(“user_deserialize”))) - Foo(int x0, int x1, int x2) [[cpu]][[hc]] { - table[0] = x0; - table[1] = x1; - table[2] = x2; + Foo(float d, int x0, int x1, int x2) [[cpu]][[hc]] { + _data = d; + _table[0] = x0; + _table[1] = x1; + _table[2] = x2; } -#endif -#ifdef __HCC__ - __attribute__((annotate(“serialize”))) - void __cxxamp_serialize(Kalmar::Serialize &s) const { - s.Append(sizeof(int), &table[0]); - s.Append(sizeof(int), &table[1]); - s.Append(sizeof(int), &table[2]); - } #endif }; ``` Rather than create serializer functions, another workaround is to pass the member fields from the structure as simple data types. +Note a class or struct can contain only one "user_deserialize" constructor. +For types which contain arrays which are based on template parameter, you can use partial template instantiation to implement one constructor per specialization. +However, an easier approach may be to create one user_deserializer which processes the maximum supported dimension. +This will take more memory in the structure and also require additional kernel arguments, but this may have little performance impact and the conversion is easier than partial template specialization. An example: + +``` +#define MAX_Dim 4 +template struct MyArray { + + T* dataPtr_; + //int size_[Dim]; // Original code with template-sized Dims + int size_[MAX_dim]; // Workaround code - allocate an array big enough for all dims so one serializer works. + + +... + +#ifdef __HCC__ + __attribute__((annotate("serialize"))) + void __cxxamp_serialize(Kalmar::Serialize &s) const { + s.Append(sizeof(float), &_dataPtr); + for (int i=0; i using is_new_grid_launch_t = typename std::conditional< - std::is_callable{}, + is_callable{}, New_grid_launch_tag, Old_grid_launch_tag>::type; } @@ -118,6 +119,7 @@ namespace hip_impl // TODO: these are workarounds, they should be removed. hc::accelerator_view lock_stream_hip_(hipStream_t&, void*&); + void print_prelaunch_trace_(const char*, dim3, dim3, int, hipStream_t); void unlock_stream_hip_( hipStream_t, void*, const char*, hc::accelerator_view*); @@ -137,7 +139,13 @@ namespace hip_impl void* lck_stream = nullptr; auto acc_v = lock_stream_hip_(stream, lck_stream); auto stream_guard = make_RAII_guard( - [](){ /* perhaps use a slimmed down ihipPrintKernelLaunch here */ }, + std::bind( + print_prelaunch_trace_, + kernel_name, + num_blocks, + dim_blocks, + group_mem_bytes, + stream), std::bind( unlock_stream_hip_, stream, lck_stream, kernel_name, &acc_v)); @@ -841,16 +849,15 @@ namespace hip_impl group_mem_bytes,\ stream,\ ...)\ - do {\ - hipLaunchKernelGGL(\ - kernel_name,\ - num_blocks,\ - dim_blocks,\ - group_mem_bytes,\ - stream,\ - hipLaunchParm{},\ - ##__VA_ARGS__);\ - } while(0) - + do {\ + hipLaunchKernelGGL(\ + kernel_name,\ + num_blocks,\ + dim_blocks,\ + group_mem_bytes,\ + stream,\ + hipLaunchParm{},\ + ##__VA_ARGS__);\ + } while(0) } #endif //GENERIC_GRID_LAUNCH diff --git a/projects/hip/include/hip/hcc_detail/helpers.hpp b/projects/hip/include/hip/hcc_detail/helpers.hpp index e5a84a4678..611929766b 100644 --- a/projects/hip/include/hip/hcc_detail/helpers.hpp +++ b/projects/hip/include/hip/hcc_detail/helpers.hpp @@ -21,6 +21,7 @@ THE SOFTWARE. */ #pragma once +#include "concepts.hpp" #include // For std::conditional, std::decay, std::enable_if, // std::false_type, std result_of and std::true_type. @@ -29,9 +30,6 @@ THE SOFTWARE. namespace std { // TODO: these should be removed as soon as possible. #if (__cplusplus < 201406L) - template - using void_t = void; - #if (__cplusplus < 201402L) template using enable_if_t = typename enable_if::type; @@ -43,88 +41,80 @@ namespace std using result_of_t = typename result_of::type; template using remove_reference_t = typename remove_reference::type; - template< - FunctionalProcedure F, - unsigned int n = 0u, - typename = void> - struct is_callable_impl : is_callable_impl {}; - - // Pointer to member function, call through non-pointer. - template - struct is_callable_impl< - F(C, Ts...), - 0u, - void_t().*declval())(declval()...))> - > : true_type { - }; - - // Pointer to member function, call through pointer. - template - struct is_callable_impl< - F(C, Ts...), - 1u, - void_t()).*declval())(declval()...))> - > : std::true_type { - }; - - // Pointer to member data, call through non-pointer, no args. - template - struct is_callable_impl< - F(C), - 2u, - void_t().*declval())> - > : true_type { - }; - - // Pointer to member data, call through pointer, no args. - template - struct is_callable_impl< - F(C), - 3u, - void_t().*declval())> - > : true_type { - }; - - // General call, n args. - template - struct is_callable_impl< - F(Ts...), - 4u, - void_t()(declval()...))> - > : true_type { - }; - - // Not callable. - template - struct is_callable_impl : false_type {}; - - template - struct is_callable : is_callable_impl {}; - #else - template - struct is_callable_impl : false_type {}; - - template - struct is_callable_impl< - F(Ts...), - void_t>> : true_type {}; - - template - struct is_callable : is_callable_impl {}; #endif - template - struct disjunction : false_type {}; - template - struct disjunction : B1 {}; - template - struct disjunction - : conditional_t> - {}; #endif } -namespace hip_impl // Only for documentation, macros ignore namespaces. +namespace hip_impl { + template + using void_t_ = void; + + #if (__cplusplus < 201402L) + template< + FunctionalProcedure F, + unsigned int n = 0u, + typename = void> + struct is_callable_impl : is_callable_impl {}; + + // Pointer to member function, call through non-pointer. + template + struct is_callable_impl< + F(C, Ts...), + 0u, + void_t_().*std::declval())( + std::declval()...))> + > : std::true_type {}; + + // Pointer to member function, call through pointer. + template + struct is_callable_impl< + F(C, Ts...), + 1u, + void_t_()).*std::declval())( + std::declval()...))> + > : std::true_type {}; + + // Pointer to member data, call through non-pointer, no args. + template + struct is_callable_impl< + F(C), + 2u, + void_t_().*std::declval())> + > : std::true_type {}; + + // Pointer to member data, call through pointer, no args. + template + struct is_callable_impl< + F(C), + 3u, + void_t_().*std::declval())> + > : std::true_type {}; + + // General call, n args. + template + struct is_callable_impl< + F(Ts...), + 4u, + void_t_()(std::declval()...))> + > : std::true_type {}; + + // Not callable. + template + struct is_callable_impl : std::false_type {}; + + template + struct is_callable : is_callable_impl {}; + #else + template + struct is_callable_impl : std::false_type {}; + + template + struct is_callable_impl< + F(Ts...), + void_t_>> : std::true_type {}; + #endif + #define count_macro_args_impl_hip_(\ _0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15,\ _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29,\ diff --git a/projects/hip/include/hip/hcc_detail/hip_complex.h b/projects/hip/include/hip/hcc_detail/hip_complex.h index 26d73a21a8..c76d65b058 100644 --- a/projects/hip/include/hip/hcc_detail/hip_complex.h +++ b/projects/hip/include/hip/hcc_detail/hip_complex.h @@ -24,6 +24,7 @@ THE SOFTWARE. #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMPLEX_H #include "hip/hcc_detail/hip_vector_types.h" +#include #if __cplusplus #define COMPLEX_ADD_OP_OVERLOAD(type) \ diff --git a/projects/hip/include/hip/hcc_detail/hip_vector_types.h b/projects/hip/include/hip/hcc_detail/hip_vector_types.h index 82bd3b2d6f..35c6c23548 100644 --- a/projects/hip/include/hip/hcc_detail/hip_vector_types.h +++ b/projects/hip/include/hip/hcc_detail/hip_vector_types.h @@ -1270,6 +1270,15 @@ __device__ __host__ static inline type operator op (type& val, int) { \ #define DECLOP_1VAR_COMP(type, op) \ __device__ __host__ static inline bool operator op (type& lhs, type& rhs) { \ return lhs.x op rhs.x; \ +} \ +__device__ __host__ static inline bool operator op (const type& lhs, type& rhs) { \ + return lhs.x op rhs.x; \ +} \ +__device__ __host__ static inline bool operator op (type& lhs, const type& rhs) { \ + return lhs.x op rhs.x ; \ +} \ +__device__ __host__ static inline bool operator op (const type& lhs, const type& rhs) { \ + return lhs.x op rhs.x ; \ } #define DECLOP_1VAR_1IN_1OUT(type, op) \ @@ -1338,6 +1347,15 @@ __device__ __host__ static inline type operator op (type& val, int) { \ #define DECLOP_2VAR_COMP(type, op) \ __device__ __host__ static inline bool operator op (type& lhs, type& rhs) { \ return (lhs.x op rhs.x) && (lhs.y op rhs.y); \ +} \ +__device__ __host__ static inline bool operator op (const type& lhs, type& rhs) { \ + return (lhs.x op rhs.x) && (lhs.y op rhs.y); \ +} \ +__device__ __host__ static inline bool operator op (type& lhs, const type& rhs) { \ + return (lhs.x op rhs.x) && (lhs.y op rhs.y); \ +} \ +__device__ __host__ static inline bool operator op (const type& lhs, const type& rhs) { \ + return (lhs.x op rhs.x) && (lhs.y op rhs.y); \ } #define DECLOP_2VAR_1IN_1OUT(type, op) \ @@ -1415,7 +1433,16 @@ __device__ __host__ static inline type operator op (type& val, int) { \ #define DECLOP_3VAR_COMP(type, op) \ __device__ __host__ static inline bool operator op (type& lhs, type& rhs) { \ return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z); \ -} +} \ +__device__ __host__ static inline bool operator op (const type& lhs, type& rhs) { \ + return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z); \ +} \ +__device__ __host__ static inline bool operator op (type& lhs, const type& rhs) { \ + return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z); \ +} \ +__device__ __host__ static inline bool operator op (const type& lhs, const type& rhs) { \ + return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z); \ +} \ #define DECLOP_3VAR_1IN_1OUT(type, op) \ __device__ __host__ static inline type operator op(type &rhs) { \ @@ -1500,6 +1527,15 @@ __device__ __host__ static inline type operator op (type& val, int) { \ #define DECLOP_4VAR_COMP(type, op) \ __device__ __host__ static inline bool operator op (type& lhs, type& rhs) { \ return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z) && (lhs.w op rhs.w); \ +} \ +__device__ __host__ static inline bool operator op (const type& lhs, type& rhs) { \ + return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z) && (lhs.w op rhs.w); \ +} \ +__device__ __host__ static inline bool operator op (type& lhs, const type& rhs) { \ + return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z) && (lhs.w op rhs.w); \ +} \ +__device__ __host__ static inline bool operator op (const type& lhs, const type& rhs) { \ + return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z) && (lhs.w op rhs.w); \ } #define DECLOP_4VAR_1IN_1OUT(type, op) \ diff --git a/projects/hip/include/hip/hcc_detail/host_defines.h b/projects/hip/include/hip/hcc_detail/host_defines.h index b0a7421d18..5864cfa0e7 100644 --- a/projects/hip/include/hip/hcc_detail/host_defines.h +++ b/projects/hip/include/hip/hcc_detail/host_defines.h @@ -48,7 +48,7 @@ THE SOFTWARE. #define __global__ __attribute__((hc_grid_launch)) __attribute__((used)) #else //#warning "GGL global define reached" -#define __global__ [[hc]] __attribute__((weak)) +#define __global__ __attribute__((hc, weak)) #endif //GENERIC_GRID_LAUNCH #define __noinline__ __attribute__((noinline)) diff --git a/projects/hip/include/hip/hcc_detail/math_functions.h b/projects/hip/include/hip/hcc_detail/math_functions.h index c3b8186fd3..9faff2743a 100644 --- a/projects/hip/include/hip/hcc_detail/math_functions.h +++ b/projects/hip/include/hip/hcc_detail/math_functions.h @@ -51,6 +51,7 @@ __device__ float exp10f(float x); __device__ float exp2f(float x); __device__ float expf(float x); __device__ float expm1f(float x); +__device__ int abs(int x); __device__ float fabsf(float x); __device__ float fdimf(float x, float y); __device__ float fdividef(float x, float y); diff --git a/projects/hip/include/hip/hip_fp16.h b/projects/hip/include/hip/hip_fp16.h index 0e002d9396..95879dba50 100644 --- a/projects/hip/include/hip/hip_fp16.h +++ b/projects/hip/include/hip/hip_fp16.h @@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifdef HIP_INCLUDE_HIP_HIP_FP16_H +#ifndef HIP_INCLUDE_HIP_HIP_FP16_H #define HIP_INCLUDE_HIP_HIP_FP16_H #include diff --git a/projects/hip/packaging/hip-targets-release.cmake b/projects/hip/packaging/hip-targets-release.cmake new file mode 100644 index 0000000000..ba0a5005f5 --- /dev/null +++ b/projects/hip/packaging/hip-targets-release.cmake @@ -0,0 +1,41 @@ +#---------------------------------------------------------------- +# Generated CMake target import file for configuration "Release". +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Import target "hip::hip_hcc_static" for configuration "Release" +set_property(TARGET hip::hip_hcc_static APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(hip::hip_hcc_static PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LINK_INTERFACE_LIBRARIES_RELEASE "hc_am" + IMPORTED_LOCATION_RELEASE "/opt/rocm/hip/lib/libhip_hcc_static.a" + ) + +list(APPEND _IMPORT_CHECK_TARGETS hip::hip_hcc_static ) +list(APPEND _IMPORT_CHECK_FILES_FOR_hip::hip_hcc_static "/opt/rocm/hip/lib/libhip_hcc_static.a" ) + +# Import target "hip::hip_hcc" for configuration "Release" +set_property(TARGET hip::hip_hcc APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(hip::hip_hcc PROPERTIES + IMPORTED_LINK_INTERFACE_LIBRARIES_RELEASE "hcc::hccrt;hcc::hc_am" + IMPORTED_LOCATION_RELEASE "/opt/rocm/hip/lib/libhip_hcc.so" + IMPORTED_SONAME_RELEASE "libhip_hcc.so" + ) + +list(APPEND _IMPORT_CHECK_TARGETS hip::hip_hcc ) +list(APPEND _IMPORT_CHECK_FILES_FOR_hip::hip_hcc "/opt/rocm/hip/lib/libhip_hcc.so" ) + +# Import target "hip::hip_device" for configuration "Release" +set_property(TARGET hip::hip_device APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) +set_target_properties(hip::hip_device PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" + IMPORTED_LOCATION_RELEASE "/opt/rocm/hip/lib/libhip_device.a" + ) + +list(APPEND _IMPORT_CHECK_TARGETS hip::hip_device ) +list(APPEND _IMPORT_CHECK_FILES_FOR_hip::hip_device "/opt/rocm/hip/lib/libhip_device.a" ) + +# Commands beyond this point should not need to know the version. +set(CMAKE_IMPORT_FILE_VERSION) diff --git a/projects/hip/packaging/hip-targets.cmake b/projects/hip/packaging/hip-targets.cmake new file mode 100644 index 0000000000..65370eec9e --- /dev/null +++ b/projects/hip/packaging/hip-targets.cmake @@ -0,0 +1,102 @@ +# Generated by CMake 3.5.1 + +if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.5) + message(FATAL_ERROR "CMake >= 2.6.0 required") +endif() +cmake_policy(PUSH) +cmake_policy(VERSION 2.6) +#---------------------------------------------------------------- +# Generated CMake target import file. +#---------------------------------------------------------------- + +# Commands may need to know the format version. +set(CMAKE_IMPORT_FILE_VERSION 1) + +# Protect against multiple inclusion, which would fail when already imported targets are added once more. +set(_targetsDefined) +set(_targetsNotDefined) +set(_expectedTargets) +foreach(_expectedTarget hip::hip_hcc_static hip::hip_hcc hip::hip_device) + list(APPEND _expectedTargets ${_expectedTarget}) + if(NOT TARGET ${_expectedTarget}) + list(APPEND _targetsNotDefined ${_expectedTarget}) + endif() + if(TARGET ${_expectedTarget}) + list(APPEND _targetsDefined ${_expectedTarget}) + endif() +endforeach() +if("${_targetsDefined}" STREQUAL "${_expectedTargets}") + set(CMAKE_IMPORT_FILE_VERSION) + cmake_policy(POP) + return() +endif() +if(NOT "${_targetsDefined}" STREQUAL "") + message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_targetsDefined}\nTargets not yet defined: ${_targetsNotDefined}\n") +endif() +unset(_targetsDefined) +unset(_targetsNotDefined) +unset(_expectedTargets) + + +# The installation prefix configured by this project. +set(_IMPORT_PREFIX "/opt/rocm/hip") + +# Create imported target hip::hip_hcc_static +add_library(hip::hip_hcc_static STATIC IMPORTED) + +set_target_properties(hip::hip_hcc_static PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;/opt/rocm/hsa/include" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;/opt/rocm/hsa/include" +) + +# Create imported target hip::hip_hcc +add_library(hip::hip_hcc SHARED IMPORTED) + +set_target_properties(hip::hip_hcc PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;/opt/rocm/hsa/include" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;/opt/rocm/hsa/include" +) + +# Create imported target hip::hip_device +add_library(hip::hip_device STATIC IMPORTED) + +set_target_properties(hip::hip_device PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;/opt/rocm/hsa/include" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;/opt/rocm/hsa/include" +) + +# Load information for each installed configuration. +get_filename_component(_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +file(GLOB CONFIG_FILES "${_DIR}/hip-targets-*.cmake") +foreach(f ${CONFIG_FILES}) + include(${f}) +endforeach() + +# Cleanup temporary variables. +set(_IMPORT_PREFIX) + +# Loop over all imported files and verify that they actually exist +foreach(target ${_IMPORT_CHECK_TARGETS} ) + foreach(file ${_IMPORT_CHECK_FILES_FOR_${target}} ) + if(NOT EXISTS "${file}" ) + message(FATAL_ERROR "The imported target \"${target}\" references the file + \"${file}\" +but this file does not exist. Possible reasons include: +* The file was deleted, renamed, or moved to another location. +* An install or uninstall procedure did not complete successfully. +* The installation package was faulty and contained + \"${CMAKE_CURRENT_LIST_FILE}\" +but not all the files it references. +") + endif() + endforeach() + unset(_IMPORT_CHECK_FILES_FOR_${target}) +endforeach() +unset(_IMPORT_CHECK_TARGETS) + +# This file does not depend on other imported targets which have +# been exported from the same project but in a separate export set. + +# Commands beyond this point should not need to know the version. +set(CMAKE_IMPORT_FILE_VERSION) +cmake_policy(POP) diff --git a/projects/hip/packaging/hip_hcc.txt b/projects/hip/packaging/hip_hcc.txt index 7dd65033fd..7118c32eb9 100644 --- a/projects/hip/packaging/hip_hcc.txt +++ b/projects/hip/packaging/hip_hcc.txt @@ -6,6 +6,8 @@ install(FILES @PROJECT_BINARY_DIR@/libhip_hcc_static.a DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/libhip_device.a DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib) install(FILES @hip_SOURCE_DIR@/src/hip_hc.ll @hip_SOURCE_DIR@/src/hip_hc_gfx803.ll DESTINATION lib) +install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip) +install(FILES @hip_SOURCE_DIR@/packaging/hip-targets.cmake @hip_SOURCE_DIR@/packaging/hip-targets-release.cmake DESTINATION lib/cmake/hip) ############################# # Packaging steps diff --git a/projects/hip/src/grid_launch.cpp b/projects/hip/src/grid_launch.cpp index 7739995600..cac01df7dc 100644 --- a/projects/hip/src/grid_launch.cpp +++ b/projects/hip/src/grid_launch.cpp @@ -27,6 +27,9 @@ THE SOFTWARE. #include "hc.hpp" #include "trace_helper.h" +#include +#include + namespace hip_impl { hc::accelerator_view lock_stream_hip_( @@ -42,6 +45,39 @@ namespace hip_impl return (*static_cast(locked_stream))->_av; } + void print_prelaunch_trace_( + const char* kernel_name, + dim3 num_blocks, + dim3 dim_blocks, + int group_mem_bytes, + hipStream_t stream) + { + if ((HIP_TRACE_API & (1 << TRACE_CMD)) || + HIP_PROFILE_API || + (COMPILE_HIP_DB && HIP_TRACE_API)) { + std::stringstream os; + os << tls_tidInfo.tid() << "." << tls_tidInfo.apiSeqNum() + << " hipLaunchKernel '" << kernel_name << "'" + << " gridDim:" << num_blocks + << " groupDim:" << dim_blocks + << " sharedMem:+" << group_mem_bytes + << " " << *stream; + + if (HIP_PROFILE_API == 0x1) { + std::string shortAtpString("hipLaunchKernel:"); + shortAtpString += kernel_name; + MARKER_BEGIN(shortAtpString.c_str(), "HIP"); + } else if (HIP_PROFILE_API == 0x2) { + MARKER_BEGIN(os.str().c_str(), "HIP"); + } + + if (COMPILE_HIP_DB && HIP_TRACE_API) { + std::cerr << API_COLOR << os.str() << API_COLOR_END + << std::endl; + } + } + } + void unlock_stream_hip_( hipStream_t stream, void* locked_stream, diff --git a/projects/hip/src/math_functions.cpp b/projects/hip/src/math_functions.cpp index 92cc8689fc..3472216309 100644 --- a/projects/hip/src/math_functions.cpp +++ b/projects/hip/src/math_functions.cpp @@ -114,6 +114,10 @@ __device__ float expm1f(float x) { return hc::precise_math::expm1f(x); } +__device__ int abs(int x) +{ + return x >= 0 ? x : -x; // TODO - optimize with OCML +} __device__ float fabsf(float x) { return hc::precise_math::fabsf(x);