Merge branch 'amd-develop' into amd-master

Change-Id: I05572d2b32f1df70b54e2efeb32c8a4d8055912d


[ROCm/hip commit: 3a56e5c09b]
Этот коммит содержится в:
Maneesh Gupta
2017-04-13 12:39:28 +05:30
родитель 60093c286f 8c52b6a748
Коммит bb976eb6ad
14 изменённых файлов: 374 добавлений и 129 удалений
+5 -5
Просмотреть файл
@@ -142,7 +142,7 @@ add_to_config(_buildInfo COMPILE_HIP_ATP_MARKER)
# Build steps
#############################
# Rebuild cmake cache updates .hipInfo and .hipVersion
add_custom_target(update_build_and_version_info ALL COMMAND make rebuild_cache)
add_custom_target(update_build_and_version_info COMMAND make rebuild_cache)
# Build clang hipify if enabled
add_subdirectory(hipify-clang)
@@ -372,14 +372,14 @@ endif()
# Testing steps
#############################
# Target: test
set(HIP_PATH ${CMAKE_INSTALL_PREFIX})
set(HIP_ROOT_DIR ${CMAKE_INSTALL_PREFIX})
set(HIP_SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR})
execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_SRC_PATH}/cmake" "${HIP_PATH}/cmake" RESULT_VARIABLE RUN_HIT ERROR_QUIET)
execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_SRC_PATH}/cmake" "${HIP_ROOT_DIR}/cmake" RESULT_VARIABLE RUN_HIT ERROR_QUIET)
if(${RUN_HIT} EQUAL 0)
execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_SRC_PATH}/bin" "${HIP_PATH}/bin" RESULT_VARIABLE RUN_HIT ERROR_QUIET)
execute_process(COMMAND "${CMAKE_COMMAND}" -E copy_directory "${HIP_SRC_PATH}/bin" "${HIP_ROOT_DIR}/bin" RESULT_VARIABLE RUN_HIT ERROR_QUIET)
endif()
if(${RUN_HIT} EQUAL 0)
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
set(CMAKE_MODULE_PATH "${HIP_ROOT_DIR}/cmake" ${CMAKE_MODULE_PATH})
include(${HIP_SRC_PATH}/tests/hit/HIT.cmake)
# Add tests
+53 -28
Просмотреть файл
@@ -1,5 +1,4 @@
# HIP Bugs
# HIP Bugs
<!-- toc -->
- [Errors related to undefined reference to `__hcLaunchKernel__***__grid_launch_parm**`](#errors-related-to-undefined-reference-to-__hclaunchkernel____grid_launch_parm)
@@ -41,60 +40,86 @@ For example, `Foo` in the code snippets below contains an array-typed member var
```
struct Foo {
float _data;
// table is an array, which makes foo
int table[3];
};
```
An workaround is to provide a custom serializer on CPU side, and append the contents of the array as kernel arguments:
A workaround is to provide a custom serializer on host side which appends the contents of the array as kernel arguments, and a custome deserializaer on the device path to reconstruct the array inside the GPU kernels.
The deserializer can not be a function template, and should have scalar-typed parameters of the number equals to the length of the array-typed member variable. For example:
```
struct Foo {
int table[3];
float _data;
int _table[3];
// user-provided CPU serializer
// must append the contents of the array member as kernel arguments
#ifdef __HCC__
// user-provided CPU serializer
// Append the contents of the array member as kernel arguments
__attribute__((annotate(“serialize”)))
void __cxxamp_serialize(Kalmar::Serialize &s) const {
s.Append(sizeof(float), &_data);
for (int i = 0; i < 3; ++i)
s.Append(sizeof(int), &table[i]);
s.Append(sizeof(int), &_table[i]);
}
#endif
};
```
Then, provide a custom deserializer on GPU side, to help reconstruct the array within GPU kernels. Notice that the deserializer can not be a function template, and should have scalar-typed parameters of the number equals to the length of the array-typed member variable. For example:
```
struct Foo {
int table[3];
// user-provided GPU deserializer
// table has 3 int elements, so deserializer must have 3 int parameters.
#ifdef __HCC__
__attribute__((annotate(“user_deserialize”)))
Foo(int x0, int x1, int x2) [[cpu]][[hc]] {
table[0] = x0;
table[1] = x1;
table[2] = x2;
Foo(float d, int x0, int x1, int x2) [[cpu]][[hc]] {
_data = d;
_table[0] = x0;
_table[1] = x1;
_table[2] = x2;
}
#endif
#ifdef __HCC__
__attribute__((annotate(“serialize”)))
void __cxxamp_serialize(Kalmar::Serialize &s) const {
s.Append(sizeof(int), &table[0]);
s.Append(sizeof(int), &table[1]);
s.Append(sizeof(int), &table[2]);
}
#endif
};
```
Rather than create serializer functions, another workaround is to pass the member fields from the structure as simple data types.
Note a class or struct can contain only one "user_deserialize" constructor.
For types which contain arrays which are based on template parameter, you can use partial template instantiation to implement one constructor per specialization.
However, an easier approach may be to create one user_deserializer which processes the maximum supported dimension.
This will take more memory in the structure and also require additional kernel arguments, but this may have little performance impact and the conversion is easier than partial template specialization. An example:
```
#define MAX_Dim 4
template<typename T, int Dim> struct MyArray {
T* dataPtr_;
//int size_[Dim]; // Original code with template-sized Dims
int size_[MAX_dim]; // Workaround code - allocate an array big enough for all dims so one serializer works.
...
#ifdef __HCC__
__attribute__((annotate("serialize")))
void __cxxamp_serialize(Kalmar::Serialize &s) const {
s.Append(sizeof(float), &_dataPtr);
for (int i=0; i<MAX_Dim; i++) {
s.Append(sizeof(size_[0]), &size_[i]);
}
}
__attribute__((annotate("user_deserialize")))
MyArray(T* data, int size0, int size1, int size2, int size3) [[cpu]][[hc]] {
data_ = data;
size_[0] = size0;
size_[1] = size1;
size_[2] = size2;
size_[3] = size3;
}
#endif
```
### HIP is more restrictive in enforcing restrictions
+20 -13
Просмотреть файл
@@ -21,6 +21,7 @@ THE SOFTWARE.
*/
#pragma once
#if GENERIC_GRID_LAUNCH == 1
#include "concepts.hpp"
@@ -71,7 +72,7 @@ namespace hip_impl
template<FunctionalProcedure F, typename... Ts>
using is_new_grid_launch_t = typename std::conditional<
std::is_callable<F(Ts...)>{},
is_callable<F(Ts...)>{},
New_grid_launch_tag,
Old_grid_launch_tag>::type;
}
@@ -118,6 +119,7 @@ namespace hip_impl
// TODO: these are workarounds, they should be removed.
hc::accelerator_view lock_stream_hip_(hipStream_t&, void*&);
void print_prelaunch_trace_(const char*, dim3, dim3, int, hipStream_t);
void unlock_stream_hip_(
hipStream_t, void*, const char*, hc::accelerator_view*);
@@ -137,7 +139,13 @@ namespace hip_impl
void* lck_stream = nullptr;
auto acc_v = lock_stream_hip_(stream, lck_stream);
auto stream_guard = make_RAII_guard(
[](){ /* perhaps use a slimmed down ihipPrintKernelLaunch here */ },
std::bind(
print_prelaunch_trace_,
kernel_name,
num_blocks,
dim_blocks,
group_mem_bytes,
stream),
std::bind(
unlock_stream_hip_, stream, lck_stream, kernel_name, &acc_v));
@@ -841,16 +849,15 @@ namespace hip_impl
group_mem_bytes,\
stream,\
...)\
do {\
hipLaunchKernelGGL(\
kernel_name,\
num_blocks,\
dim_blocks,\
group_mem_bytes,\
stream,\
hipLaunchParm{},\
##__VA_ARGS__);\
} while(0)
do {\
hipLaunchKernelGGL(\
kernel_name,\
num_blocks,\
dim_blocks,\
group_mem_bytes,\
stream,\
hipLaunchParm{},\
##__VA_ARGS__);\
} while(0)
}
#endif //GENERIC_GRID_LAUNCH
+70 -80
Просмотреть файл
@@ -21,6 +21,7 @@ THE SOFTWARE.
*/
#pragma once
#include "concepts.hpp"
#include <type_traits> // For std::conditional, std::decay, std::enable_if,
// std::false_type, std result_of and std::true_type.
@@ -29,9 +30,6 @@ THE SOFTWARE.
namespace std
{ // TODO: these should be removed as soon as possible.
#if (__cplusplus < 201406L)
template<typename...>
using void_t = void;
#if (__cplusplus < 201402L)
template<bool cond, typename T = void>
using enable_if_t = typename enable_if<cond, T>::type;
@@ -43,88 +41,80 @@ namespace std
using result_of_t = typename result_of<F(Ts...)>::type;
template<typename T>
using remove_reference_t = typename remove_reference<T>::type;
template<
FunctionalProcedure F,
unsigned int n = 0u,
typename = void>
struct is_callable_impl : is_callable_impl<F, n + 1u> {};
// Pointer to member function, call through non-pointer.
template<FunctionalProcedure F, typename C, typename... Ts>
struct is_callable_impl<
F(C, Ts...),
0u,
void_t<decltype((declval<C>().*declval<F>())(declval<Ts>()...))>
> : true_type {
};
// Pointer to member function, call through pointer.
template<FunctionalProcedure F, typename C, typename... Ts>
struct is_callable_impl<
F(C, Ts...),
1u,
void_t<decltype(((*declval<C>()).*declval<F>())(declval<Ts>()...))>
> : std::true_type {
};
// Pointer to member data, call through non-pointer, no args.
template<FunctionalProcedure F, typename C>
struct is_callable_impl<
F(C),
2u,
void_t<decltype(declval<C>().*declval<F>())>
> : true_type {
};
// Pointer to member data, call through pointer, no args.
template<FunctionalProcedure F, typename C>
struct is_callable_impl<
F(C),
3u,
void_t<decltype(*declval<C>().*declval<F>())>
> : true_type {
};
// General call, n args.
template<FunctionalProcedure F, typename... Ts>
struct is_callable_impl<
F(Ts...),
4u,
void_t<decltype(declval<F>()(declval<Ts>()...))>
> : true_type {
};
// Not callable.
template<FunctionalProcedure F>
struct is_callable_impl<F, 5u> : false_type {};
template<typename Call>
struct is_callable : is_callable_impl<Call> {};
#else
template<typename, typename = void>
struct is_callable_impl : false_type {};
template<FunctionalProcedure F, typename... Ts>
struct is_callable_impl<
F(Ts...),
void_t<result_of_t<F(Ts...)>>> : true_type {};
template<typename F>
struct is_callable : is_callable_impl<F> {};
#endif
template<typename...>
struct disjunction : false_type {};
template<typename B1>
struct disjunction<B1> : B1 {};
template<typename B1, typename... Bs>
struct disjunction<B1, Bs...>
: conditional_t<B1{} == true, B1, disjunction<Bs...>>
{};
#endif
}
namespace hip_impl // Only for documentation, macros ignore namespaces.
namespace hip_impl
{
template<typename...>
using void_t_ = void;
#if (__cplusplus < 201402L)
template<
FunctionalProcedure F,
unsigned int n = 0u,
typename = void>
struct is_callable_impl : is_callable_impl<F, n + 1u> {};
// Pointer to member function, call through non-pointer.
template<FunctionalProcedure F, typename C, typename... Ts>
struct is_callable_impl<
F(C, Ts...),
0u,
void_t_<decltype((std::declval<C>().*std::declval<F>())(
std::declval<Ts>()...))>
> : std::true_type {};
// Pointer to member function, call through pointer.
template<FunctionalProcedure F, typename C, typename... Ts>
struct is_callable_impl<
F(C, Ts...),
1u,
void_t_<decltype(((*std::declval<C>()).*std::declval<F>())(
std::declval<Ts>()...))>
> : std::true_type {};
// Pointer to member data, call through non-pointer, no args.
template<FunctionalProcedure F, typename C>
struct is_callable_impl<
F(C),
2u,
void_t_<decltype(std::declval<C>().*std::declval<F>())>
> : std::true_type {};
// Pointer to member data, call through pointer, no args.
template<FunctionalProcedure F, typename C>
struct is_callable_impl<
F(C),
3u,
void_t_<decltype(*std::declval<C>().*std::declval<F>())>
> : std::true_type {};
// General call, n args.
template<FunctionalProcedure F, typename... Ts>
struct is_callable_impl<
F(Ts...),
4u,
void_t_<decltype(std::declval<F>()(std::declval<Ts>()...))>
> : std::true_type {};
// Not callable.
template<FunctionalProcedure F>
struct is_callable_impl<F, 5u> : std::false_type {};
template<typename Call>
struct is_callable : is_callable_impl<Call> {};
#else
template<typename, typename = void>
struct is_callable_impl : std::false_type {};
template<FunctionalProcedure F, typename... Ts>
struct is_callable_impl<
F(Ts...),
void_t_<std::result_of_t<F(Ts...)>>> : std::true_type {};
#endif
#define count_macro_args_impl_hip_(\
_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15,\
_16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29,\
+1
Просмотреть файл
@@ -24,6 +24,7 @@ THE SOFTWARE.
#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_COMPLEX_H
#include "hip/hcc_detail/hip_vector_types.h"
#include <math.h>
#if __cplusplus
#define COMPLEX_ADD_OP_OVERLOAD(type) \
+37 -1
Просмотреть файл
@@ -1270,6 +1270,15 @@ __device__ __host__ static inline type operator op (type& val, int) { \
#define DECLOP_1VAR_COMP(type, op) \
__device__ __host__ static inline bool operator op (type& lhs, type& rhs) { \
return lhs.x op rhs.x; \
} \
__device__ __host__ static inline bool operator op (const type& lhs, type& rhs) { \
return lhs.x op rhs.x; \
} \
__device__ __host__ static inline bool operator op (type& lhs, const type& rhs) { \
return lhs.x op rhs.x ; \
} \
__device__ __host__ static inline bool operator op (const type& lhs, const type& rhs) { \
return lhs.x op rhs.x ; \
}
#define DECLOP_1VAR_1IN_1OUT(type, op) \
@@ -1338,6 +1347,15 @@ __device__ __host__ static inline type operator op (type& val, int) { \
#define DECLOP_2VAR_COMP(type, op) \
__device__ __host__ static inline bool operator op (type& lhs, type& rhs) { \
return (lhs.x op rhs.x) && (lhs.y op rhs.y); \
} \
__device__ __host__ static inline bool operator op (const type& lhs, type& rhs) { \
return (lhs.x op rhs.x) && (lhs.y op rhs.y); \
} \
__device__ __host__ static inline bool operator op (type& lhs, const type& rhs) { \
return (lhs.x op rhs.x) && (lhs.y op rhs.y); \
} \
__device__ __host__ static inline bool operator op (const type& lhs, const type& rhs) { \
return (lhs.x op rhs.x) && (lhs.y op rhs.y); \
}
#define DECLOP_2VAR_1IN_1OUT(type, op) \
@@ -1415,7 +1433,16 @@ __device__ __host__ static inline type operator op (type& val, int) { \
#define DECLOP_3VAR_COMP(type, op) \
__device__ __host__ static inline bool operator op (type& lhs, type& rhs) { \
return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z); \
}
} \
__device__ __host__ static inline bool operator op (const type& lhs, type& rhs) { \
return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z); \
} \
__device__ __host__ static inline bool operator op (type& lhs, const type& rhs) { \
return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z); \
} \
__device__ __host__ static inline bool operator op (const type& lhs, const type& rhs) { \
return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z); \
} \
#define DECLOP_3VAR_1IN_1OUT(type, op) \
__device__ __host__ static inline type operator op(type &rhs) { \
@@ -1500,6 +1527,15 @@ __device__ __host__ static inline type operator op (type& val, int) { \
#define DECLOP_4VAR_COMP(type, op) \
__device__ __host__ static inline bool operator op (type& lhs, type& rhs) { \
return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z) && (lhs.w op rhs.w); \
} \
__device__ __host__ static inline bool operator op (const type& lhs, type& rhs) { \
return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z) && (lhs.w op rhs.w); \
} \
__device__ __host__ static inline bool operator op (type& lhs, const type& rhs) { \
return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z) && (lhs.w op rhs.w); \
} \
__device__ __host__ static inline bool operator op (const type& lhs, const type& rhs) { \
return (lhs.x op rhs.x) && (lhs.y op rhs.y) && (lhs.z op rhs.z) && (lhs.w op rhs.w); \
}
#define DECLOP_4VAR_1IN_1OUT(type, op) \
+1 -1
Просмотреть файл
@@ -48,7 +48,7 @@ THE SOFTWARE.
#define __global__ __attribute__((hc_grid_launch)) __attribute__((used))
#else
//#warning "GGL global define reached"
#define __global__ [[hc]] __attribute__((weak))
#define __global__ __attribute__((hc, weak))
#endif //GENERIC_GRID_LAUNCH
#define __noinline__ __attribute__((noinline))
+1
Просмотреть файл
@@ -51,6 +51,7 @@ __device__ float exp10f(float x);
__device__ float exp2f(float x);
__device__ float expf(float x);
__device__ float expm1f(float x);
__device__ int abs(int x);
__device__ float fabsf(float x);
__device__ float fdimf(float x, float y);
__device__ float fdividef(float x, float y);
+1 -1
Просмотреть файл
@@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifdef HIP_INCLUDE_HIP_HIP_FP16_H
#ifndef HIP_INCLUDE_HIP_HIP_FP16_H
#define HIP_INCLUDE_HIP_HIP_FP16_H
#include <hip/hip_common.h>
+41
Просмотреть файл
@@ -0,0 +1,41 @@
#----------------------------------------------------------------
# Generated CMake target import file for configuration "Release".
#----------------------------------------------------------------
# Commands may need to know the format version.
set(CMAKE_IMPORT_FILE_VERSION 1)
# Import target "hip::hip_hcc_static" for configuration "Release"
set_property(TARGET hip::hip_hcc_static APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(hip::hip_hcc_static PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX"
IMPORTED_LINK_INTERFACE_LIBRARIES_RELEASE "hc_am"
IMPORTED_LOCATION_RELEASE "/opt/rocm/hip/lib/libhip_hcc_static.a"
)
list(APPEND _IMPORT_CHECK_TARGETS hip::hip_hcc_static )
list(APPEND _IMPORT_CHECK_FILES_FOR_hip::hip_hcc_static "/opt/rocm/hip/lib/libhip_hcc_static.a" )
# Import target "hip::hip_hcc" for configuration "Release"
set_property(TARGET hip::hip_hcc APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(hip::hip_hcc PROPERTIES
IMPORTED_LINK_INTERFACE_LIBRARIES_RELEASE "hcc::hccrt;hcc::hc_am"
IMPORTED_LOCATION_RELEASE "/opt/rocm/hip/lib/libhip_hcc.so"
IMPORTED_SONAME_RELEASE "libhip_hcc.so"
)
list(APPEND _IMPORT_CHECK_TARGETS hip::hip_hcc )
list(APPEND _IMPORT_CHECK_FILES_FOR_hip::hip_hcc "/opt/rocm/hip/lib/libhip_hcc.so" )
# Import target "hip::hip_device" for configuration "Release"
set_property(TARGET hip::hip_device APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(hip::hip_device PROPERTIES
IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX"
IMPORTED_LOCATION_RELEASE "/opt/rocm/hip/lib/libhip_device.a"
)
list(APPEND _IMPORT_CHECK_TARGETS hip::hip_device )
list(APPEND _IMPORT_CHECK_FILES_FOR_hip::hip_device "/opt/rocm/hip/lib/libhip_device.a" )
# Commands beyond this point should not need to know the version.
set(CMAKE_IMPORT_FILE_VERSION)
+102
Просмотреть файл
@@ -0,0 +1,102 @@
# Generated by CMake 3.5.1
if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.5)
message(FATAL_ERROR "CMake >= 2.6.0 required")
endif()
cmake_policy(PUSH)
cmake_policy(VERSION 2.6)
#----------------------------------------------------------------
# Generated CMake target import file.
#----------------------------------------------------------------
# Commands may need to know the format version.
set(CMAKE_IMPORT_FILE_VERSION 1)
# Protect against multiple inclusion, which would fail when already imported targets are added once more.
set(_targetsDefined)
set(_targetsNotDefined)
set(_expectedTargets)
foreach(_expectedTarget hip::hip_hcc_static hip::hip_hcc hip::hip_device)
list(APPEND _expectedTargets ${_expectedTarget})
if(NOT TARGET ${_expectedTarget})
list(APPEND _targetsNotDefined ${_expectedTarget})
endif()
if(TARGET ${_expectedTarget})
list(APPEND _targetsDefined ${_expectedTarget})
endif()
endforeach()
if("${_targetsDefined}" STREQUAL "${_expectedTargets}")
set(CMAKE_IMPORT_FILE_VERSION)
cmake_policy(POP)
return()
endif()
if(NOT "${_targetsDefined}" STREQUAL "")
message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_targetsDefined}\nTargets not yet defined: ${_targetsNotDefined}\n")
endif()
unset(_targetsDefined)
unset(_targetsNotDefined)
unset(_expectedTargets)
# The installation prefix configured by this project.
set(_IMPORT_PREFIX "/opt/rocm/hip")
# Create imported target hip::hip_hcc_static
add_library(hip::hip_hcc_static STATIC IMPORTED)
set_target_properties(hip::hip_hcc_static PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;/opt/rocm/hsa/include"
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;/opt/rocm/hsa/include"
)
# Create imported target hip::hip_hcc
add_library(hip::hip_hcc SHARED IMPORTED)
set_target_properties(hip::hip_hcc PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;/opt/rocm/hsa/include"
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;/opt/rocm/hsa/include"
)
# Create imported target hip::hip_device
add_library(hip::hip_device STATIC IMPORTED)
set_target_properties(hip::hip_device PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;/opt/rocm/hsa/include"
INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;/opt/rocm/hsa/include"
)
# Load information for each installed configuration.
get_filename_component(_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
file(GLOB CONFIG_FILES "${_DIR}/hip-targets-*.cmake")
foreach(f ${CONFIG_FILES})
include(${f})
endforeach()
# Cleanup temporary variables.
set(_IMPORT_PREFIX)
# Loop over all imported files and verify that they actually exist
foreach(target ${_IMPORT_CHECK_TARGETS} )
foreach(file ${_IMPORT_CHECK_FILES_FOR_${target}} )
if(NOT EXISTS "${file}" )
message(FATAL_ERROR "The imported target \"${target}\" references the file
\"${file}\"
but this file does not exist. Possible reasons include:
* The file was deleted, renamed, or moved to another location.
* An install or uninstall procedure did not complete successfully.
* The installation package was faulty and contained
\"${CMAKE_CURRENT_LIST_FILE}\"
but not all the files it references.
")
endif()
endforeach()
unset(_IMPORT_CHECK_FILES_FOR_${target})
endforeach()
unset(_IMPORT_CHECK_TARGETS)
# This file does not depend on other imported targets which have
# been exported from the same project but in a separate export set.
# Commands beyond this point should not need to know the version.
set(CMAKE_IMPORT_FILE_VERSION)
cmake_policy(POP)
+2
Просмотреть файл
@@ -6,6 +6,8 @@ install(FILES @PROJECT_BINARY_DIR@/libhip_hcc_static.a DESTINATION lib)
install(FILES @PROJECT_BINARY_DIR@/libhip_device.a DESTINATION lib)
install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib)
install(FILES @hip_SOURCE_DIR@/src/hip_hc.ll @hip_SOURCE_DIR@/src/hip_hc_gfx803.ll DESTINATION lib)
install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip)
install(FILES @hip_SOURCE_DIR@/packaging/hip-targets.cmake @hip_SOURCE_DIR@/packaging/hip-targets-release.cmake DESTINATION lib/cmake/hip)
#############################
# Packaging steps
+36
Просмотреть файл
@@ -27,6 +27,9 @@ THE SOFTWARE.
#include "hc.hpp"
#include "trace_helper.h"
#include <iostream>
#include <sstream>
namespace hip_impl
{
hc::accelerator_view lock_stream_hip_(
@@ -42,6 +45,39 @@ namespace hip_impl
return (*static_cast<L*>(locked_stream))->_av;
}
void print_prelaunch_trace_(
const char* kernel_name,
dim3 num_blocks,
dim3 dim_blocks,
int group_mem_bytes,
hipStream_t stream)
{
if ((HIP_TRACE_API & (1 << TRACE_CMD)) ||
HIP_PROFILE_API ||
(COMPILE_HIP_DB && HIP_TRACE_API)) {
std::stringstream os;
os << tls_tidInfo.tid() << "." << tls_tidInfo.apiSeqNum()
<< " hipLaunchKernel '" << kernel_name << "'"
<< " gridDim:" << num_blocks
<< " groupDim:" << dim_blocks
<< " sharedMem:+" << group_mem_bytes
<< " " << *stream;
if (HIP_PROFILE_API == 0x1) {
std::string shortAtpString("hipLaunchKernel:");
shortAtpString += kernel_name;
MARKER_BEGIN(shortAtpString.c_str(), "HIP");
} else if (HIP_PROFILE_API == 0x2) {
MARKER_BEGIN(os.str().c_str(), "HIP");
}
if (COMPILE_HIP_DB && HIP_TRACE_API) {
std::cerr << API_COLOR << os.str() << API_COLOR_END
<< std::endl;
}
}
}
void unlock_stream_hip_(
hipStream_t stream,
void* locked_stream,
+4
Просмотреть файл
@@ -114,6 +114,10 @@ __device__ float expm1f(float x)
{
return hc::precise_math::expm1f(x);
}
__device__ int abs(int x)
{
return x >= 0 ? x : -x; // TODO - optimize with OCML
}
__device__ float fabsf(float x)
{
return hc::precise_math::fabsf(x);