Re-sync with upstream.

This commit is contained in:
Alex Voicu
2018-10-10 11:43:49 +01:00
36 zmienionych plików z 856 dodań i 291 usunięć
+11 -1
Wyświetl plik
@@ -288,12 +288,22 @@ if(HIP_PLATFORM STREQUAL "hcc")
install(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::)
include(CMakePackageConfigHelpers)
if(HIP_COMPILER STREQUAL "hcc")
configure_package_config_file(
hip-config.cmake.in
hip-config-hcc.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/hip-config.cmake
INSTALL_DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR}
PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR
)
elseif(HIP_COMPILER STREQUAL "clang")
configure_package_config_file(
hip-config-clang.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/hip-config.cmake
INSTALL_DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR}
PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR
)
endif()
write_basic_package_version_file(
${CMAKE_CURRENT_BINARY_DIR}/hip-config-version.cmake
VERSION "${HIP_VERSION}"
+39 -2
Wyświetl plik
@@ -167,7 +167,8 @@ def docker_build_inside_image( def build_image, String inside_args, String platf
}
// Cap the maximum amount of testing, in case of hangs
// Excluding hipPrintfKernel test from automation; variable fails on CI test machines
// Excluding hipVectorTypes test from automation; due to regression from HCC commit 2367133
// Excluding hipFloatMath test from automation; due to regression from ROCDL commit 2fc04e1
timeout(time: 1, unit: 'HOURS')
{
stage("${platform} unit testing")
@@ -177,7 +178,7 @@ def docker_build_inside_image( def build_image, String inside_args, String platf
cd ${build_dir_rel}
make install -j\$(nproc)
make build_tests -i -j\$(nproc)
ctest -E hipVectorTypes
ctest -E "(hipVectorTypes.tst|hipVectorTypesDevice.tst|hipFloatMath.tst)"
"""
// If unit tests output a junit or xunit file in the future, jenkins can parse that file
// to display test results on the dashboard
@@ -385,4 +386,40 @@ rocm_head:
docker_clean_images( job_name, hip_image_name )
*/
}
},
cuda_9_x:
{
node('hip-cuda')
{
////////////////////////////////////////////////////////////////////////
// Block of string constants customizing behavior for cuda
String nvcc_ver = 'nvcc-9.x'
String from_image = 'ci_test_nodes/cuda-9.x/ubuntu-16.04:latest'
String inside_args = '--runtime=nvidia';
// Checkout source code, dependencies and version files
String source_hip_rel = checkout_and_version( nvcc_ver )
// Create/reuse a docker image that represents the hip build environment
def hip_build_image = docker_build_image( nvcc_ver, 'hip', '', source_hip_rel, from_image )
// Print system information for the log
hip_build_image.inside( inside_args )
{
sh """#!/usr/bin/env bash
set -x
nvidia-smi
nvcc --version
"""
}
// Conctruct a binary directory path based on build config
String build_hip_rel = build_directory_rel( build_config );
// Build hip inside of the build environment
docker_build_inside_image( hip_build_image, inside_args, nvcc_ver, "-DHIP_NVCC_FLAGS=--Wno-deprecated-gpu-targets", build_config, source_hip_rel, build_hip_rel )
// Clean docker image
docker_clean_images( 'hip', docker_build_image_name( ) )
}
}
@@ -35,9 +35,9 @@
| enum |***`cublasAtomicsMode_t`*** | |
| 0 |*`CUBLAS_ATOMICS_NOT_ALLOWED`* | |
| 1 |*`CUBLAS_ATOMICS_ALLOWED`* | |
| enum |***`cublasAtomicsMode_t`*** | |
| -1 |*`CUBLAS_GEMM_DFALT`* | |
| -1 |*`CUBLAS_GEMM_DEFAULT`* | |
| enum |***`cublasGemmAlgo_t`*** |***`hipblasGemmAlgo_t`*** |
| -1 |*`CUBLAS_GEMM_DFALT`* |*`HIPBLAS_GEMM_DEFAULT`* | 160 |
| -1 |*`CUBLAS_GEMM_DEFAULT`* |*`HIPBLAS_GEMM_DEFAULT`* | 160 |
| 0 |*`CUBLAS_GEMM_ALGO0`* | |
| 1 |*`CUBLAS_GEMM_ALGO1`* | |
| 2 |*`CUBLAS_GEMM_ALGO2`* | |
@@ -391,7 +391,7 @@
|`cublasZgemm3m` | |
|`cublasHgemm` |`hipblasHgemm` |
|`cublasSgemmEx` | |
|`cublasGemmEx` | |
|`cublasGemmEx` |`hipblasGemmEx` |
|`cublasCgemmEx` | |
|`cublasUint8gemmBias` | |
|`cublasSsyrk` | |
@@ -93,10 +93,10 @@
## **7. Occupancy**
| **CUDA** | **HIP** |
|-----------------------------------------------------------|-------------------------------|
| **CUDA** | **HIP** |
|-----------------------------------------------------------|-----------------------------------------------|
| `cudaOccupancyMaxActiveBlocksPerMultiprocessor` | `hipOccupancyMaxActiveBlocksPerMultiprocessor`|
| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | |
| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | |
## **8. Execution Control [deprecated since 7.0]**
@@ -326,13 +326,13 @@
## **25. Texture Object Management**
| **CUDA** | **HIP** |
|-----------------------------------------------------------|-------------------------------|
| `cudaCreateTextureObject` |`hipCreateTextureObject` |
| `cudaDestroyTextureObject` |`hipDestroyTextureObject` |
| `cudaGetTextureObjectResourceDesc` |`hipGetTextureObjectResourceDesc` |
| **CUDA** | **HIP** |
|-----------------------------------------------------------|--------------------------------------|
| `cudaCreateTextureObject` |`hipCreateTextureObject` |
| `cudaDestroyTextureObject` |`hipDestroyTextureObject` |
| `cudaGetTextureObjectResourceDesc` |`hipGetTextureObjectResourceDesc` |
| `cudaGetTextureObjectResourceViewDesc` |`hipGetTextureObjectResourceViewDesc` |
| `cudaGetTextureObjectTextureDesc` |`hipGetTextureObjectTextureDesc` |
| `cudaGetTextureObjectTextureDesc` |`hipGetTextureObjectTextureDesc` |
## **26. Surface Object Management**
@@ -352,36 +352,36 @@
## **28. C++ API Routines**
*(7.0 contains, 7.5 doesnt)*
| **CUDA** | **HIP** |
|-----------------------------------------------------------|-------------------------------|
| `cudaBindSurfaceToArray` | |
| `cudaBindTexture` | `hipBindTexture` |
| `cudaBindTexture2D` | |
| `cudaBindTextureToArray` | |
| `cudaBindTextureToMipmappedArray` | |
| `cudaCreateChannelDesc` | `hipCreateChannelDesc` |
| `cudaFuncGetAttributes` | |
| `cudaFuncSetCacheConfig` | |
| `cudaGetSymbolAddress` | |
| `cudaGetSymbolSize` | |
| `cudaGetTextureAlignmentOffset` | |
| `cudaLaunch` | |
| `cudaLaunchKernel` | |
| `cudaMallocHost` | |
| `cudaMallocManaged` | |
| `cudaMemcpyFromSymbol` | |
| `cudaMemcpyFromSymbolAsync` | |
| `cudaMemcpyToSymbol` | |
| `cudaMemcpyToSymbolAsync` | |
| **CUDA** | **HIP** |
|-----------------------------------------------------------|------------------------------------------------|
| `cudaBindSurfaceToArray` | |
| `cudaBindTexture` | `hipBindTexture` |
| `cudaBindTexture2D` | |
| `cudaBindTextureToArray` | |
| `cudaBindTextureToMipmappedArray` | |
| `cudaCreateChannelDesc` | `hipCreateChannelDesc` |
| `cudaFuncGetAttributes` | |
| `cudaFuncSetCacheConfig` | |
| `cudaGetSymbolAddress` | |
| `cudaGetSymbolSize` | |
| `cudaGetTextureAlignmentOffset` | |
| `cudaLaunch` | |
| `cudaLaunchKernel` | |
| `cudaMallocHost` | |
| `cudaMallocManaged` | |
| `cudaMemcpyFromSymbol` | |
| `cudaMemcpyFromSymbolAsync` | |
| `cudaMemcpyToSymbol` | |
| `cudaMemcpyToSymbolAsync` | |
| `cudaOccupancyMaxActiveBlocksPerMultiprocessor` | `hipOccupancyMaxActiveBlocksPerMultiprocessor` |
| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | |
| `cudaOccupancyMaxPotentialBlockSize` | `hipOccupancyMaxPotentialBlockSize` |
| `cudaOccupancyMaxPotentialBlockSizeVariableSMem` | |
| `cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags` | |
| `cudaOccupancyMaxPotentialBlockSizeWithFlags` | |
| `cudaSetupArgument` | |
| `cudaStreamAttachMemAsync` | |
| `cudaUnbindTexture` | `hipUnbindTexture` |
| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | |
| `cudaOccupancyMaxPotentialBlockSize` | `hipOccupancyMaxPotentialBlockSize` |
| `cudaOccupancyMaxPotentialBlockSizeVariableSMem` | |
| `cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags` | |
| `cudaOccupancyMaxPotentialBlockSizeWithFlags` | |
| `cudaSetupArgument` | |
| `cudaStreamAttachMemAsync` | |
| `cudaUnbindTexture` | `hipUnbindTexture` |
## **30. Profiler Control**
@@ -395,8 +395,8 @@
## **31. Data types**
| **type** | **CUDA** | **HIP** |
|-------------:|-----------------------------------------------|------------------------------------------------------|
| **type** | **CUDA** | **HIP** |**HIP value** (if differs) |
|-------------:|-----------------------------------------------|------------------------------------------------------|---------------------------|
| struct | `cudaChannelFormatDesc` | `hipChannelFormatDesc` |
| struct | `cudaDeviceProp` | `hipDeviceProp_t` |
| struct | `cudaExtent` | `hipExtent` |
@@ -790,3 +790,19 @@
| define | `cudaTextureType1DLayered` | `hipTextureType1DLayered` |
| define | `cudaTextureType2DLayered` | `hipTextureType2DLayered` |
| define | `cudaTextureTypeCubemapLayered` | `hipTextureTypeCubemapLayered` |
| enum |***`cudaDataType_t`*** |***`hipblasDatatype_t`*** |
| enum |***`cudaDataType`*** |***`hipblasDatatype_t`*** |
| 2 |*`CUDA_R_16F`* |*`HIPBLAS_R_16F`* | 150 |
| 6 |*`CUDA_C_16F`* |*`HIPBLAS_C_16F`* | 153 |
| 0 |*`CUDA_R_32F`* |*`HIPBLAS_R_32F`* | 151 |
| 4 |*`CUDA_C_32F`* |*`HIPBLAS_C_32F`* | 154 |
| 1 |*`CUDA_R_64F`* |*`HIPBLAS_R_64F`* | 152 |
| 5 |*`CUDA_C_64F`* |*`HIPBLAS_C_64F`* | 155 |
| 3 |*`CUDA_R_8I`* | |
| 7 |*`CUDA_C_8I`* | |
| 8 |*`CUDA_R_8U`* | |
| 9 |*`CUDA_C_8U`* | |
| 10 |*`CUDA_R_32I`* | |
| 11 |*`CUDA_C_32I`* | |
| 12 |*`CUDA_R_32U`* | |
| 13 |*`CUDA_C_32U`* | |
@@ -239,8 +239,8 @@
|`cudnnSetTensor` |`hipdnnSetTensor` |
|`cudnnScaleTensor` |`hipdnnScaleTensor` |
|`cudnnCreateFilterDescriptor` |`hipdnnCreateFilterDescriptor` |
|`cudnnSetFilter4dDescriptor` | |
|`cudnnGetFilter4dDescriptor` | |
|`cudnnSetFilter4dDescriptor` |`hipdnnSetFilter4dDescriptor` |
|`cudnnGetFilter4dDescriptor` |`hipdnnGetFilter4dDescriptor` |
|`cudnnSetFilterNdDescriptor` |`hipdnnSetFilterNdDescriptor` |
|`cudnnGetFilterNdDescriptor` |`hipdnnGetFilterNdDescriptor` |
|`cudnnDestroyFilterDescriptor` |`hipdnnDestroyFilterDescriptor` |
@@ -91,8 +91,22 @@ Setting HCC_UNPINNED_COPY_MODE = 3, forces all unpinned transfer to use direct m
Following environment variables can be used to control the transfer thresholds:
- HCC_H2D_STAGING_THRESHOLD - Threshold in KB for H2D copy. For sizes smaller than threshold direct copy logic would be used else staging buffers logic. By default it is set to 64.
- HCC_H2D_STAGING_THRESHOLD - Threshold in KB for H2D copy. For sizes smaller than threshold direct copy logic would be used else staging buffers logic. By default it is set to 64.
- HCC_H2D_PININPLACE_THRESHOLD - Threshold in KB for H2D copy. For sizes smaller than threshold staging buffers logic would be used else PinInPlace logic. By default it is set to 4096.
- HCC_D2H_PININPLACE_THRESHOLD - Threshold in KB for D2H copy. For sizes smaller than threshold staging buffer logic would be used else PinInPlace logic. By default it is set to 1024.
## Device-Side Malloc
hip-hcc and hip-clang supports device-side malloc and free. Users can allocate
memory dynamically in a kernel. The allocated memory are in global address
space, however, different threads get different memory allocations for the same
call of malloc. The allocated memory can be accessed or freed by other threads
or other kernels. It persists in the life time of the HIP program until it is
freed.
The memory are allocated in pages. Users can define macro
`__HIP_SIZE_OF_PAGE` for controlling the page size in bytes and macro
`__HIP_NUM_PAGES` for controlling the total number of pages that can be
allocated.
@@ -48,9 +48,6 @@ set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" )
set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc")
set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig")
if(HIP_COMPILER STREQUAL "hcc")
find_dependency(hcc)
endif()
include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" )
set( hip_LIBRARIES hip::host hip::device)
+65
Wyświetl plik
@@ -0,0 +1,65 @@
@PACKAGE_INIT@
include(CMakeFindDependencyMacro OPTIONAL RESULT_VARIABLE _CMakeFindDependencyMacro_FOUND)
if (NOT _CMakeFindDependencyMacro_FOUND)
macro(find_dependency dep)
if (NOT ${dep}_FOUND)
set(cmake_fd_version)
if (${ARGC} GREATER 1)
set(cmake_fd_version ${ARGV1})
endif()
set(cmake_fd_exact_arg)
if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION_EXACT)
set(cmake_fd_exact_arg EXACT)
endif()
set(cmake_fd_quiet_arg)
if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY)
set(cmake_fd_quiet_arg QUIET)
endif()
set(cmake_fd_required_arg)
if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED)
set(cmake_fd_required_arg REQUIRED)
endif()
find_package(${dep} ${cmake_fd_version}
${cmake_fd_exact_arg}
${cmake_fd_quiet_arg}
${cmake_fd_required_arg}
)
string(TOUPPER ${dep} cmake_dep_upper)
if (NOT ${dep}_FOUND AND NOT ${cmake_dep_upper}_FOUND)
set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.")
set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False)
return()
endif()
set(cmake_fd_version)
set(cmake_fd_required_arg)
set(cmake_fd_quiet_arg)
set(cmake_fd_exact_arg)
endif()
endmacro()
endif()
set_and_check( hip_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@" )
set_and_check( hip_INCLUDE_DIRS "${hip_INCLUDE_DIR}" )
set_and_check( hip_LIB_INSTALL_DIR "@PACKAGE_LIB_INSTALL_DIR@" )
set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" )
set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc")
set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig")
find_dependency(hcc)
include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" )
set( hip_LIBRARIES hip::host hip::device)
set( hip_LIBRARY ${hip_LIBRARIES})
set(HIP_INCLUDE_DIR ${hip_INCLUDE_DIR})
set(HIP_INCLUDE_DIRS ${hip_INCLUDE_DIRS})
set(HIP_LIB_INSTALL_DIR ${hip_LIB_INSTALL_DIR})
set(HIP_BIN_INSTALL_DIR ${hip_BIN_INSTALL_DIR})
set(HIP_LIBRARIES ${hip_LIBRARIES})
set(HIP_LIBRARY ${hip_LIBRARY})
set(HIP_HIPCC_EXECUTABLE ${hip_HIPCC_EXECUTABLE})
set(HIP_HIPCONFIG_EXECUTABLE ${hip_HIPCONFIG_EXECUTABLE})
+2 -1
Wyświetl plik
@@ -89,7 +89,8 @@ if (HIPIFY_CLANG_TESTS)
if ((CUDA_VERSION VERSION_LESS "7.0") OR (LLVM_PACKAGE_VERSION VERSION_LESS "3.8") OR
(CUDA_VERSION VERSION_GREATER "7.5" AND LLVM_PACKAGE_VERSION VERSION_LESS "4.0") OR
(CUDA_VERSION VERSION_GREATER "8.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "6.0") OR
(CUDA_VERSION VERSION_GREATER "9.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "7.0"))
(CUDA_VERSION VERSION_GREATER "9.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "7.0") OR
CUDA_VERSION VERSION_EQUAL "10.0")
message(SEND_ERROR "CUDA ${CUDA_VERSION} is not supported by clang ${LLVM_PACKAGE_VERSION}.")
if (CUDA_VERSION VERSION_LESS "7.0")
message(STATUS "Please install CUDA 7.0 or higher.")
+11 -4
Wyświetl plik
@@ -30,11 +30,16 @@
## <a name="dependencies"></a> Dependencies
`hipify-clang` requires:
1. LLVM+CLANG of at least version 3.8.0, latest stable and recommended release is 6.0.1.
2. CUDA at least version 7.5, latest supported release is 9.0.
1. LLVM+CLANG of at least version 3.8.0, latest stable and recommended releases:
6.0.1 (linux and windows),
| **LLVM release version** | **CUDA latest supported version** |
|:------------------------:|:---------------------------------:|
7.0.0 (linux only).
2. CUDA at least version 7.5, latest supported release is 9.2.
| **LLVM release version** | **CUDA latest supported version** | **Comments** |
|:------------------------:|:---------------------------------:|:------------:|
| 3.8.0 | 7.5 |
| 3.8.1 | 7.5 |
| 3.9.0 | 7.5 |
@@ -46,6 +51,8 @@
| 5.0.2 | 8.0 |
| 6.0.0 | 9.0 |
| 6.0.1 | 9.0 |
| 7.0.0 | 9.2 | linux only |
| | 10.0 | not yet supported |
In most cases, you can get a suitable version of LLVM+CLANG with your package manager.
+22 -25
Wyświetl plik
@@ -1417,22 +1417,23 @@ const std::map<llvm::StringRef, hipCounter> CUDA_IDENTIFIER_MAP{
/////////////////////////////// CUDA RT API ///////////////////////////////
// Data types
{"cudaDataType_t", {"hipDataType_t", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaDataType", {"hipDataType_t", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
{"CUDA_R_16F", {"hipR16F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
{"CUDA_C_16F", {"hipC16F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
{"CUDA_R_32F", {"hipR32F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
{"CUDA_C_32F", {"hipC32F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
{"CUDA_R_64F", {"hipR64F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
{"CUDA_C_64F", {"hipC64F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
{"CUDA_R_8I", {"hipR8I", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
{"CUDA_C_8I", {"hipC8I", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
{"CUDA_R_8U", {"hipR8U", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
{"CUDA_C_8U", {"hipC8U", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
{"CUDA_R_32I", {"hipR32I", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
{"CUDA_C_32I", {"hipC32I", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
{"CUDA_R_32U", {"hipR32U", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
{"CUDA_C_32U", {"hipC32U", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
// TODO: rename hipblasDatatype_t to hipDataType_t and move from hipBLAS to HIP
{"cudaDataType_t", {"hipblasDatatype_t", CONV_TYPE, API_RUNTIME}},
{"cudaDataType", {"hipblasDatatype_t", CONV_TYPE, API_RUNTIME}},
{"CUDA_R_16F", {"HIPBLAS_R_16F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 // 150
{"CUDA_C_16F", {"HIPBLAS_C_16F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 6 // 153
{"CUDA_R_32F", {"HIPBLAS_R_32F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 // 151
{"CUDA_C_32F", {"HIPBLAS_C_32F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 4 // 154
{"CUDA_R_64F", {"HIPBLAS_R_64F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 // 152
{"CUDA_C_64F", {"HIPBLAS_C_64F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 5 // 155
{"CUDA_R_8I", {"HIPBLAS_R_8I", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 //
{"CUDA_C_8I", {"HIPBLAS_C_8I", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 7 //
{"CUDA_R_8U", {"HIPBLAS_R_8U", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 8 //
{"CUDA_C_8U", {"HIPBLAS_C_8U", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 9 //
{"CUDA_R_32I", {"HIPBLAS_R_32I", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 10 //
{"CUDA_C_32I", {"HIPBLAS_C_32I", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 11 //
{"CUDA_R_32U", {"HIPBLAS_R_32U", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 12 //
{"CUDA_C_32U", {"HIPBLAS_C_32U", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 13 //
// Library property types
// IMPORTANT: no cuda prefix
@@ -2143,11 +2144,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_IDENTIFIER_MAP{
{"CUBLAS_ATOMICS_ALLOWED", {"HIPBLAS_ATOMICS_ALLOWED", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}},
// Blas Data Type
{"cublasDataType_t", {"hipblasDataType_t", CONV_TYPE, API_BLAS, HIP_UNSUPPORTED}},
{"CUBLAS_DATA_FLOAT", {"HIPBLAS_DATA_FLOAT", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}},
{"CUBLAS_DATA_DOUBLE", {"HIPBLAS_DATA_DOUBLE", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}},
{"CUBLAS_DATA_HALF", {"HIPBLAS_DATA_HALF", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}},
{"CUBLAS_DATA_INT8", {"HIPBLAS_DATA_INT8", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}},
{"cublasDataType_t", {"hipblasDatatype_t", CONV_TYPE, API_BLAS}},
// Blas Math mode/tensor operation
{"cublasMath_t", {"hipblasMath_t", CONV_TYPE, API_BLAS, HIP_UNSUPPORTED}},
@@ -2155,9 +2152,9 @@ const std::map<llvm::StringRef, hipCounter> CUDA_IDENTIFIER_MAP{
{"CUBLAS_TENSOR_OP_MATH", {"HIPBLAS_TENSOR_OP_MATH", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}},
// Blass different GEMM algorithms
{"cublasGemmAlgo_t", {"hipblasGemmAlgo_t", CONV_TYPE, API_BLAS, HIP_UNSUPPORTED}},
{"CUBLAS_GEMM_DFALT", {"HIPBLAS_GEMM_DFALT", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, // -1
{"CUBLAS_GEMM_DEFAULT", {"HIPBLAS_GEMM_DEFAULT", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, // -1
{"cublasGemmAlgo_t", {"hipblasGemmAlgo_t", CONV_TYPE, API_BLAS}},
{"CUBLAS_GEMM_DFALT", {"HIPBLAS_GEMM_DEFAULT", CONV_NUMERIC_LITERAL, API_BLAS}}, // -1 // 160
{"CUBLAS_GEMM_DEFAULT", {"HIPBLAS_GEMM_DEFAULT", CONV_NUMERIC_LITERAL, API_BLAS}}, // -1 // 160
{"CUBLAS_GEMM_ALGO0", {"HIPBLAS_GEMM_ALGO0", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, // 0
{"CUBLAS_GEMM_ALGO1", {"HIPBLAS_GEMM_ALGO1", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, // 1
{"CUBLAS_GEMM_ALGO2", {"HIPBLAS_GEMM_ALGO2", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, // 2
@@ -2715,7 +2712,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_IDENTIFIER_MAP{
//IO in FP16 / FP32, computation in float
{"cublasSgemmEx", {"hipblasSgemmEx", CONV_MATH_FUNC, API_BLAS, HIP_UNSUPPORTED}},
{"cublasGemmEx", {"hipblasGemmEx", CONV_MATH_FUNC, API_BLAS, HIP_UNSUPPORTED}},
{"cublasGemmEx", {"hipblasGemmEx", CONV_MATH_FUNC, API_BLAS}},
{"cublasGemmBatchedEx", {"hipblasGemmBatchedEx", CONV_MATH_FUNC, API_BLAS, HIP_UNSUPPORTED}},
{"cublasGemmStridedBatchedEx", {"hipblasGemmStridedBatchedEx", CONV_MATH_FUNC, API_BLAS, HIP_UNSUPPORTED}},
// IO in Int8 complex/cuComplex, computation in cuComplex
@@ -27,11 +27,12 @@ THE SOFTWARE.
#include "math_fwd.h"
#include <hip/hip_runtime_api.h>
#include <stddef.h>
#include <hip/hip_vector_types.h>
#include <hip/hcc_detail/device_library_decls.h>
#include <hip/hcc_detail/llvm_intrinsics.h>
#include <stddef.h>
/*
Integer Intrinsics
*/
@@ -1036,4 +1037,5 @@ static inline __device__ void* memset(void* ptr, int val, size_t size) {
unsigned char val8 = static_cast<unsigned char>(val);
return __hip_hc_memset(ptr, val8, size);
}
#endif
@@ -29,7 +29,7 @@ THE SOFTWARE.
#include <utility>
#endif
#if defined(__clang__) && (__clang_major__ > 3)
#if defined(__clang__) && (__clang_major__ > 5)
typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2)));
struct __half_raw {
@@ -26,6 +26,8 @@ THE SOFTWARE.
// Half Math Functions
// */
#include "host_defines.h"
extern "C"
{
__device__ __attribute__((const)) _Float16 __ocml_ceil_f16(_Float16);
@@ -0,0 +1,109 @@
/*
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H
#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H
// Implementation of malloc and free device functions.
// HIP heap is implemented as a global array with fixed size. Users may define
// __HIP_SIZE_OF_PAGE and __HIP_NUM_PAGES to have a larger heap.
// Size of page in bytes.
#ifndef __HIP_SIZE_OF_PAGE
#define __HIP_SIZE_OF_PAGE 64
#endif
// Total number of pages
#ifndef __HIP_NUM_PAGES
#define __HIP_NUM_PAGES (16 * 64 * 64)
#endif
#define __HIP_SIZE_OF_HEAP (__HIP_NUM_PAGES * __HIP_SIZE_OF_PAGE)
#if __HCC__ || __HIP__
__attribute__((weak)) __device__ char __hip_device_heap[__HIP_SIZE_OF_HEAP];
__attribute__((weak)) __device__
uint32_t __hip_device_page_flag[__HIP_NUM_PAGES];
extern "C" inline __device__ void* __hip_malloc(size_t size) {
char* heap = (char*)__hip_device_heap;
if (size > __HIP_SIZE_OF_HEAP) {
return (void*)nullptr;
}
uint32_t totalThreads =
hipBlockDim_x * hipGridDim_x * hipBlockDim_y
* hipGridDim_y * hipBlockDim_z * hipGridDim_z;
uint32_t currentWorkItem = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x
+ (hipThreadIdx_y + hipBlockDim_y * hipBlockIdx_y) * hipBlockDim_x
+ (hipThreadIdx_z + hipBlockDim_z * hipBlockIdx_z) * hipBlockDim_x
* hipBlockDim_y;
uint32_t numHeapsPerWorkItem = __HIP_NUM_PAGES / totalThreads;
uint32_t heapSizePerWorkItem = __HIP_SIZE_OF_HEAP / totalThreads;
uint32_t stride = size / __HIP_SIZE_OF_PAGE;
uint32_t start = numHeapsPerWorkItem * currentWorkItem;
uint32_t k = 0;
while (__hip_device_page_flag[k] > 0) {
k++;
}
for (uint32_t i = 0; i < stride - 1; i++) {
__hip_device_page_flag[i + start + k] = 1;
}
__hip_device_page_flag[start + stride - 1 + k] = 2;
void* ptr = (void*)(heap
+ heapSizePerWorkItem * currentWorkItem + k * __HIP_SIZE_OF_PAGE);
return ptr;
}
extern "C" inline __device__ void* __hip_free(void* ptr) {
if (ptr == nullptr) {
return nullptr;
}
uint32_t offsetByte = (uint64_t)ptr - (uint64_t)__hip_device_heap;
uint32_t offsetPage = offsetByte / __HIP_SIZE_OF_PAGE;
while (__hip_device_page_flag[offsetPage] != 0) {
if (__hip_device_page_flag[offsetPage] == 2) {
__hip_device_page_flag[offsetPage] = 0;
offsetPage++;
break;
} else {
__hip_device_page_flag[offsetPage] = 0;
offsetPage++;
}
}
return nullptr;
}
#endif
#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H
@@ -128,7 +128,7 @@ extern int HIP_TRACE_API;
// Feature tests:
#if defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)
#if (defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)) || __HIP_DEVICE_COMPILE__
// Device compile and not host compile:
// 32-bit Atomics:
@@ -245,11 +245,11 @@ static constexpr Coordinates<hc_get_workitem_id> threadIdx;
#endif // defined __HCC__
#if __HCC_OR_HIP_CLANG__
extern "C" __device__ void* __hip_hc_malloc(size_t);
extern "C" __device__ void* __hip_hc_free(void* ptr);
extern "C" __device__ void* __hip_malloc(size_t);
extern "C" __device__ void* __hip_free(void* ptr);
static inline __device__ void* malloc(size_t size) { return __hip_hc_malloc(size); }
static inline __device__ void* free(void* ptr) { return __hip_hc_free(ptr); }
static inline __device__ void* malloc(size_t size) { return __hip_malloc(size); }
static inline __device__ void* free(void* ptr) { return __hip_free(ptr); }
#if defined(__HCC_ACCELERATOR__) && defined(HC_FEATURE_PRINTF)
template <typename... All>
@@ -331,13 +331,13 @@ extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, gri
typedef int hipLaunchParm;
template <typename... Args, typename F = void (*)(Args...)>
inline void hipLaunchKernelGGL(F kernelName, const dim3& numblocks, const dim3& numthreads,
inline void hipLaunchKernelGGL(F&& kernelName, const dim3& numblocks, const dim3& numthreads,
unsigned memperblock, hipStream_t streamId, Args... args) {
kernelName<<<numblocks, numthreads, memperblock, streamId>>>(args...);
}
template <typename... Args, typename F = void (*)(hipLaunchParm, Args...)>
inline void hipLaunchKernel(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
inline void hipLaunchKernel(F&& kernel, const dim3& numBlocks, const dim3& dimBlocks,
std::uint32_t groupMemBytes, hipStream_t stream, Args... args) {
hipLaunchKernelGGL(kernel, numBlocks, dimBlocks, groupMemBytes, stream, hipLaunchParm{},
std::move(args)...);
@@ -423,6 +423,8 @@ extern const __device__ __attribute__((weak)) __hip_builtin_gridDim_t gridDim;
#define hipGridDim_y gridDim.y
#define hipGridDim_z gridDim.z
#include <hip/hcc_detail/math_functions.h>
#if __HIP_HCC_COMPAT_MODE__
// Define HCC work item functions in terms of HIP builtin variables.
#pragma push_macro("__DEFINE_HCC_FUNC")
@@ -462,7 +464,6 @@ hc_get_workitem_absolute_id(int dim)
#undef __CUDA__
#pragma pop_macro("__CUDA__")
#include <hip/hcc_detail/math_functions.h>
hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
@@ -474,4 +475,6 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
#endif // defined(__clang__) && defined(__HIP__)
#include <hip/hcc_detail/hip_memory.h>
#endif // HIP_HCC_DETAIL_RUNTIME_H
@@ -22,6 +22,7 @@ THE SOFTWARE.
#pragma once
#include "hip_fp16_math_fwd.h"
#include "math_fwd.h"
#include <hip/hcc_detail/host_defines.h>
@@ -1159,6 +1160,16 @@ long long llabs(long long x)
#endif
// END INTEGER
__DEVICE__
inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
return __ocml_fma_f16(x, y, z);
}
__DEVICE__
inline float fma(float x, float y, float z) {
return fmaf(x, y, z);
}
#pragma push_macro("__DEF_FLOAT_FUN")
#pragma push_macro("__DEF_FLOAT_FUN2")
#pragma push_macro("__DEF_FLOAT_FUN2I")
@@ -1352,10 +1363,10 @@ __DEVICE__ inline static unsigned long long max(long long arg1, unsigned long lo
return max((unsigned long long) arg1, arg2);
}*/
#else
__DEVICE__ inline static int min(int arg1, int arg2) {
__DEVICE__ inline int min(int arg1, int arg2) {
return (arg1 < arg2) ? arg1 : arg2;
}
__DEVICE__ inline static int max(int arg1, int arg2) {
__DEVICE__ inline int max(int arg1, int arg2) {
return (arg1 > arg2) ? arg1 : arg2;
}
@@ -118,6 +118,9 @@ typedef int hipLaunchParm;
}
#endif
#define __clock() clock()
#define __clock64() clock64()
#endif
#endif
+1 -1
Wyświetl plik
@@ -25,7 +25,7 @@ popd () {
function setupENV()
{
sudo apt-get update
sudo apt-get install dpkg-dev rpm doxygen libelf-dev
sudo apt-get install dpkg-dev rpm doxygen libelf-dev rename
}
function buildHIP()
-64
Wyświetl plik
@@ -27,70 +27,6 @@ THE SOFTWARE.
#include "hip/hip_runtime.h"
#include <atomic>
//=================================================================================================
/*
Implementation of malloc and free device functions.
This is the best place to put them because the device
global variables need to be initialized at the start.
*/
__device__ char gpuHeap[SIZE_OF_HEAP];
__device__ uint32_t gpuFlags[NUM_PAGES];
__device__ void* __hip_hc_malloc(size_t size) {
char* heap = (char*)gpuHeap;
if (size > SIZE_OF_HEAP) {
return (void*)nullptr;
}
uint32_t totalThreads =
blockDim.x * gridDim.x * blockDim.y * gridDim.y * blockDim.z * gridDim.z;
uint32_t currentWorkItem = threadIdx.x + blockDim.x * blockIdx.x;
uint32_t numHeapsPerWorkItem = NUM_PAGES / totalThreads;
uint32_t heapSizePerWorkItem = SIZE_OF_HEAP / totalThreads;
uint32_t stride = size / SIZE_OF_PAGE;
uint32_t start = numHeapsPerWorkItem * currentWorkItem;
uint32_t k = 0;
while (gpuFlags[k] > 0) {
k++;
}
for (uint32_t i = 0; i < stride - 1; i++) {
gpuFlags[i + start + k] = 1;
}
gpuFlags[start + stride - 1 + k] = 2;
void* ptr = (void*)(heap + heapSizePerWorkItem * currentWorkItem + k * SIZE_OF_PAGE);
return ptr;
}
__device__ void* __hip_hc_free(void* ptr) {
if (ptr == nullptr) {
return nullptr;
}
uint32_t offsetByte = (uint64_t)ptr - (uint64_t)gpuHeap;
uint32_t offsetPage = offsetByte / SIZE_OF_PAGE;
while (gpuFlags[offsetPage] != 0) {
if (gpuFlags[offsetPage] == 2) {
gpuFlags[offsetPage] = 0;
offsetPage++;
break;
} else {
gpuFlags[offsetPage] = 0;
offsetPage++;
}
}
return nullptr;
}
// abort
__device__ void abort() { return hc::abort(); }
-11
Wyświetl plik
@@ -29,14 +29,6 @@ THE SOFTWARE.
Heap size computation for malloc and free device functions.
*/
#define NUM_PAGES_PER_THREAD 16
#define SIZE_OF_PAGE 64
#define NUM_THREADS_PER_CU 64
#define NUM_CUS_PER_GPU 64 // Specific for r9 Nano
#define NUM_PAGES NUM_PAGES_PER_THREAD* NUM_THREADS_PER_CU* NUM_CUS_PER_GPU
#define SIZE_MALLOC NUM_PAGES* SIZE_OF_PAGE
#define SIZE_OF_HEAP SIZE_MALLOC
#define HIP_SQRT_2 1.41421356237
#define HIP_SQRT_PI 1.77245385091
@@ -62,9 +54,6 @@ THE SOFTWARE.
#define HIP_PI 3.14159265358979323846
__device__ void* __hip_hc_malloc(size_t size);
__device__ void* __hip_hc_free(void* ptr);
__device__ float __hip_erfinvf(float x);
__device__ double __hip_erfinv(double x);
+1 -1
Wyświetl plik
@@ -99,7 +99,7 @@ hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) {
return ihipLogStatus(hipErrorInvalidValue);
}
if (limit == hipLimitMallocHeapSize) {
*pValue = (size_t)SIZE_OF_HEAP;
*pValue = (size_t)__HIP_SIZE_OF_HEAP;
return ihipLogStatus(hipSuccess);
} else {
return ihipLogStatus(hipErrorUnsupportedLimit);
+2
Wyświetl plik
@@ -1543,6 +1543,8 @@ hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, hipStream_t strea
{
hipError_t e = hipSuccess;
if (sizeBytes == 0) return e;
if (stream && (dst != NULL)) {
if(copyDataType == ihipMemsetDataTypeChar){
if ((sizeBytes & 0x3) == 0) {
@@ -0,0 +1,188 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s NVCC_OPTIONS -std=c++11
* RUN: %t EXCLUDE_HIP_PLATFORM nvcc
* HIT_END
*/
#include "test_common.h"
#include <iostream>
#include <complex>
// Tolerance for error
const double tolerance = 1e-6;
const bool verbose = false;
#define BLKDIM_X 64
#define BLKDIM_Y 1
#define BLKDIM_Z 1
#define NUM_BLK_X 1
#define NUM_BLK_Y 1
#define NUM_BLK_Z 1
#define LEN (BLKDIM_X * BLKDIM_Y * BLKDIM_Z * NUM_BLK_X * NUM_BLK_Y * NUM_BLK_Z)
#define ALL_FUN \
OP(add) \
OP(sub) \
OP(mul) \
OP(div)
#define OP(x) CK_##x,
enum CalcKind {
ALL_FUN
};
#undef OP
#define OP(x) case CK_##x: return #x;
std::string getName(enum CalcKind CK) {
switch(CK){
ALL_FUN
}
}
#undef OP
// Calculates function.
// If the function has one argument, B is ignored.
#define ONE_ARG(func) \
case CK_##func: \
return std::func(A);
template <typename FloatT>
__device__ __host__ FloatT calc(FloatT A, FloatT B, enum CalcKind CK) {
switch (CK) {
case CK_add:
return A + B;
case CK_sub:
return A - B;
case CK_mul:
return A * B;
case CK_div:
return A / B;
}
}
// Allocate memory in kernel and save the address to pA and pB.
// Copy value from A, B to allocated memory.
template <typename FloatT>
__global__ void kernel_alloc(FloatT* A, FloatT* B, FloatT** pA, FloatT** pB) {
int tx = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x
+ (hipThreadIdx_y + hipBlockDim_y * hipBlockIdx_y) * hipBlockDim_x
+ (hipThreadIdx_z + hipBlockDim_z * hipBlockIdx_z) * hipBlockDim_x
* hipBlockDim_y;
if (tx == 0) {
*pA = (FloatT*)malloc(sizeof(FloatT) * LEN);
*pB = (FloatT*)malloc(sizeof(FloatT) * LEN);
for (int i = 0; i < LEN; i++) {
(*pA)[i] = A[i];
(*pB)[i] = B[i];
}
}
}
// Do calculation using values saved in allocated memmory. pA, pB are buffers
// containing the address of the device-side allocated array.
template <typename FloatT>
__global__ void kernel_free(FloatT** pA, FloatT** pB, FloatT* C, enum CalcKind CK) {
int tx = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x
+ (hipThreadIdx_y + hipBlockDim_y * hipBlockIdx_y) * hipBlockDim_x
+ (hipThreadIdx_z + hipBlockDim_z * hipBlockIdx_z) * hipBlockDim_x
* hipBlockDim_y;
C[tx] = calc<FloatT>((*pA)[tx], (*pB)[tx], CK);
if (tx == 0) {
free(*pA);
free(*pB);
}
}
template<typename FloatT>
void test() {
FloatT *A, *Ad, *B, *Bd, *C, *Cd, *D;
A = new FloatT[LEN];
B = new FloatT[LEN];
C = new FloatT[LEN];
D = new FloatT[LEN];
hipMalloc((void**)&Ad, sizeof(FloatT) * LEN);
hipMalloc((void**)&Bd, sizeof(FloatT) * LEN);
hipMalloc((void**)&Cd, sizeof(FloatT) * LEN);
for (uint32_t i = 0; i < LEN; i++) {
A[i] = (i + 1) * 1.0f;
B[i] = A[i];
C[i] = A[i];
}
hipMemcpy(Ad, A, sizeof(FloatT) * LEN, hipMemcpyHostToDevice);
hipMemcpy(Bd, B, sizeof(FloatT) * LEN, hipMemcpyHostToDevice);
// Run kernel for a calculation kind and verify by comparing with host
// calculation result. Returns false if fails.
auto test_fun = [&](enum CalcKind CK) {
// kernel_alloc allocates memory on device side and initialize it.
// kernel_free uses allocated memory from kernel_alloc and does the
// calculation then free the memory.
// pA and pB are buffers to pass the device-side allocated memory address
// from kernel_alloc to kernel_free.
FloatT **pA, **pB;
hipMalloc((FloatT***)&pA, sizeof(FloatT*));
hipMalloc((FloatT***)&pB, sizeof(FloatT*));
dim3 blkDim(BLKDIM_X, BLKDIM_Y, BLKDIM_Z);
dim3 numBlk(NUM_BLK_X, NUM_BLK_Y, NUM_BLK_Z);
hipLaunchKernelGGL(kernel_alloc<FloatT>, numBlk, blkDim, 0, 0,
Ad, Bd, pA, pB);
hipDeviceSynchronize();
hipLaunchKernelGGL(kernel_free<FloatT>, numBlk, blkDim, 0, 0,
pA, pB, Cd, CK);
hipMemcpy(C, Cd, sizeof(FloatT) * LEN, hipMemcpyDeviceToHost);
hipFree(pA);
hipFree(pB);
for (int i = 0; i < LEN; i++) {
FloatT Expected = calc(A[i], B[i], CK);
FloatT error = std::abs(C[i] - Expected);
if (std::abs(Expected) > tolerance) error /= std::abs(Expected);
bool pass = error < tolerance;
if (verbose || !pass) {
std::cout << "Function: " << getName(CK) << " Operands: " << A[i] << " " << B[i]
<< " Result: " << C[i] << " Expected: " << Expected << " Error: " << error
<< " Pass: " << pass << std::endl;
}
if (!pass)
return false;
}
return true;
};
#define OP(x) assert(test_fun(CK_##x));
ALL_FUN
#undef OP
hipFree(Ad);
hipFree(Bd);
hipFree(Cd);
delete[] A;
delete[] B;
delete[] C;
delete[] D;
}
int main() {
test<float>();
test<double>();
passed();
return 0;
}
@@ -40,10 +40,10 @@ __global__ void floatMath(hipLaunchParm lp, float* In, float* Out) {
Out[tid] = __exp10f(Out[tid]);
Out[tid] = __expf(Out[tid]);
Out[tid] = __frsqrt_rn(Out[tid]);
Out[tid] = __fsqrt_rd(Out[tid]);
Out[tid] = __fsqrt_rn(Out[tid]);
Out[tid] = __fsqrt_ru(Out[tid]);
Out[tid] = __fsqrt_rz(Out[tid]);
//Out[tid] = __fsqrt_rd(Out[tid]);
//Out[tid] = __fsqrt_rn(Out[tid]);
//Out[tid] = __fsqrt_ru(Out[tid]);
//Out[tid] = __fsqrt_rz(Out[tid]);
Out[tid] = __log10f(Out[tid]);
Out[tid] = __log2f(Out[tid]);
Out[tid] = __logf(Out[tid]);
@@ -18,7 +18,7 @@ THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../test_common.cpp
* BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11 --gpu-architecture=sm_60
* RUN: %t
* HIT_END
*/
@@ -334,4 +334,4 @@ int main(int argc, char** argv) {
hipDeviceReset();
printf("%s completed, returned %s\n", sampleName, testResult ? "OK" : "ERROR!");
exit(testResult ? EXIT_SUCCESS : EXIT_FAILURE);
}
}
@@ -33,8 +33,6 @@ THE SOFTWARE.
#define LEN 512
#define SIZE 2048
struct TestClock {
static __global__ void kernel1(int* Ad) {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
Ad[tid] = clock() + clock64() + __clock() + __clock64();
@@ -61,9 +59,8 @@ struct TestClock {
assert(0 != A[i]);
}
}
};
int main() {
TestClock().run();
run();
passed();
}
@@ -0,0 +1,140 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
* RUN: %t
* HIT_END
*/
#include "test_common.h"
#include <hip/hip_runtime.h>
#include <hip/hip_runtime_api.h>
#include <hip/math_functions.h>
#include <iostream>
#define HIP_ASSERT(status) assert(status == hipSuccess)
#define LEN 50
#define SIZE (LEN * sizeof(bool))
__global__ void kernelTestFMA(bool *Ad) {
float f = 1.0f / 3.0f;
double d = f;
int i = 0;
auto Check = [&](bool Cond) { Ad[i++] = Cond; };
// f * f + 3.0f will be different if promoted to double.
float floatResult = fma(f, f, 3.0f);
double doubleResult = fma(d, d, 3.0);
Check(floatResult != doubleResult);
// check promote to float.
Check(fma(f, f, 3) == floatResult);
Check(fma(f, f, (char)3) == floatResult);
Check(fma(f, f, (unsigned char)3) == floatResult);
Check(fma(f, f, (short)3) == floatResult);
Check(fma(f, f, (unsigned short)3) == floatResult);
Check(fma(f, f, (int)3) == floatResult);
Check(fma(f, f, (unsigned int)3) == floatResult);
Check(fma(f, f, (long)3) == floatResult);
Check(fma(f, f, (unsigned long)3) == floatResult);
Check(fma(f, f, true) == fma(f, f, 1.0f));
// check promote to double.
Check(fma(d, (double)f, 3) == doubleResult);
Check(fma(d, (double)f, (char)3) == doubleResult);
Check(fma(d, (double)f, (unsigned char)3) == doubleResult);
Check(fma(d, (double)f, (short)3) == doubleResult);
Check(fma(d, (double)f, (unsigned short)3) == doubleResult);
Check(fma(d, (double)f, (int)3) == doubleResult);
Check(fma(d, (double)f, (unsigned int)3) == doubleResult);
Check(fma(d, (double)f, (long)3) == doubleResult);
Check(fma(d, (double)f, (unsigned long)3) == doubleResult);
Check(fma(d, (double)f, true) == fma((double)f, (double)f, 1.0));
while (i < LEN)
Check(true);
}
void runTestFMA() {
bool *Ad;
bool A[LEN];
for (unsigned i = 0; i < LEN; i++) {
A[i] = 0;
}
HIP_ASSERT(hipMalloc((void **)&Ad, SIZE));
hipLaunchKernelGGL(kernelTestFMA, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, Ad);
HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost));
for (unsigned i = 0; i < LEN; i++) {
assert(A[i]);
}
}
__global__ void kernelTestHalfFMA(bool *Ad) {
_Float16 h = (_Float16)(1.0f/3.0f);
float f = h;
double d = f;
int i = 0;
auto Check = [&](bool Cond) { Ad[i++] = Cond; };
// h * h + 3 will be different if promoted to float.
_Float16 halfResult = fma(h, h, (_Float16)3);
float floatResult = fma(f, f, 3.0f);
double doubleResult = fma(d, d, 3.0);
Check(halfResult != floatResult);
Check(halfResult != doubleResult);
// check promote to half.
Check(fma(h, h, 3) == halfResult);
Check(fma(h, h, (char)3) == halfResult);
Check(fma(h, h, (unsigned char)3) == halfResult);
Check(fma(h, h, (short)3) == halfResult);
Check(fma(h, h, (unsigned short)3) == halfResult);
Check(fma(h, h, (int)3) == halfResult);
Check(fma(h, h, (unsigned int)3) == halfResult);
Check(fma(h, h, (long)3) == halfResult);
Check(fma(h, h, (unsigned long)3) == halfResult);
Check(fma(h, h, true) == fma(h, h, (_Float16)1));
while (i < LEN)
Check(true);
}
void runTestHalfFMA() {
bool *Ad;
bool A[LEN];
for (unsigned i = 0; i < LEN; i++) {
A[i] = 0;
}
HIP_ASSERT(hipMalloc((void **)&Ad, SIZE));
hipLaunchKernelGGL(kernelTestHalfFMA, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, Ad);
HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost));
for (unsigned i = 0; i < LEN; i++) {
assert(A[i]);
}
}
int main() {
runTestFMA();
runTestHalfFMA();
passed();
}
@@ -30,6 +30,7 @@ THE SOFTWARE.
// Incorrect implementation causes compilation failure due to conflict
// declartions.
#include <new>
#include <hip/math_functions.h>
// Test __HIP_DEVICE_COMPILE__ is defined after math_functions.h
@@ -45,14 +46,6 @@ __device__ __host__ inline void throw_std_bad_alloc()
#endif
}
// Test __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ and __HIP_ARCH_HAS_DYNAMIC_PARALLEL__
// is defined. Eigen HIP/hcc/Half.h __ldg depends on this.
#if !defined(__HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__) || \
!defined(__HIP_ARCH_HAS_DYNAMIC_PARALLEL__)
#error \
"__HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ or __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ not defined"
#endif
#include <hip/hip_runtime.h>
#include "test_common.h"
@@ -33,7 +33,6 @@ THE SOFTWARE.
#define LEN 512
#define SIZE 2048
struct TestPlacementNew {
class A {
public:
__device__ A() {
@@ -63,9 +62,8 @@ struct TestPlacementNew {
assert(i == A[i]);
}
}
};
int main() {
TestPlacementNew().run();
run();
passed();
}
+40 -13
Wyświetl plik
@@ -34,21 +34,26 @@ THE SOFTWARE.
#include <iostream>
#define HIP_ASSERT(x) (assert((x) == hipSuccess))
#define LEN 512
#define SIZE LEN << 2
#define TEST_DEBUG (0)
__global__ void kernel_trig(hipLaunchParm lp, float* In, float* sin_d, float* cos_d, float* tan_d,
float* sin_pd, float* cos_pd) {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
sin_d[tid] = __sinf(In[tid]);
cos_d[tid] = __cosf(In[tid]);
tan_d[tid] = __tanf(In[tid]);
__sincosf(In[tid], &sin_pd[tid], &cos_pd[tid]);
sin_d[tid] = sinf(In[tid]);
cos_d[tid] = cosf(In[tid]);
tan_d[tid] = tanf(In[tid]);
sincosf(In[tid], &sin_pd[tid], &cos_pd[tid]);
}
int main() {
float *In, *sin_h, *cos_h, *tan_h, *sin_ph, *cos_ph;
float *In_d, *sin_d, *cos_d, *tan_d, *sin_pd, *cos_pd;
int errors = 0;
In = new float[LEN];
sin_h = new float[LEN];
cos_h = new float[LEN];
@@ -63,14 +68,16 @@ int main() {
sin_ph[i] = 0.0f;
cos_ph[i] = 0.0f;
}
hipMalloc((void**)&In_d, SIZE);
hipMalloc((void**)&sin_d, SIZE);
hipMalloc((void**)&cos_d, SIZE);
hipMalloc((void**)&tan_d, SIZE);
hipMalloc((void**)&sin_pd, SIZE);
hipMalloc((void**)&cos_pd, SIZE);
HIP_ASSERT(hipMalloc((void**)&In_d, SIZE));
HIP_ASSERT(hipMalloc((void**)&sin_d, SIZE));
HIP_ASSERT(hipMalloc((void**)&cos_d, SIZE));
HIP_ASSERT(hipMalloc((void**)&tan_d, SIZE));
HIP_ASSERT(hipMalloc((void**)&sin_pd, SIZE));
HIP_ASSERT(hipMalloc((void**)&cos_pd, SIZE));
hipMemcpy(In_d, In, SIZE, hipMemcpyHostToDevice);
hipLaunchKernel(kernel_trig, dim3(LEN, 1, 1), dim3(1, 1, 1), 0, 0, In_d, sin_d, cos_d, tan_d,
hipLaunchKernel(kernel_trig, dim3(LEN, 1, 1), dim3(1, 1, 1), 0, 0,
In_d, sin_d, cos_d, tan_d,
sin_pd, cos_pd);
hipMemcpy(sin_h, sin_d, SIZE, hipMemcpyDeviceToHost);
hipMemcpy(cos_h, cos_d, SIZE, hipMemcpyDeviceToHost);
@@ -79,8 +86,28 @@ int main() {
hipMemcpy(cos_ph, cos_pd, SIZE, hipMemcpyDeviceToHost);
for (int i = 0; i < LEN; i++) {
if (sin_h[i] != sin_ph[i] || cos_h[i] != cos_ph[i] || tan_h[i] * cos_h[i] != sin_h[i]) {
std::cout << "Failed!" << std::endl;
errors++;
#if TEST_DEBUG
std::cout << "Check Failed!" << std::endl;
std::cout << " sin_h: " << sin_h[i] << " sin_ph: " << sin_ph[i] << "\n"
<< " cos_h: " << cos_h[i] << " cos_ph:" << cos_ph[i] << "\n"
<< " tan_h * cos_h: " << tan_h[i] * cos_h[i] << " sin_h[i]: " << sin_h[i] << "\n";
#endif
}
}
passed();
HIP_ASSERT(hipFree(In_d));
HIP_ASSERT(hipFree(sin_d));
HIP_ASSERT(hipFree(cos_d));
HIP_ASSERT(hipFree(tan_d));
HIP_ASSERT(hipFree(sin_pd));
HIP_ASSERT(hipFree(cos_pd));
if (errors != 0) {
std::cout << "hip_trig FAILED!" << std::endl;
return -1;
} else {
std::cout << "hip_trig PASSED!" << std::endl;
}
return errors;
}
+9 -2
Wyświetl plik
@@ -38,7 +38,10 @@ int getDeviceNumber() {
string str;
std::this_thread::sleep_for(std::chrono::milliseconds(10));
if (!(in = popen("./directed_tests/hipEnvVar -c", "r"))) {
return 1;
// Check at same level
if (!(in = popen("./hipEnvVar -c", "r"))) {
return 1;
}
}
while (fgets(buff, 512, in) != NULL) {
cout << buff;
@@ -54,7 +57,11 @@ void getDevicePCIBusNumRemote(int deviceID, char* pciBusID) {
str += std::to_string(deviceID);
std::this_thread::sleep_for(std::chrono::milliseconds(10));
if (!(in = popen(str.c_str(), "r"))) {
exit(1);
// Check at same level
if (!(in = popen("./hipEnvVar -d ", "r"))) {
exit(1);
}
}
while (fgets(pciBusID, 100, in) != NULL) {
cout << pciBusID;
@@ -0,0 +1,91 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../test_common.cpp
* RUN: %t
* HIT_END
*/
#include <hip/hip_runtime.h>
#include <hip/hip_runtime_api.h>
#include <iostream>
#include "test_common.h"
#define HIP_ASSERT(status) assert(status == hipSuccess)
#define LEN 512
#define SIZE 2048
__constant__ int ConstantGlobalVar = 123;
static __global__ void kernel(int* Ad) {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
Ad[tid] = ConstantGlobalVar;
}
void runTestConstantGlobalVar() {
int *A, *Ad;
A = new int[LEN];
for (unsigned i = 0; i < LEN; i++) {
A[i] = 0;
}
HIP_ASSERT(hipMalloc((void**)&Ad, SIZE));
hipLaunchKernelGGL(kernel, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost));
for (unsigned i = 0; i < LEN; i++) {
assert(123 == A[i]);
}
}
__device__ int GlobalArray[LEN];
static __global__ void kernelWrite() {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
GlobalArray[tid] = tid;
}
static __global__ void kernelRead(int* Ad) {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
Ad[tid] = GlobalArray[tid];
}
void runTestGlobalArray() {
int *A, *Ad;
A = new int[LEN];
for (unsigned i = 0; i < LEN; i++) {
A[i] = 0;
}
HIP_ASSERT(hipMalloc((void**)&Ad, SIZE));
hipLaunchKernelGGL(kernelWrite, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0);
hipLaunchKernelGGL(kernelRead, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost));
for (unsigned i = 0; i < LEN; i++) {
assert(i == A[i]);
}
}
int main() {
runTestConstantGlobalVar();
runTestGlobalArray();
passed();
}
@@ -170,8 +170,8 @@ void runTests(int64_t numElements) {
// for (int waitStart=0; waitStart<2; waitStart++) {
for (int waitStart = 1; waitStart >= 0; waitStart--) {
unsigned W = waitStart ? 0x1000 : 0;
test(W | 0x01, C_d, C_h, numElements, 0, waitStart, syncNone);
test(W | 0x02, C_d, C_h, numElements, stream, waitStart, syncNone);
test(W | 0x01, C_d, C_h, numElements, 0, 0, syncNone);
test(W | 0x02, C_d, C_h, numElements, stream, 0, syncNone);
test(W | 0x04, C_d, C_h, numElements, 0, waitStart, syncStream);
test(W | 0x08, C_d, C_h, numElements, stream, waitStart, syncStream);
test(W | 0x10, C_d, C_h, numElements, 0, waitStart, syncStopEvent);
@@ -56,7 +56,7 @@ int runTest() {
int testResult = 1;
float *texBuf;
float val[N], output[N];
size_t size = 0;
size_t offset = 0;
float *devBuf;
for (int i = 0; i < N; i++) {
val[i] = (float)i;
@@ -74,7 +74,8 @@ int runTest() {
tex.filterMode = hipFilterModePoint;
tex.normalized = 0;
HIPCHECK(hipBindTexture(&size, tex, (void *)texBuf, chanDesc, N * sizeof(float)));
HIPCHECK(hipBindTexture(&offset, tex, (void *)texBuf, chanDesc, N * sizeof(float)));
HIPCHECK(hipGetTextureAlignmentOffset(&offset,&tex));
dim3 dimBlock(64, 1, 1);
dim3 dimGrid(N / dimBlock.x, 1, 1);
@@ -1,78 +0,0 @@
/*
Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
/* HIT_START
* BUILD: %t %s ../test_common.cpp
* RUN: %t
* HIT_END
*/
#include <hip/hip_runtime.h>
#include "test_common.h"
using namespace std;
#define R 8 //rows, height
#define C 8 //columns, width
texture<int, hipTextureType2D,hipReadModeElementType> tex;
bool runTest(void);
int main(int argc, char** argv) {
bool testResult=runTest();
if (testResult) {
passed();
} else {
exit(EXIT_FAILURE);
}
}
bool runTest()
{
int val[R][C],i,j;
size_t offset;
for(i=0;i<R;i++)
for(j=0;j<C;j++)
{
val[i][j]=(i+1)*(j+1);
}
hipChannelFormatDesc chan_desc=hipCreateChannelDesc(32,0,0,0,hipChannelFormatKindSigned);
hipArray *hipArray;
HIPCHECK(hipMallocArray(&hipArray, &chan_desc,C,R,0));
HIPCHECK(hipMemcpyToArray(hipArray,0,0, val, R*C*sizeof(int), hipMemcpyHostToDevice));
tex.addressMode[0]=hipAddressModeWrap;
tex.addressMode[1]=hipAddressModeWrap;
tex.filterMode=hipFilterModePoint;
tex.normalized=0;
HIPCHECK(hipBindTextureToArray(&tex, hipArray, &chan_desc));
HIPCHECK(hipGetTextureAlignmentOffset(&offset,&tex));
HIPCHECK(hipUnbindTexture(&tex));
HIPCHECK(hipFreeArray(hipArray));
if(offset != 0)
return false;
else
return true;
}