diff --git a/CMakeLists.txt b/CMakeLists.txt
index edb8c2d238..3c62ea4365 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -288,12 +288,22 @@ if(HIP_PLATFORM STREQUAL "hcc")
install(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::)
include(CMakePackageConfigHelpers)
+ if(HIP_COMPILER STREQUAL "hcc")
configure_package_config_file(
- hip-config.cmake.in
+ hip-config-hcc.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/hip-config.cmake
INSTALL_DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR}
PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR
)
+ elseif(HIP_COMPILER STREQUAL "clang")
+ configure_package_config_file(
+ hip-config-clang.cmake.in
+ ${CMAKE_CURRENT_BINARY_DIR}/hip-config.cmake
+ INSTALL_DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR}
+ PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR
+ )
+ endif()
+
write_basic_package_version_file(
${CMAKE_CURRENT_BINARY_DIR}/hip-config-version.cmake
VERSION "${HIP_VERSION}"
diff --git a/Jenkinsfile b/Jenkinsfile
index 2432cea38e..ab6dd6d67f 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -167,7 +167,8 @@ def docker_build_inside_image( def build_image, String inside_args, String platf
}
// Cap the maximum amount of testing, in case of hangs
- // Excluding hipPrintfKernel test from automation; variable fails on CI test machines
+ // Excluding hipVectorTypes test from automation; due to regression from HCC commit 2367133
+ // Excluding hipFloatMath test from automation; due to regression from ROCDL commit 2fc04e1
timeout(time: 1, unit: 'HOURS')
{
stage("${platform} unit testing")
@@ -177,7 +178,7 @@ def docker_build_inside_image( def build_image, String inside_args, String platf
cd ${build_dir_rel}
make install -j\$(nproc)
make build_tests -i -j\$(nproc)
- ctest -E hipVectorTypes
+ ctest -E "(hipVectorTypes.tst|hipVectorTypesDevice.tst|hipFloatMath.tst)"
"""
// If unit tests output a junit or xunit file in the future, jenkins can parse that file
// to display test results on the dashboard
@@ -385,4 +386,40 @@ rocm_head:
docker_clean_images( job_name, hip_image_name )
*/
}
+},
+cuda_9_x:
+{
+ node('hip-cuda')
+ {
+ ////////////////////////////////////////////////////////////////////////
+ // Block of string constants customizing behavior for cuda
+ String nvcc_ver = 'nvcc-9.x'
+ String from_image = 'ci_test_nodes/cuda-9.x/ubuntu-16.04:latest'
+ String inside_args = '--runtime=nvidia';
+
+ // Checkout source code, dependencies and version files
+ String source_hip_rel = checkout_and_version( nvcc_ver )
+
+ // Create/reuse a docker image that represents the hip build environment
+ def hip_build_image = docker_build_image( nvcc_ver, 'hip', '', source_hip_rel, from_image )
+
+ // Print system information for the log
+ hip_build_image.inside( inside_args )
+ {
+ sh """#!/usr/bin/env bash
+ set -x
+ nvidia-smi
+ nvcc --version
+ """
+ }
+
+ // Conctruct a binary directory path based on build config
+ String build_hip_rel = build_directory_rel( build_config );
+
+ // Build hip inside of the build environment
+ docker_build_inside_image( hip_build_image, inside_args, nvcc_ver, "-DHIP_NVCC_FLAGS=--Wno-deprecated-gpu-targets", build_config, source_hip_rel, build_hip_rel )
+
+ // Clean docker image
+ docker_clean_images( 'hip', docker_build_image_name( ) )
+ }
}
diff --git a/docs/markdown/CUBLAS_API_supported_by_HIP.md b/docs/markdown/CUBLAS_API_supported_by_HIP.md
index 932092612b..750703f4e1 100644
--- a/docs/markdown/CUBLAS_API_supported_by_HIP.md
+++ b/docs/markdown/CUBLAS_API_supported_by_HIP.md
@@ -35,9 +35,9 @@
| enum |***`cublasAtomicsMode_t`*** | |
| 0 |*`CUBLAS_ATOMICS_NOT_ALLOWED`* | |
| 1 |*`CUBLAS_ATOMICS_ALLOWED`* | |
-| enum |***`cublasAtomicsMode_t`*** | |
-| -1 |*`CUBLAS_GEMM_DFALT`* | |
-| -1 |*`CUBLAS_GEMM_DEFAULT`* | |
+| enum |***`cublasGemmAlgo_t`*** |***`hipblasGemmAlgo_t`*** |
+| -1 |*`CUBLAS_GEMM_DFALT`* |*`HIPBLAS_GEMM_DEFAULT`* | 160 |
+| -1 |*`CUBLAS_GEMM_DEFAULT`* |*`HIPBLAS_GEMM_DEFAULT`* | 160 |
| 0 |*`CUBLAS_GEMM_ALGO0`* | |
| 1 |*`CUBLAS_GEMM_ALGO1`* | |
| 2 |*`CUBLAS_GEMM_ALGO2`* | |
@@ -391,7 +391,7 @@
|`cublasZgemm3m` | |
|`cublasHgemm` |`hipblasHgemm` |
|`cublasSgemmEx` | |
-|`cublasGemmEx` | |
+|`cublasGemmEx` |`hipblasGemmEx` |
|`cublasCgemmEx` | |
|`cublasUint8gemmBias` | |
|`cublasSsyrk` | |
diff --git a/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md b/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md
index dca2683b12..7f87981000 100644
--- a/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md
+++ b/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md
@@ -93,10 +93,10 @@
## **7. Occupancy**
-| **CUDA** | **HIP** |
-|-----------------------------------------------------------|-------------------------------|
+| **CUDA** | **HIP** |
+|-----------------------------------------------------------|-----------------------------------------------|
| `cudaOccupancyMaxActiveBlocksPerMultiprocessor` | `hipOccupancyMaxActiveBlocksPerMultiprocessor`|
-| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | |
+| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | |
## **8. Execution Control [deprecated since 7.0]**
@@ -326,13 +326,13 @@
## **25. Texture Object Management**
-| **CUDA** | **HIP** |
-|-----------------------------------------------------------|-------------------------------|
-| `cudaCreateTextureObject` |`hipCreateTextureObject` |
-| `cudaDestroyTextureObject` |`hipDestroyTextureObject` |
-| `cudaGetTextureObjectResourceDesc` |`hipGetTextureObjectResourceDesc` |
+| **CUDA** | **HIP** |
+|-----------------------------------------------------------|--------------------------------------|
+| `cudaCreateTextureObject` |`hipCreateTextureObject` |
+| `cudaDestroyTextureObject` |`hipDestroyTextureObject` |
+| `cudaGetTextureObjectResourceDesc` |`hipGetTextureObjectResourceDesc` |
| `cudaGetTextureObjectResourceViewDesc` |`hipGetTextureObjectResourceViewDesc` |
-| `cudaGetTextureObjectTextureDesc` |`hipGetTextureObjectTextureDesc` |
+| `cudaGetTextureObjectTextureDesc` |`hipGetTextureObjectTextureDesc` |
## **26. Surface Object Management**
@@ -352,36 +352,36 @@
## **28. C++ API Routines**
*(7.0 contains, 7.5 doesn’t)*
-| **CUDA** | **HIP** |
-|-----------------------------------------------------------|-------------------------------|
-| `cudaBindSurfaceToArray` | |
-| `cudaBindTexture` | `hipBindTexture` |
-| `cudaBindTexture2D` | |
-| `cudaBindTextureToArray` | |
-| `cudaBindTextureToMipmappedArray` | |
-| `cudaCreateChannelDesc` | `hipCreateChannelDesc` |
-| `cudaFuncGetAttributes` | |
-| `cudaFuncSetCacheConfig` | |
-| `cudaGetSymbolAddress` | |
-| `cudaGetSymbolSize` | |
-| `cudaGetTextureAlignmentOffset` | |
-| `cudaLaunch` | |
-| `cudaLaunchKernel` | |
-| `cudaMallocHost` | |
-| `cudaMallocManaged` | |
-| `cudaMemcpyFromSymbol` | |
-| `cudaMemcpyFromSymbolAsync` | |
-| `cudaMemcpyToSymbol` | |
-| `cudaMemcpyToSymbolAsync` | |
+| **CUDA** | **HIP** |
+|-----------------------------------------------------------|------------------------------------------------|
+| `cudaBindSurfaceToArray` | |
+| `cudaBindTexture` | `hipBindTexture` |
+| `cudaBindTexture2D` | |
+| `cudaBindTextureToArray` | |
+| `cudaBindTextureToMipmappedArray` | |
+| `cudaCreateChannelDesc` | `hipCreateChannelDesc` |
+| `cudaFuncGetAttributes` | |
+| `cudaFuncSetCacheConfig` | |
+| `cudaGetSymbolAddress` | |
+| `cudaGetSymbolSize` | |
+| `cudaGetTextureAlignmentOffset` | |
+| `cudaLaunch` | |
+| `cudaLaunchKernel` | |
+| `cudaMallocHost` | |
+| `cudaMallocManaged` | |
+| `cudaMemcpyFromSymbol` | |
+| `cudaMemcpyFromSymbolAsync` | |
+| `cudaMemcpyToSymbol` | |
+| `cudaMemcpyToSymbolAsync` | |
| `cudaOccupancyMaxActiveBlocksPerMultiprocessor` | `hipOccupancyMaxActiveBlocksPerMultiprocessor` |
-| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | |
-| `cudaOccupancyMaxPotentialBlockSize` | `hipOccupancyMaxPotentialBlockSize` |
-| `cudaOccupancyMaxPotentialBlockSizeVariableSMem` | |
-| `cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags` | |
-| `cudaOccupancyMaxPotentialBlockSizeWithFlags` | |
-| `cudaSetupArgument` | |
-| `cudaStreamAttachMemAsync` | |
-| `cudaUnbindTexture` | `hipUnbindTexture` |
+| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | |
+| `cudaOccupancyMaxPotentialBlockSize` | `hipOccupancyMaxPotentialBlockSize` |
+| `cudaOccupancyMaxPotentialBlockSizeVariableSMem` | |
+| `cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags` | |
+| `cudaOccupancyMaxPotentialBlockSizeWithFlags` | |
+| `cudaSetupArgument` | |
+| `cudaStreamAttachMemAsync` | |
+| `cudaUnbindTexture` | `hipUnbindTexture` |
## **30. Profiler Control**
@@ -395,8 +395,8 @@
## **31. Data types**
-| **type** | **CUDA** | **HIP** |
-|-------------:|-----------------------------------------------|------------------------------------------------------|
+| **type** | **CUDA** | **HIP** |**HIP value** (if differs) |
+|-------------:|-----------------------------------------------|------------------------------------------------------|---------------------------|
| struct | `cudaChannelFormatDesc` | `hipChannelFormatDesc` |
| struct | `cudaDeviceProp` | `hipDeviceProp_t` |
| struct | `cudaExtent` | `hipExtent` |
@@ -790,3 +790,19 @@
| define | `cudaTextureType1DLayered` | `hipTextureType1DLayered` |
| define | `cudaTextureType2DLayered` | `hipTextureType2DLayered` |
| define | `cudaTextureTypeCubemapLayered` | `hipTextureTypeCubemapLayered` |
+| enum |***`cudaDataType_t`*** |***`hipblasDatatype_t`*** |
+| enum |***`cudaDataType`*** |***`hipblasDatatype_t`*** |
+| 2 |*`CUDA_R_16F`* |*`HIPBLAS_R_16F`* | 150 |
+| 6 |*`CUDA_C_16F`* |*`HIPBLAS_C_16F`* | 153 |
+| 0 |*`CUDA_R_32F`* |*`HIPBLAS_R_32F`* | 151 |
+| 4 |*`CUDA_C_32F`* |*`HIPBLAS_C_32F`* | 154 |
+| 1 |*`CUDA_R_64F`* |*`HIPBLAS_R_64F`* | 152 |
+| 5 |*`CUDA_C_64F`* |*`HIPBLAS_C_64F`* | 155 |
+| 3 |*`CUDA_R_8I`* | |
+| 7 |*`CUDA_C_8I`* | |
+| 8 |*`CUDA_R_8U`* | |
+| 9 |*`CUDA_C_8U`* | |
+| 10 |*`CUDA_R_32I`* | |
+| 11 |*`CUDA_C_32I`* | |
+| 12 |*`CUDA_R_32U`* | |
+| 13 |*`CUDA_C_32U`* | |
diff --git a/docs/markdown/CUDNN_API_supported_by_HIP.md b/docs/markdown/CUDNN_API_supported_by_HIP.md
index ec640fecdd..ffdeb881e5 100644
--- a/docs/markdown/CUDNN_API_supported_by_HIP.md
+++ b/docs/markdown/CUDNN_API_supported_by_HIP.md
@@ -239,8 +239,8 @@
|`cudnnSetTensor` |`hipdnnSetTensor` |
|`cudnnScaleTensor` |`hipdnnScaleTensor` |
|`cudnnCreateFilterDescriptor` |`hipdnnCreateFilterDescriptor` |
-|`cudnnSetFilter4dDescriptor` | |
-|`cudnnGetFilter4dDescriptor` | |
+|`cudnnSetFilter4dDescriptor` |`hipdnnSetFilter4dDescriptor` |
+|`cudnnGetFilter4dDescriptor` |`hipdnnGetFilter4dDescriptor` |
|`cudnnSetFilterNdDescriptor` |`hipdnnSetFilterNdDescriptor` |
|`cudnnGetFilterNdDescriptor` |`hipdnnGetFilterNdDescriptor` |
|`cudnnDestroyFilterDescriptor` |`hipdnnDestroyFilterDescriptor` |
diff --git a/docs/markdown/hip_programming_guide.md b/docs/markdown/hip_programming_guide.md
index 9313eb22e1..52d250cab5 100644
--- a/docs/markdown/hip_programming_guide.md
+++ b/docs/markdown/hip_programming_guide.md
@@ -91,8 +91,22 @@ Setting HCC_UNPINNED_COPY_MODE = 3, forces all unpinned transfer to use direct m
Following environment variables can be used to control the transfer thresholds:
-- HCC_H2D_STAGING_THRESHOLD - Threshold in KB for H2D copy. For sizes smaller than threshold direct copy logic would be used else staging buffers logic. By default it is set to 64.
+- HCC_H2D_STAGING_THRESHOLD - Threshold in KB for H2D copy. For sizes smaller than threshold direct copy logic would be used else staging buffers logic. By default it is set to 64.
- HCC_H2D_PININPLACE_THRESHOLD - Threshold in KB for H2D copy. For sizes smaller than threshold staging buffers logic would be used else PinInPlace logic. By default it is set to 4096.
- HCC_D2H_PININPLACE_THRESHOLD - Threshold in KB for D2H copy. For sizes smaller than threshold staging buffer logic would be used else PinInPlace logic. By default it is set to 1024.
+
+## Device-Side Malloc
+
+hip-hcc and hip-clang supports device-side malloc and free. Users can allocate
+memory dynamically in a kernel. The allocated memory are in global address
+space, however, different threads get different memory allocations for the same
+call of malloc. The allocated memory can be accessed or freed by other threads
+or other kernels. It persists in the life time of the HIP program until it is
+freed.
+
+The memory are allocated in pages. Users can define macro
+`__HIP_SIZE_OF_PAGE` for controlling the page size in bytes and macro
+`__HIP_NUM_PAGES` for controlling the total number of pages that can be
+allocated.
\ No newline at end of file
diff --git a/hip-config.cmake.in b/hip-config-clang.cmake.in
similarity index 97%
rename from hip-config.cmake.in
rename to hip-config-clang.cmake.in
index d5dc6803fc..240f01f60e 100644
--- a/hip-config.cmake.in
+++ b/hip-config-clang.cmake.in
@@ -48,9 +48,6 @@ set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" )
set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc")
set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig")
-if(HIP_COMPILER STREQUAL "hcc")
- find_dependency(hcc)
-endif()
include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" )
set( hip_LIBRARIES hip::host hip::device)
diff --git a/hip-config-hcc.cmake.in b/hip-config-hcc.cmake.in
new file mode 100644
index 0000000000..efcdf708bb
--- /dev/null
+++ b/hip-config-hcc.cmake.in
@@ -0,0 +1,65 @@
+@PACKAGE_INIT@
+
+include(CMakeFindDependencyMacro OPTIONAL RESULT_VARIABLE _CMakeFindDependencyMacro_FOUND)
+if (NOT _CMakeFindDependencyMacro_FOUND)
+ macro(find_dependency dep)
+ if (NOT ${dep}_FOUND)
+ set(cmake_fd_version)
+ if (${ARGC} GREATER 1)
+ set(cmake_fd_version ${ARGV1})
+ endif()
+ set(cmake_fd_exact_arg)
+ if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION_EXACT)
+ set(cmake_fd_exact_arg EXACT)
+ endif()
+ set(cmake_fd_quiet_arg)
+ if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY)
+ set(cmake_fd_quiet_arg QUIET)
+ endif()
+ set(cmake_fd_required_arg)
+ if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED)
+ set(cmake_fd_required_arg REQUIRED)
+ endif()
+ find_package(${dep} ${cmake_fd_version}
+ ${cmake_fd_exact_arg}
+ ${cmake_fd_quiet_arg}
+ ${cmake_fd_required_arg}
+ )
+ string(TOUPPER ${dep} cmake_dep_upper)
+ if (NOT ${dep}_FOUND AND NOT ${cmake_dep_upper}_FOUND)
+ set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.")
+ set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False)
+ return()
+ endif()
+ set(cmake_fd_version)
+ set(cmake_fd_required_arg)
+ set(cmake_fd_quiet_arg)
+ set(cmake_fd_exact_arg)
+ endif()
+ endmacro()
+endif()
+
+
+set_and_check( hip_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@" )
+set_and_check( hip_INCLUDE_DIRS "${hip_INCLUDE_DIR}" )
+set_and_check( hip_LIB_INSTALL_DIR "@PACKAGE_LIB_INSTALL_DIR@" )
+set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" )
+
+set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc")
+set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig")
+
+find_dependency(hcc)
+include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" )
+
+set( hip_LIBRARIES hip::host hip::device)
+set( hip_LIBRARY ${hip_LIBRARIES})
+
+set(HIP_INCLUDE_DIR ${hip_INCLUDE_DIR})
+set(HIP_INCLUDE_DIRS ${hip_INCLUDE_DIRS})
+set(HIP_LIB_INSTALL_DIR ${hip_LIB_INSTALL_DIR})
+set(HIP_BIN_INSTALL_DIR ${hip_BIN_INSTALL_DIR})
+set(HIP_LIBRARIES ${hip_LIBRARIES})
+set(HIP_LIBRARY ${hip_LIBRARY})
+set(HIP_HIPCC_EXECUTABLE ${hip_HIPCC_EXECUTABLE})
+set(HIP_HIPCONFIG_EXECUTABLE ${hip_HIPCONFIG_EXECUTABLE})
+
diff --git a/hipify-clang/CMakeLists.txt b/hipify-clang/CMakeLists.txt
index 5d9070be28..e2703356ee 100644
--- a/hipify-clang/CMakeLists.txt
+++ b/hipify-clang/CMakeLists.txt
@@ -89,7 +89,8 @@ if (HIPIFY_CLANG_TESTS)
if ((CUDA_VERSION VERSION_LESS "7.0") OR (LLVM_PACKAGE_VERSION VERSION_LESS "3.8") OR
(CUDA_VERSION VERSION_GREATER "7.5" AND LLVM_PACKAGE_VERSION VERSION_LESS "4.0") OR
(CUDA_VERSION VERSION_GREATER "8.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "6.0") OR
- (CUDA_VERSION VERSION_GREATER "9.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "7.0"))
+ (CUDA_VERSION VERSION_GREATER "9.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "7.0") OR
+ CUDA_VERSION VERSION_EQUAL "10.0")
message(SEND_ERROR "CUDA ${CUDA_VERSION} is not supported by clang ${LLVM_PACKAGE_VERSION}.")
if (CUDA_VERSION VERSION_LESS "7.0")
message(STATUS "Please install CUDA 7.0 or higher.")
diff --git a/hipify-clang/README.md b/hipify-clang/README.md
index 7f55dea800..02bc9cfed0 100644
--- a/hipify-clang/README.md
+++ b/hipify-clang/README.md
@@ -30,11 +30,16 @@
## Dependencies
`hipify-clang` requires:
-1. LLVM+CLANG of at least version 3.8.0, latest stable and recommended release is 6.0.1.
-2. CUDA at least version 7.5, latest supported release is 9.0.
+1. LLVM+CLANG of at least version 3.8.0, latest stable and recommended releases:
+
+ 6.0.1 (linux and windows),
-| **LLVM release version** | **CUDA latest supported version** |
-|:------------------------:|:---------------------------------:|
+ 7.0.0 (linux only).
+
+2. CUDA at least version 7.5, latest supported release is 9.2.
+
+| **LLVM release version** | **CUDA latest supported version** | **Comments** |
+|:------------------------:|:---------------------------------:|:------------:|
| 3.8.0 | 7.5 |
| 3.8.1 | 7.5 |
| 3.9.0 | 7.5 |
@@ -46,6 +51,8 @@
| 5.0.2 | 8.0 |
| 6.0.0 | 9.0 |
| 6.0.1 | 9.0 |
+| 7.0.0 | 9.2 | linux only |
+| | 10.0 | not yet supported |
In most cases, you can get a suitable version of LLVM+CLANG with your package manager.
diff --git a/hipify-clang/src/CUDA2HipMap.cpp b/hipify-clang/src/CUDA2HipMap.cpp
index 4bd7263bad..01f24a6d43 100644
--- a/hipify-clang/src/CUDA2HipMap.cpp
+++ b/hipify-clang/src/CUDA2HipMap.cpp
@@ -1417,22 +1417,23 @@ const std::map CUDA_IDENTIFIER_MAP{
/////////////////////////////// CUDA RT API ///////////////////////////////
// Data types
- {"cudaDataType_t", {"hipDataType_t", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
- {"cudaDataType", {"hipDataType_t", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
- {"CUDA_R_16F", {"hipR16F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
- {"CUDA_C_16F", {"hipC16F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
- {"CUDA_R_32F", {"hipR32F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
- {"CUDA_C_32F", {"hipC32F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
- {"CUDA_R_64F", {"hipR64F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
- {"CUDA_C_64F", {"hipC64F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
- {"CUDA_R_8I", {"hipR8I", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
- {"CUDA_C_8I", {"hipC8I", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
- {"CUDA_R_8U", {"hipR8U", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
- {"CUDA_C_8U", {"hipC8U", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
- {"CUDA_R_32I", {"hipR32I", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
- {"CUDA_C_32I", {"hipC32I", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
- {"CUDA_R_32U", {"hipR32U", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
- {"CUDA_C_32U", {"hipC32U", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
+ // TODO: rename hipblasDatatype_t to hipDataType_t and move from hipBLAS to HIP
+ {"cudaDataType_t", {"hipblasDatatype_t", CONV_TYPE, API_RUNTIME}},
+ {"cudaDataType", {"hipblasDatatype_t", CONV_TYPE, API_RUNTIME}},
+ {"CUDA_R_16F", {"HIPBLAS_R_16F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 // 150
+ {"CUDA_C_16F", {"HIPBLAS_C_16F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 6 // 153
+ {"CUDA_R_32F", {"HIPBLAS_R_32F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 // 151
+ {"CUDA_C_32F", {"HIPBLAS_C_32F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 4 // 154
+ {"CUDA_R_64F", {"HIPBLAS_R_64F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 // 152
+ {"CUDA_C_64F", {"HIPBLAS_C_64F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 5 // 155
+ {"CUDA_R_8I", {"HIPBLAS_R_8I", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 //
+ {"CUDA_C_8I", {"HIPBLAS_C_8I", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 7 //
+ {"CUDA_R_8U", {"HIPBLAS_R_8U", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 8 //
+ {"CUDA_C_8U", {"HIPBLAS_C_8U", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 9 //
+ {"CUDA_R_32I", {"HIPBLAS_R_32I", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 10 //
+ {"CUDA_C_32I", {"HIPBLAS_C_32I", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 11 //
+ {"CUDA_R_32U", {"HIPBLAS_R_32U", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 12 //
+ {"CUDA_C_32U", {"HIPBLAS_C_32U", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 13 //
// Library property types
// IMPORTANT: no cuda prefix
@@ -2143,11 +2144,7 @@ const std::map CUDA_IDENTIFIER_MAP{
{"CUBLAS_ATOMICS_ALLOWED", {"HIPBLAS_ATOMICS_ALLOWED", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}},
// Blas Data Type
- {"cublasDataType_t", {"hipblasDataType_t", CONV_TYPE, API_BLAS, HIP_UNSUPPORTED}},
- {"CUBLAS_DATA_FLOAT", {"HIPBLAS_DATA_FLOAT", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}},
- {"CUBLAS_DATA_DOUBLE", {"HIPBLAS_DATA_DOUBLE", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}},
- {"CUBLAS_DATA_HALF", {"HIPBLAS_DATA_HALF", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}},
- {"CUBLAS_DATA_INT8", {"HIPBLAS_DATA_INT8", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}},
+ {"cublasDataType_t", {"hipblasDatatype_t", CONV_TYPE, API_BLAS}},
// Blas Math mode/tensor operation
{"cublasMath_t", {"hipblasMath_t", CONV_TYPE, API_BLAS, HIP_UNSUPPORTED}},
@@ -2155,9 +2152,9 @@ const std::map CUDA_IDENTIFIER_MAP{
{"CUBLAS_TENSOR_OP_MATH", {"HIPBLAS_TENSOR_OP_MATH", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}},
// Blass different GEMM algorithms
- {"cublasGemmAlgo_t", {"hipblasGemmAlgo_t", CONV_TYPE, API_BLAS, HIP_UNSUPPORTED}},
- {"CUBLAS_GEMM_DFALT", {"HIPBLAS_GEMM_DFALT", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, // -1
- {"CUBLAS_GEMM_DEFAULT", {"HIPBLAS_GEMM_DEFAULT", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, // -1
+ {"cublasGemmAlgo_t", {"hipblasGemmAlgo_t", CONV_TYPE, API_BLAS}},
+ {"CUBLAS_GEMM_DFALT", {"HIPBLAS_GEMM_DEFAULT", CONV_NUMERIC_LITERAL, API_BLAS}}, // -1 // 160
+ {"CUBLAS_GEMM_DEFAULT", {"HIPBLAS_GEMM_DEFAULT", CONV_NUMERIC_LITERAL, API_BLAS}}, // -1 // 160
{"CUBLAS_GEMM_ALGO0", {"HIPBLAS_GEMM_ALGO0", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, // 0
{"CUBLAS_GEMM_ALGO1", {"HIPBLAS_GEMM_ALGO1", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, // 1
{"CUBLAS_GEMM_ALGO2", {"HIPBLAS_GEMM_ALGO2", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, // 2
@@ -2715,7 +2712,7 @@ const std::map CUDA_IDENTIFIER_MAP{
//IO in FP16 / FP32, computation in float
{"cublasSgemmEx", {"hipblasSgemmEx", CONV_MATH_FUNC, API_BLAS, HIP_UNSUPPORTED}},
- {"cublasGemmEx", {"hipblasGemmEx", CONV_MATH_FUNC, API_BLAS, HIP_UNSUPPORTED}},
+ {"cublasGemmEx", {"hipblasGemmEx", CONV_MATH_FUNC, API_BLAS}},
{"cublasGemmBatchedEx", {"hipblasGemmBatchedEx", CONV_MATH_FUNC, API_BLAS, HIP_UNSUPPORTED}},
{"cublasGemmStridedBatchedEx", {"hipblasGemmStridedBatchedEx", CONV_MATH_FUNC, API_BLAS, HIP_UNSUPPORTED}},
// IO in Int8 complex/cuComplex, computation in cuComplex
diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h
index 508a301344..b319f26e03 100644
--- a/include/hip/hcc_detail/device_functions.h
+++ b/include/hip/hcc_detail/device_functions.h
@@ -27,11 +27,12 @@ THE SOFTWARE.
#include "math_fwd.h"
#include
+#include
+
+
#include
#include
#include
-#include
-
/*
Integer Intrinsics
*/
@@ -1036,4 +1037,5 @@ static inline __device__ void* memset(void* ptr, int val, size_t size) {
unsigned char val8 = static_cast(val);
return __hip_hc_memset(ptr, val8, size);
}
+
#endif
diff --git a/include/hip/hcc_detail/hip_fp16.h b/include/hip/hcc_detail/hip_fp16.h
index 68f0e35f5f..5a6e650069 100644
--- a/include/hip/hcc_detail/hip_fp16.h
+++ b/include/hip/hcc_detail/hip_fp16.h
@@ -29,7 +29,7 @@ THE SOFTWARE.
#include
#endif
-#if defined(__clang__) && (__clang_major__ > 3)
+#if defined(__clang__) && (__clang_major__ > 5)
typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2)));
struct __half_raw {
diff --git a/include/hip/hcc_detail/hip_fp16_math_fwd.h b/include/hip/hcc_detail/hip_fp16_math_fwd.h
index fccbcfbfdc..83fd1349b1 100644
--- a/include/hip/hcc_detail/hip_fp16_math_fwd.h
+++ b/include/hip/hcc_detail/hip_fp16_math_fwd.h
@@ -26,6 +26,8 @@ THE SOFTWARE.
// Half Math Functions
// */
+#include "host_defines.h"
+
extern "C"
{
__device__ __attribute__((const)) _Float16 __ocml_ceil_f16(_Float16);
diff --git a/include/hip/hcc_detail/hip_memory.h b/include/hip/hcc_detail/hip_memory.h
new file mode 100644
index 0000000000..739e488ca3
--- /dev/null
+++ b/include/hip/hcc_detail/hip_memory.h
@@ -0,0 +1,109 @@
+/*
+Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H
+#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H
+
+// Implementation of malloc and free device functions.
+// HIP heap is implemented as a global array with fixed size. Users may define
+// __HIP_SIZE_OF_PAGE and __HIP_NUM_PAGES to have a larger heap.
+
+// Size of page in bytes.
+#ifndef __HIP_SIZE_OF_PAGE
+#define __HIP_SIZE_OF_PAGE 64
+#endif
+
+// Total number of pages
+#ifndef __HIP_NUM_PAGES
+#define __HIP_NUM_PAGES (16 * 64 * 64)
+#endif
+
+#define __HIP_SIZE_OF_HEAP (__HIP_NUM_PAGES * __HIP_SIZE_OF_PAGE)
+
+#if __HCC__ || __HIP__
+
+__attribute__((weak)) __device__ char __hip_device_heap[__HIP_SIZE_OF_HEAP];
+__attribute__((weak)) __device__
+ uint32_t __hip_device_page_flag[__HIP_NUM_PAGES];
+
+extern "C" inline __device__ void* __hip_malloc(size_t size) {
+ char* heap = (char*)__hip_device_heap;
+ if (size > __HIP_SIZE_OF_HEAP) {
+ return (void*)nullptr;
+ }
+ uint32_t totalThreads =
+ hipBlockDim_x * hipGridDim_x * hipBlockDim_y
+ * hipGridDim_y * hipBlockDim_z * hipGridDim_z;
+ uint32_t currentWorkItem = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x
+ + (hipThreadIdx_y + hipBlockDim_y * hipBlockIdx_y) * hipBlockDim_x
+ + (hipThreadIdx_z + hipBlockDim_z * hipBlockIdx_z) * hipBlockDim_x
+ * hipBlockDim_y;
+
+ uint32_t numHeapsPerWorkItem = __HIP_NUM_PAGES / totalThreads;
+ uint32_t heapSizePerWorkItem = __HIP_SIZE_OF_HEAP / totalThreads;
+
+ uint32_t stride = size / __HIP_SIZE_OF_PAGE;
+ uint32_t start = numHeapsPerWorkItem * currentWorkItem;
+
+ uint32_t k = 0;
+
+ while (__hip_device_page_flag[k] > 0) {
+ k++;
+ }
+
+ for (uint32_t i = 0; i < stride - 1; i++) {
+ __hip_device_page_flag[i + start + k] = 1;
+ }
+
+ __hip_device_page_flag[start + stride - 1 + k] = 2;
+
+ void* ptr = (void*)(heap
+ + heapSizePerWorkItem * currentWorkItem + k * __HIP_SIZE_OF_PAGE);
+
+ return ptr;
+}
+
+extern "C" inline __device__ void* __hip_free(void* ptr) {
+ if (ptr == nullptr) {
+ return nullptr;
+ }
+
+ uint32_t offsetByte = (uint64_t)ptr - (uint64_t)__hip_device_heap;
+ uint32_t offsetPage = offsetByte / __HIP_SIZE_OF_PAGE;
+
+ while (__hip_device_page_flag[offsetPage] != 0) {
+ if (__hip_device_page_flag[offsetPage] == 2) {
+ __hip_device_page_flag[offsetPage] = 0;
+ offsetPage++;
+ break;
+ } else {
+ __hip_device_page_flag[offsetPage] = 0;
+ offsetPage++;
+ }
+ }
+
+ return nullptr;
+}
+
+#endif
+
+#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H
diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h
index 9759a9bdf8..cf4125be31 100644
--- a/include/hip/hcc_detail/hip_runtime.h
+++ b/include/hip/hcc_detail/hip_runtime.h
@@ -128,7 +128,7 @@ extern int HIP_TRACE_API;
// Feature tests:
-#if defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)
+#if (defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)) || __HIP_DEVICE_COMPILE__
// Device compile and not host compile:
// 32-bit Atomics:
@@ -245,11 +245,11 @@ static constexpr Coordinates threadIdx;
#endif // defined __HCC__
#if __HCC_OR_HIP_CLANG__
-extern "C" __device__ void* __hip_hc_malloc(size_t);
-extern "C" __device__ void* __hip_hc_free(void* ptr);
+extern "C" __device__ void* __hip_malloc(size_t);
+extern "C" __device__ void* __hip_free(void* ptr);
-static inline __device__ void* malloc(size_t size) { return __hip_hc_malloc(size); }
-static inline __device__ void* free(void* ptr) { return __hip_hc_free(ptr); }
+static inline __device__ void* malloc(size_t size) { return __hip_malloc(size); }
+static inline __device__ void* free(void* ptr) { return __hip_free(ptr); }
#if defined(__HCC_ACCELERATOR__) && defined(HC_FEATURE_PRINTF)
template
@@ -331,13 +331,13 @@ extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, gri
typedef int hipLaunchParm;
template
-inline void hipLaunchKernelGGL(F kernelName, const dim3& numblocks, const dim3& numthreads,
+inline void hipLaunchKernelGGL(F&& kernelName, const dim3& numblocks, const dim3& numthreads,
unsigned memperblock, hipStream_t streamId, Args... args) {
kernelName<<>>(args...);
}
template
-inline void hipLaunchKernel(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
+inline void hipLaunchKernel(F&& kernel, const dim3& numBlocks, const dim3& dimBlocks,
std::uint32_t groupMemBytes, hipStream_t stream, Args... args) {
hipLaunchKernelGGL(kernel, numBlocks, dimBlocks, groupMemBytes, stream, hipLaunchParm{},
std::move(args)...);
@@ -423,6 +423,8 @@ extern const __device__ __attribute__((weak)) __hip_builtin_gridDim_t gridDim;
#define hipGridDim_y gridDim.y
#define hipGridDim_z gridDim.z
+#include
+
#if __HIP_HCC_COMPAT_MODE__
// Define HCC work item functions in terms of HIP builtin variables.
#pragma push_macro("__DEFINE_HCC_FUNC")
@@ -462,7 +464,6 @@ hc_get_workitem_absolute_id(int dim)
#undef __CUDA__
#pragma pop_macro("__CUDA__")
-#include
hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
@@ -474,4 +475,6 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
#endif // defined(__clang__) && defined(__HIP__)
+#include
+
#endif // HIP_HCC_DETAIL_RUNTIME_H
diff --git a/include/hip/hcc_detail/math_functions.h b/include/hip/hcc_detail/math_functions.h
index c7a4d0e88f..faf3804719 100644
--- a/include/hip/hcc_detail/math_functions.h
+++ b/include/hip/hcc_detail/math_functions.h
@@ -22,6 +22,7 @@ THE SOFTWARE.
#pragma once
+#include "hip_fp16_math_fwd.h"
#include "math_fwd.h"
#include
@@ -1159,6 +1160,16 @@ long long llabs(long long x)
#endif
// END INTEGER
+__DEVICE__
+inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) {
+ return __ocml_fma_f16(x, y, z);
+}
+
+__DEVICE__
+inline float fma(float x, float y, float z) {
+ return fmaf(x, y, z);
+}
+
#pragma push_macro("__DEF_FLOAT_FUN")
#pragma push_macro("__DEF_FLOAT_FUN2")
#pragma push_macro("__DEF_FLOAT_FUN2I")
@@ -1352,10 +1363,10 @@ __DEVICE__ inline static unsigned long long max(long long arg1, unsigned long lo
return max((unsigned long long) arg1, arg2);
}*/
#else
-__DEVICE__ inline static int min(int arg1, int arg2) {
+__DEVICE__ inline int min(int arg1, int arg2) {
return (arg1 < arg2) ? arg1 : arg2;
}
-__DEVICE__ inline static int max(int arg1, int arg2) {
+__DEVICE__ inline int max(int arg1, int arg2) {
return (arg1 > arg2) ? arg1 : arg2;
}
diff --git a/include/hip/nvcc_detail/hip_runtime.h b/include/hip/nvcc_detail/hip_runtime.h
index fe0b19bf8c..19d740a1ee 100644
--- a/include/hip/nvcc_detail/hip_runtime.h
+++ b/include/hip/nvcc_detail/hip_runtime.h
@@ -118,6 +118,9 @@ typedef int hipLaunchParm;
}
#endif
+#define __clock() clock()
+#define __clock64() clock64()
+
#endif
#endif
diff --git a/install.sh b/install.sh
index f8ad640798..c7dc48e1a8 100755
--- a/install.sh
+++ b/install.sh
@@ -25,7 +25,7 @@ popd () {
function setupENV()
{
sudo apt-get update
- sudo apt-get install dpkg-dev rpm doxygen libelf-dev
+ sudo apt-get install dpkg-dev rpm doxygen libelf-dev rename
}
function buildHIP()
diff --git a/src/device_util.cpp b/src/device_util.cpp
index 2e78dea009..c86e52617b 100644
--- a/src/device_util.cpp
+++ b/src/device_util.cpp
@@ -27,70 +27,6 @@ THE SOFTWARE.
#include "hip/hip_runtime.h"
#include
-//=================================================================================================
-/*
- Implementation of malloc and free device functions.
-
- This is the best place to put them because the device
- global variables need to be initialized at the start.
-*/
-__device__ char gpuHeap[SIZE_OF_HEAP];
-__device__ uint32_t gpuFlags[NUM_PAGES];
-
-__device__ void* __hip_hc_malloc(size_t size) {
- char* heap = (char*)gpuHeap;
- if (size > SIZE_OF_HEAP) {
- return (void*)nullptr;
- }
- uint32_t totalThreads =
- blockDim.x * gridDim.x * blockDim.y * gridDim.y * blockDim.z * gridDim.z;
- uint32_t currentWorkItem = threadIdx.x + blockDim.x * blockIdx.x;
-
- uint32_t numHeapsPerWorkItem = NUM_PAGES / totalThreads;
- uint32_t heapSizePerWorkItem = SIZE_OF_HEAP / totalThreads;
-
- uint32_t stride = size / SIZE_OF_PAGE;
- uint32_t start = numHeapsPerWorkItem * currentWorkItem;
-
- uint32_t k = 0;
-
- while (gpuFlags[k] > 0) {
- k++;
- }
-
- for (uint32_t i = 0; i < stride - 1; i++) {
- gpuFlags[i + start + k] = 1;
- }
-
- gpuFlags[start + stride - 1 + k] = 2;
-
- void* ptr = (void*)(heap + heapSizePerWorkItem * currentWorkItem + k * SIZE_OF_PAGE);
-
- return ptr;
-}
-
-__device__ void* __hip_hc_free(void* ptr) {
- if (ptr == nullptr) {
- return nullptr;
- }
-
- uint32_t offsetByte = (uint64_t)ptr - (uint64_t)gpuHeap;
- uint32_t offsetPage = offsetByte / SIZE_OF_PAGE;
-
- while (gpuFlags[offsetPage] != 0) {
- if (gpuFlags[offsetPage] == 2) {
- gpuFlags[offsetPage] = 0;
- offsetPage++;
- break;
- } else {
- gpuFlags[offsetPage] = 0;
- offsetPage++;
- }
- }
-
- return nullptr;
-}
-
// abort
__device__ void abort() { return hc::abort(); }
diff --git a/src/device_util.h b/src/device_util.h
index 8fa96da9d9..84dbbf71ed 100644
--- a/src/device_util.h
+++ b/src/device_util.h
@@ -29,14 +29,6 @@ THE SOFTWARE.
Heap size computation for malloc and free device functions.
*/
-#define NUM_PAGES_PER_THREAD 16
-#define SIZE_OF_PAGE 64
-#define NUM_THREADS_PER_CU 64
-#define NUM_CUS_PER_GPU 64 // Specific for r9 Nano
-#define NUM_PAGES NUM_PAGES_PER_THREAD* NUM_THREADS_PER_CU* NUM_CUS_PER_GPU
-#define SIZE_MALLOC NUM_PAGES* SIZE_OF_PAGE
-#define SIZE_OF_HEAP SIZE_MALLOC
-
#define HIP_SQRT_2 1.41421356237
#define HIP_SQRT_PI 1.77245385091
@@ -62,9 +54,6 @@ THE SOFTWARE.
#define HIP_PI 3.14159265358979323846
-__device__ void* __hip_hc_malloc(size_t size);
-__device__ void* __hip_hc_free(void* ptr);
-
__device__ float __hip_erfinvf(float x);
__device__ double __hip_erfinv(double x);
diff --git a/src/hip_device.cpp b/src/hip_device.cpp
index 72150c3f54..2aae7cf2a8 100644
--- a/src/hip_device.cpp
+++ b/src/hip_device.cpp
@@ -99,7 +99,7 @@ hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) {
return ihipLogStatus(hipErrorInvalidValue);
}
if (limit == hipLimitMallocHeapSize) {
- *pValue = (size_t)SIZE_OF_HEAP;
+ *pValue = (size_t)__HIP_SIZE_OF_HEAP;
return ihipLogStatus(hipSuccess);
} else {
return ihipLogStatus(hipErrorUnsupportedLimit);
diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp
index dc5390f014..bda6ad2650 100644
--- a/src/hip_memory.cpp
+++ b/src/hip_memory.cpp
@@ -1543,6 +1543,8 @@ hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, hipStream_t strea
{
hipError_t e = hipSuccess;
+ if (sizeBytes == 0) return e;
+
if (stream && (dst != NULL)) {
if(copyDataType == ihipMemsetDataTypeChar){
if ((sizeBytes & 0x3) == 0) {
diff --git a/tests/src/deviceLib/hipDeviceMalloc.cpp b/tests/src/deviceLib/hipDeviceMalloc.cpp
new file mode 100644
index 0000000000..4ec10077c5
--- /dev/null
+++ b/tests/src/deviceLib/hipDeviceMalloc.cpp
@@ -0,0 +1,188 @@
+/*
+Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
+IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+/* HIT_START
+ * BUILD: %t %s NVCC_OPTIONS -std=c++11
+ * RUN: %t EXCLUDE_HIP_PLATFORM nvcc
+ * HIT_END
+ */
+#include "test_common.h"
+#include
+#include
+
+// Tolerance for error
+const double tolerance = 1e-6;
+const bool verbose = false;
+
+#define BLKDIM_X 64
+#define BLKDIM_Y 1
+#define BLKDIM_Z 1
+#define NUM_BLK_X 1
+#define NUM_BLK_Y 1
+#define NUM_BLK_Z 1
+
+#define LEN (BLKDIM_X * BLKDIM_Y * BLKDIM_Z * NUM_BLK_X * NUM_BLK_Y * NUM_BLK_Z)
+
+#define ALL_FUN \
+ OP(add) \
+ OP(sub) \
+ OP(mul) \
+ OP(div)
+
+#define OP(x) CK_##x,
+enum CalcKind {
+ ALL_FUN
+};
+#undef OP
+
+#define OP(x) case CK_##x: return #x;
+std::string getName(enum CalcKind CK) {
+ switch(CK){
+ ALL_FUN
+ }
+}
+#undef OP
+
+// Calculates function.
+// If the function has one argument, B is ignored.
+#define ONE_ARG(func) \
+ case CK_##func: \
+ return std::func(A);
+
+template
+__device__ __host__ FloatT calc(FloatT A, FloatT B, enum CalcKind CK) {
+ switch (CK) {
+ case CK_add:
+ return A + B;
+ case CK_sub:
+ return A - B;
+ case CK_mul:
+ return A * B;
+ case CK_div:
+ return A / B;
+ }
+}
+
+// Allocate memory in kernel and save the address to pA and pB.
+// Copy value from A, B to allocated memory.
+template
+__global__ void kernel_alloc(FloatT* A, FloatT* B, FloatT** pA, FloatT** pB) {
+ int tx = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x
+ + (hipThreadIdx_y + hipBlockDim_y * hipBlockIdx_y) * hipBlockDim_x
+ + (hipThreadIdx_z + hipBlockDim_z * hipBlockIdx_z) * hipBlockDim_x
+ * hipBlockDim_y;
+ if (tx == 0) {
+ *pA = (FloatT*)malloc(sizeof(FloatT) * LEN);
+ *pB = (FloatT*)malloc(sizeof(FloatT) * LEN);
+ for (int i = 0; i < LEN; i++) {
+ (*pA)[i] = A[i];
+ (*pB)[i] = B[i];
+ }
+ }
+}
+
+// Do calculation using values saved in allocated memmory. pA, pB are buffers
+// containing the address of the device-side allocated array.
+template
+__global__ void kernel_free(FloatT** pA, FloatT** pB, FloatT* C, enum CalcKind CK) {
+ int tx = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x
+ + (hipThreadIdx_y + hipBlockDim_y * hipBlockIdx_y) * hipBlockDim_x
+ + (hipThreadIdx_z + hipBlockDim_z * hipBlockIdx_z) * hipBlockDim_x
+ * hipBlockDim_y;
+ C[tx] = calc((*pA)[tx], (*pB)[tx], CK);
+ if (tx == 0) {
+ free(*pA);
+ free(*pB);
+ }
+}
+
+template
+void test() {
+ FloatT *A, *Ad, *B, *Bd, *C, *Cd, *D;
+ A = new FloatT[LEN];
+ B = new FloatT[LEN];
+ C = new FloatT[LEN];
+ D = new FloatT[LEN];
+ hipMalloc((void**)&Ad, sizeof(FloatT) * LEN);
+ hipMalloc((void**)&Bd, sizeof(FloatT) * LEN);
+ hipMalloc((void**)&Cd, sizeof(FloatT) * LEN);
+
+ for (uint32_t i = 0; i < LEN; i++) {
+ A[i] = (i + 1) * 1.0f;
+ B[i] = A[i];
+ C[i] = A[i];
+ }
+ hipMemcpy(Ad, A, sizeof(FloatT) * LEN, hipMemcpyHostToDevice);
+ hipMemcpy(Bd, B, sizeof(FloatT) * LEN, hipMemcpyHostToDevice);
+
+ // Run kernel for a calculation kind and verify by comparing with host
+ // calculation result. Returns false if fails.
+ auto test_fun = [&](enum CalcKind CK) {
+ // kernel_alloc allocates memory on device side and initialize it.
+ // kernel_free uses allocated memory from kernel_alloc and does the
+ // calculation then free the memory.
+ // pA and pB are buffers to pass the device-side allocated memory address
+ // from kernel_alloc to kernel_free.
+ FloatT **pA, **pB;
+ hipMalloc((FloatT***)&pA, sizeof(FloatT*));
+ hipMalloc((FloatT***)&pB, sizeof(FloatT*));
+ dim3 blkDim(BLKDIM_X, BLKDIM_Y, BLKDIM_Z);
+ dim3 numBlk(NUM_BLK_X, NUM_BLK_Y, NUM_BLK_Z);
+ hipLaunchKernelGGL(kernel_alloc, numBlk, blkDim, 0, 0,
+ Ad, Bd, pA, pB);
+ hipDeviceSynchronize();
+ hipLaunchKernelGGL(kernel_free, numBlk, blkDim, 0, 0,
+ pA, pB, Cd, CK);
+ hipMemcpy(C, Cd, sizeof(FloatT) * LEN, hipMemcpyDeviceToHost);
+ hipFree(pA);
+ hipFree(pB);
+ for (int i = 0; i < LEN; i++) {
+ FloatT Expected = calc(A[i], B[i], CK);
+ FloatT error = std::abs(C[i] - Expected);
+ if (std::abs(Expected) > tolerance) error /= std::abs(Expected);
+ bool pass = error < tolerance;
+ if (verbose || !pass) {
+ std::cout << "Function: " << getName(CK) << " Operands: " << A[i] << " " << B[i]
+ << " Result: " << C[i] << " Expected: " << Expected << " Error: " << error
+ << " Pass: " << pass << std::endl;
+ }
+ if (!pass)
+ return false;
+ }
+ return true;
+ };
+
+#define OP(x) assert(test_fun(CK_##x));
+ ALL_FUN
+#undef OP
+
+ hipFree(Ad);
+ hipFree(Bd);
+ hipFree(Cd);
+ delete[] A;
+ delete[] B;
+ delete[] C;
+ delete[] D;
+}
+
+int main() {
+ test();
+ test();
+ passed();
+ return 0;
+}
diff --git a/tests/src/deviceLib/hipFloatMath.cpp b/tests/src/deviceLib/hipFloatMath.cpp
index 403a77f703..fb7e4451dc 100644
--- a/tests/src/deviceLib/hipFloatMath.cpp
+++ b/tests/src/deviceLib/hipFloatMath.cpp
@@ -40,10 +40,10 @@ __global__ void floatMath(hipLaunchParm lp, float* In, float* Out) {
Out[tid] = __exp10f(Out[tid]);
Out[tid] = __expf(Out[tid]);
Out[tid] = __frsqrt_rn(Out[tid]);
- Out[tid] = __fsqrt_rd(Out[tid]);
- Out[tid] = __fsqrt_rn(Out[tid]);
- Out[tid] = __fsqrt_ru(Out[tid]);
- Out[tid] = __fsqrt_rz(Out[tid]);
+ //Out[tid] = __fsqrt_rd(Out[tid]);
+ //Out[tid] = __fsqrt_rn(Out[tid]);
+ //Out[tid] = __fsqrt_ru(Out[tid]);
+ //Out[tid] = __fsqrt_rz(Out[tid]);
Out[tid] = __log10f(Out[tid]);
Out[tid] = __log2f(Out[tid]);
Out[tid] = __logf(Out[tid]);
diff --git a/tests/src/deviceLib/hipSimpleAtomicsTest.cpp b/tests/src/deviceLib/hipSimpleAtomicsTest.cpp
index 4890ed0f97..ec38a5d327 100644
--- a/tests/src/deviceLib/hipSimpleAtomicsTest.cpp
+++ b/tests/src/deviceLib/hipSimpleAtomicsTest.cpp
@@ -18,7 +18,7 @@ THE SOFTWARE.
*/
/* HIT_START
- * BUILD: %t %s ../test_common.cpp
+ * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11 --gpu-architecture=sm_60
* RUN: %t
* HIT_END
*/
@@ -334,4 +334,4 @@ int main(int argc, char** argv) {
hipDeviceReset();
printf("%s completed, returned %s\n", sampleName, testResult ? "OK" : "ERROR!");
exit(testResult ? EXIT_SUCCESS : EXIT_FAILURE);
-}
\ No newline at end of file
+}
diff --git a/tests/src/deviceLib/hipTestClock.cpp b/tests/src/deviceLib/hipTestClock.cpp
index 46f64e35a3..ee6dca8a42 100644
--- a/tests/src/deviceLib/hipTestClock.cpp
+++ b/tests/src/deviceLib/hipTestClock.cpp
@@ -33,8 +33,6 @@ THE SOFTWARE.
#define LEN 512
#define SIZE 2048
-struct TestClock {
-
static __global__ void kernel1(int* Ad) {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
Ad[tid] = clock() + clock64() + __clock() + __clock64();
@@ -61,9 +59,8 @@ struct TestClock {
assert(0 != A[i]);
}
}
-};
int main() {
- TestClock().run();
+ run();
passed();
}
diff --git a/tests/src/deviceLib/hipTestFMA.cpp b/tests/src/deviceLib/hipTestFMA.cpp
new file mode 100644
index 0000000000..6350999084
--- /dev/null
+++ b/tests/src/deviceLib/hipTestFMA.cpp
@@ -0,0 +1,140 @@
+/*
+Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/* HIT_START
+ * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
+ * RUN: %t
+ * HIT_END
+ */
+
+#include "test_common.h"
+#include
+#include
+#include
+#include
+
+#define HIP_ASSERT(status) assert(status == hipSuccess)
+
+#define LEN 50
+#define SIZE (LEN * sizeof(bool))
+
+ __global__ void kernelTestFMA(bool *Ad) {
+ float f = 1.0f / 3.0f;
+ double d = f;
+ int i = 0;
+ auto Check = [&](bool Cond) { Ad[i++] = Cond; };
+ // f * f + 3.0f will be different if promoted to double.
+ float floatResult = fma(f, f, 3.0f);
+ double doubleResult = fma(d, d, 3.0);
+ Check(floatResult != doubleResult);
+
+ // check promote to float.
+ Check(fma(f, f, 3) == floatResult);
+ Check(fma(f, f, (char)3) == floatResult);
+ Check(fma(f, f, (unsigned char)3) == floatResult);
+ Check(fma(f, f, (short)3) == floatResult);
+ Check(fma(f, f, (unsigned short)3) == floatResult);
+ Check(fma(f, f, (int)3) == floatResult);
+ Check(fma(f, f, (unsigned int)3) == floatResult);
+ Check(fma(f, f, (long)3) == floatResult);
+ Check(fma(f, f, (unsigned long)3) == floatResult);
+ Check(fma(f, f, true) == fma(f, f, 1.0f));
+
+ // check promote to double.
+ Check(fma(d, (double)f, 3) == doubleResult);
+ Check(fma(d, (double)f, (char)3) == doubleResult);
+ Check(fma(d, (double)f, (unsigned char)3) == doubleResult);
+ Check(fma(d, (double)f, (short)3) == doubleResult);
+ Check(fma(d, (double)f, (unsigned short)3) == doubleResult);
+ Check(fma(d, (double)f, (int)3) == doubleResult);
+ Check(fma(d, (double)f, (unsigned int)3) == doubleResult);
+ Check(fma(d, (double)f, (long)3) == doubleResult);
+ Check(fma(d, (double)f, (unsigned long)3) == doubleResult);
+ Check(fma(d, (double)f, true) == fma((double)f, (double)f, 1.0));
+
+ while (i < LEN)
+ Check(true);
+ }
+
+ void runTestFMA() {
+ bool *Ad;
+ bool A[LEN];
+ for (unsigned i = 0; i < LEN; i++) {
+ A[i] = 0;
+ }
+
+ HIP_ASSERT(hipMalloc((void **)&Ad, SIZE));
+ hipLaunchKernelGGL(kernelTestFMA, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, Ad);
+ HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost));
+
+ for (unsigned i = 0; i < LEN; i++) {
+ assert(A[i]);
+ }
+ }
+
+ __global__ void kernelTestHalfFMA(bool *Ad) {
+ _Float16 h = (_Float16)(1.0f/3.0f);
+ float f = h;
+ double d = f;
+ int i = 0;
+ auto Check = [&](bool Cond) { Ad[i++] = Cond; };
+ // h * h + 3 will be different if promoted to float.
+ _Float16 halfResult = fma(h, h, (_Float16)3);
+ float floatResult = fma(f, f, 3.0f);
+ double doubleResult = fma(d, d, 3.0);
+ Check(halfResult != floatResult);
+ Check(halfResult != doubleResult);
+
+ // check promote to half.
+ Check(fma(h, h, 3) == halfResult);
+ Check(fma(h, h, (char)3) == halfResult);
+ Check(fma(h, h, (unsigned char)3) == halfResult);
+ Check(fma(h, h, (short)3) == halfResult);
+ Check(fma(h, h, (unsigned short)3) == halfResult);
+ Check(fma(h, h, (int)3) == halfResult);
+ Check(fma(h, h, (unsigned int)3) == halfResult);
+ Check(fma(h, h, (long)3) == halfResult);
+ Check(fma(h, h, (unsigned long)3) == halfResult);
+ Check(fma(h, h, true) == fma(h, h, (_Float16)1));
+
+ while (i < LEN)
+ Check(true);
+ }
+
+ void runTestHalfFMA() {
+ bool *Ad;
+ bool A[LEN];
+ for (unsigned i = 0; i < LEN; i++) {
+ A[i] = 0;
+ }
+
+ HIP_ASSERT(hipMalloc((void **)&Ad, SIZE));
+ hipLaunchKernelGGL(kernelTestHalfFMA, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, Ad);
+ HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost));
+
+ for (unsigned i = 0; i < LEN; i++) {
+ assert(A[i]);
+ }
+ }
+
+int main() {
+ runTestFMA();
+ runTestHalfFMA();
+ passed();
+}
diff --git a/tests/src/deviceLib/hipTestIncludeMath.cpp b/tests/src/deviceLib/hipTestIncludeMath.cpp
index 6063eee76c..92b93eda36 100644
--- a/tests/src/deviceLib/hipTestIncludeMath.cpp
+++ b/tests/src/deviceLib/hipTestIncludeMath.cpp
@@ -30,6 +30,7 @@ THE SOFTWARE.
// Incorrect implementation causes compilation failure due to conflict
// declartions.
+#include
#include
// Test __HIP_DEVICE_COMPILE__ is defined after math_functions.h
@@ -45,14 +46,6 @@ __device__ __host__ inline void throw_std_bad_alloc()
#endif
}
-// Test __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ and __HIP_ARCH_HAS_DYNAMIC_PARALLEL__
-// is defined. Eigen HIP/hcc/Half.h __ldg depends on this.
-#if !defined(__HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__) || \
- !defined(__HIP_ARCH_HAS_DYNAMIC_PARALLEL__)
-#error \
- "__HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ or __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ not defined"
-#endif
-
#include
#include "test_common.h"
diff --git a/tests/src/deviceLib/hipTestNew.cpp b/tests/src/deviceLib/hipTestNew.cpp
index 60774ff21d..d644f8b483 100644
--- a/tests/src/deviceLib/hipTestNew.cpp
+++ b/tests/src/deviceLib/hipTestNew.cpp
@@ -33,7 +33,6 @@ THE SOFTWARE.
#define LEN 512
#define SIZE 2048
-struct TestPlacementNew {
class A {
public:
__device__ A() {
@@ -63,9 +62,8 @@ struct TestPlacementNew {
assert(i == A[i]);
}
}
-};
int main() {
- TestPlacementNew().run();
+ run();
passed();
}
diff --git a/tests/src/deviceLib/hip_trig.cpp b/tests/src/deviceLib/hip_trig.cpp
index e9bc1dcf13..59eaeacadb 100644
--- a/tests/src/deviceLib/hip_trig.cpp
+++ b/tests/src/deviceLib/hip_trig.cpp
@@ -34,21 +34,26 @@ THE SOFTWARE.
#include
+#define HIP_ASSERT(x) (assert((x) == hipSuccess))
+
#define LEN 512
#define SIZE LEN << 2
+#define TEST_DEBUG (0)
+
__global__ void kernel_trig(hipLaunchParm lp, float* In, float* sin_d, float* cos_d, float* tan_d,
float* sin_pd, float* cos_pd) {
int tid = threadIdx.x + blockIdx.x * blockDim.x;
- sin_d[tid] = __sinf(In[tid]);
- cos_d[tid] = __cosf(In[tid]);
- tan_d[tid] = __tanf(In[tid]);
- __sincosf(In[tid], &sin_pd[tid], &cos_pd[tid]);
+ sin_d[tid] = sinf(In[tid]);
+ cos_d[tid] = cosf(In[tid]);
+ tan_d[tid] = tanf(In[tid]);
+ sincosf(In[tid], &sin_pd[tid], &cos_pd[tid]);
}
int main() {
float *In, *sin_h, *cos_h, *tan_h, *sin_ph, *cos_ph;
float *In_d, *sin_d, *cos_d, *tan_d, *sin_pd, *cos_pd;
+ int errors = 0;
In = new float[LEN];
sin_h = new float[LEN];
cos_h = new float[LEN];
@@ -63,14 +68,16 @@ int main() {
sin_ph[i] = 0.0f;
cos_ph[i] = 0.0f;
}
- hipMalloc((void**)&In_d, SIZE);
- hipMalloc((void**)&sin_d, SIZE);
- hipMalloc((void**)&cos_d, SIZE);
- hipMalloc((void**)&tan_d, SIZE);
- hipMalloc((void**)&sin_pd, SIZE);
- hipMalloc((void**)&cos_pd, SIZE);
+ HIP_ASSERT(hipMalloc((void**)&In_d, SIZE));
+ HIP_ASSERT(hipMalloc((void**)&sin_d, SIZE));
+ HIP_ASSERT(hipMalloc((void**)&cos_d, SIZE));
+ HIP_ASSERT(hipMalloc((void**)&tan_d, SIZE));
+ HIP_ASSERT(hipMalloc((void**)&sin_pd, SIZE));
+ HIP_ASSERT(hipMalloc((void**)&cos_pd, SIZE));
+
hipMemcpy(In_d, In, SIZE, hipMemcpyHostToDevice);
- hipLaunchKernel(kernel_trig, dim3(LEN, 1, 1), dim3(1, 1, 1), 0, 0, In_d, sin_d, cos_d, tan_d,
+ hipLaunchKernel(kernel_trig, dim3(LEN, 1, 1), dim3(1, 1, 1), 0, 0,
+ In_d, sin_d, cos_d, tan_d,
sin_pd, cos_pd);
hipMemcpy(sin_h, sin_d, SIZE, hipMemcpyDeviceToHost);
hipMemcpy(cos_h, cos_d, SIZE, hipMemcpyDeviceToHost);
@@ -79,8 +86,28 @@ int main() {
hipMemcpy(cos_ph, cos_pd, SIZE, hipMemcpyDeviceToHost);
for (int i = 0; i < LEN; i++) {
if (sin_h[i] != sin_ph[i] || cos_h[i] != cos_ph[i] || tan_h[i] * cos_h[i] != sin_h[i]) {
- std::cout << "Failed!" << std::endl;
+ errors++;
+#if TEST_DEBUG
+ std::cout << "Check Failed!" << std::endl;
+ std::cout << " sin_h: " << sin_h[i] << " sin_ph: " << sin_ph[i] << "\n"
+ << " cos_h: " << cos_h[i] << " cos_ph:" << cos_ph[i] << "\n"
+ << " tan_h * cos_h: " << tan_h[i] * cos_h[i] << " sin_h[i]: " << sin_h[i] << "\n";
+#endif
}
}
- passed();
+
+ HIP_ASSERT(hipFree(In_d));
+ HIP_ASSERT(hipFree(sin_d));
+ HIP_ASSERT(hipFree(cos_d));
+ HIP_ASSERT(hipFree(tan_d));
+ HIP_ASSERT(hipFree(sin_pd));
+ HIP_ASSERT(hipFree(cos_pd));
+
+ if (errors != 0) {
+ std::cout << "hip_trig FAILED!" << std::endl;
+ return -1;
+ } else {
+ std::cout << "hip_trig PASSED!" << std::endl;
+ }
+ return errors;
}
diff --git a/tests/src/hipEnvVarDriver.cpp b/tests/src/hipEnvVarDriver.cpp
index 599b138565..e52aa46063 100644
--- a/tests/src/hipEnvVarDriver.cpp
+++ b/tests/src/hipEnvVarDriver.cpp
@@ -38,7 +38,10 @@ int getDeviceNumber() {
string str;
std::this_thread::sleep_for(std::chrono::milliseconds(10));
if (!(in = popen("./directed_tests/hipEnvVar -c", "r"))) {
- return 1;
+ // Check at same level
+ if (!(in = popen("./hipEnvVar -c", "r"))) {
+ return 1;
+ }
}
while (fgets(buff, 512, in) != NULL) {
cout << buff;
@@ -54,7 +57,11 @@ void getDevicePCIBusNumRemote(int deviceID, char* pciBusID) {
str += std::to_string(deviceID);
std::this_thread::sleep_for(std::chrono::milliseconds(10));
if (!(in = popen(str.c_str(), "r"))) {
- exit(1);
+ // Check at same level
+ if (!(in = popen("./hipEnvVar -d ", "r"))) {
+ exit(1);
+ }
+
}
while (fgets(pciBusID, 100, in) != NULL) {
cout << pciBusID;
diff --git a/tests/src/kernel/hipTestGlobalVariable.cpp b/tests/src/kernel/hipTestGlobalVariable.cpp
new file mode 100644
index 0000000000..8ab8bef9c2
--- /dev/null
+++ b/tests/src/kernel/hipTestGlobalVariable.cpp
@@ -0,0 +1,91 @@
+/*
+Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/* HIT_START
+ * BUILD: %t %s ../test_common.cpp
+ * RUN: %t
+ * HIT_END
+ */
+
+#include
+#include
+#include
+#include "test_common.h"
+
+#define HIP_ASSERT(status) assert(status == hipSuccess)
+
+#define LEN 512
+#define SIZE 2048
+
+ __constant__ int ConstantGlobalVar = 123;
+
+ static __global__ void kernel(int* Ad) {
+ int tid = threadIdx.x + blockIdx.x * blockDim.x;
+ Ad[tid] = ConstantGlobalVar;
+ }
+
+ void runTestConstantGlobalVar() {
+ int *A, *Ad;
+ A = new int[LEN];
+ for (unsigned i = 0; i < LEN; i++) {
+ A[i] = 0;
+ }
+
+ HIP_ASSERT(hipMalloc((void**)&Ad, SIZE));
+ hipLaunchKernelGGL(kernel, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
+ HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost));
+
+ for (unsigned i = 0; i < LEN; i++) {
+ assert(123 == A[i]);
+ }
+ }
+
+ __device__ int GlobalArray[LEN];
+
+ static __global__ void kernelWrite() {
+ int tid = threadIdx.x + blockIdx.x * blockDim.x;
+ GlobalArray[tid] = tid;
+ }
+ static __global__ void kernelRead(int* Ad) {
+ int tid = threadIdx.x + blockIdx.x * blockDim.x;
+ Ad[tid] = GlobalArray[tid];
+ }
+
+ void runTestGlobalArray() {
+ int *A, *Ad;
+ A = new int[LEN];
+ for (unsigned i = 0; i < LEN; i++) {
+ A[i] = 0;
+ }
+
+ HIP_ASSERT(hipMalloc((void**)&Ad, SIZE));
+ hipLaunchKernelGGL(kernelWrite, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0);
+ hipLaunchKernelGGL(kernelRead, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad);
+ HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost));
+
+ for (unsigned i = 0; i < LEN; i++) {
+ assert(i == A[i]);
+ }
+ }
+
+int main() {
+ runTestConstantGlobalVar();
+ runTestGlobalArray();
+ passed();
+}
diff --git a/tests/src/runtimeApi/event/record_event.cpp b/tests/src/runtimeApi/event/record_event.cpp
index afd6bef2ef..3eb54e1735 100644
--- a/tests/src/runtimeApi/event/record_event.cpp
+++ b/tests/src/runtimeApi/event/record_event.cpp
@@ -170,8 +170,8 @@ void runTests(int64_t numElements) {
// for (int waitStart=0; waitStart<2; waitStart++) {
for (int waitStart = 1; waitStart >= 0; waitStart--) {
unsigned W = waitStart ? 0x1000 : 0;
- test(W | 0x01, C_d, C_h, numElements, 0, waitStart, syncNone);
- test(W | 0x02, C_d, C_h, numElements, stream, waitStart, syncNone);
+ test(W | 0x01, C_d, C_h, numElements, 0, 0, syncNone);
+ test(W | 0x02, C_d, C_h, numElements, stream, 0, syncNone);
test(W | 0x04, C_d, C_h, numElements, 0, waitStart, syncStream);
test(W | 0x08, C_d, C_h, numElements, stream, waitStart, syncStream);
test(W | 0x10, C_d, C_h, numElements, 0, waitStart, syncStopEvent);
diff --git a/tests/src/texture/hipBindTexRef1DFetch.cpp b/tests/src/texture/hipBindTexRef1DFetch.cpp
index 5d9a1b103b..fa63efc1bb 100644
--- a/tests/src/texture/hipBindTexRef1DFetch.cpp
+++ b/tests/src/texture/hipBindTexRef1DFetch.cpp
@@ -56,7 +56,7 @@ int runTest() {
int testResult = 1;
float *texBuf;
float val[N], output[N];
- size_t size = 0;
+ size_t offset = 0;
float *devBuf;
for (int i = 0; i < N; i++) {
val[i] = (float)i;
@@ -74,7 +74,8 @@ int runTest() {
tex.filterMode = hipFilterModePoint;
tex.normalized = 0;
- HIPCHECK(hipBindTexture(&size, tex, (void *)texBuf, chanDesc, N * sizeof(float)));
+ HIPCHECK(hipBindTexture(&offset, tex, (void *)texBuf, chanDesc, N * sizeof(float)));
+ HIPCHECK(hipGetTextureAlignmentOffset(&offset,&tex));
dim3 dimBlock(64, 1, 1);
dim3 dimGrid(N / dimBlock.x, 1, 1);
diff --git a/tests/src/texture/hipGetAlgntoffset2D.cpp b/tests/src/texture/hipGetAlgntoffset2D.cpp
deleted file mode 100644
index 3a4cc7dfc8..0000000000
--- a/tests/src/texture/hipGetAlgntoffset2D.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
-Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-*/
-
-/* HIT_START
- * BUILD: %t %s ../test_common.cpp
- * RUN: %t
- * HIT_END
- */
-#include
-#include "test_common.h"
-
-using namespace std;
-#define R 8 //rows, height
-#define C 8 //columns, width
-
-texture tex;
-
-bool runTest(void);
-
-int main(int argc, char** argv) {
- bool testResult=runTest();
-
- if (testResult) {
- passed();
- } else {
- exit(EXIT_FAILURE);
- }
-}
-
-bool runTest()
-{
-int val[R][C],i,j;
-size_t offset;
-
-for(i=0;i