diff --git a/CMakeLists.txt b/CMakeLists.txt index edb8c2d238..3c62ea4365 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -288,12 +288,22 @@ if(HIP_PLATFORM STREQUAL "hcc") install(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::) include(CMakePackageConfigHelpers) + if(HIP_COMPILER STREQUAL "hcc") configure_package_config_file( - hip-config.cmake.in + hip-config-hcc.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/hip-config.cmake INSTALL_DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR ) + elseif(HIP_COMPILER STREQUAL "clang") + configure_package_config_file( + hip-config-clang.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/hip-config.cmake + INSTALL_DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} + PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR + ) + endif() + write_basic_package_version_file( ${CMAKE_CURRENT_BINARY_DIR}/hip-config-version.cmake VERSION "${HIP_VERSION}" diff --git a/Jenkinsfile b/Jenkinsfile index 2432cea38e..ab6dd6d67f 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -167,7 +167,8 @@ def docker_build_inside_image( def build_image, String inside_args, String platf } // Cap the maximum amount of testing, in case of hangs - // Excluding hipPrintfKernel test from automation; variable fails on CI test machines + // Excluding hipVectorTypes test from automation; due to regression from HCC commit 2367133 + // Excluding hipFloatMath test from automation; due to regression from ROCDL commit 2fc04e1 timeout(time: 1, unit: 'HOURS') { stage("${platform} unit testing") @@ -177,7 +178,7 @@ def docker_build_inside_image( def build_image, String inside_args, String platf cd ${build_dir_rel} make install -j\$(nproc) make build_tests -i -j\$(nproc) - ctest -E hipVectorTypes + ctest -E "(hipVectorTypes.tst|hipVectorTypesDevice.tst|hipFloatMath.tst)" """ // If unit tests output a junit or xunit file in the future, jenkins can parse that file // to display test results on the dashboard @@ -385,4 +386,40 @@ rocm_head: docker_clean_images( job_name, hip_image_name ) */ } +}, +cuda_9_x: +{ + node('hip-cuda') + { + //////////////////////////////////////////////////////////////////////// + // Block of string constants customizing behavior for cuda + String nvcc_ver = 'nvcc-9.x' + String from_image = 'ci_test_nodes/cuda-9.x/ubuntu-16.04:latest' + String inside_args = '--runtime=nvidia'; + + // Checkout source code, dependencies and version files + String source_hip_rel = checkout_and_version( nvcc_ver ) + + // Create/reuse a docker image that represents the hip build environment + def hip_build_image = docker_build_image( nvcc_ver, 'hip', '', source_hip_rel, from_image ) + + // Print system information for the log + hip_build_image.inside( inside_args ) + { + sh """#!/usr/bin/env bash + set -x + nvidia-smi + nvcc --version + """ + } + + // Conctruct a binary directory path based on build config + String build_hip_rel = build_directory_rel( build_config ); + + // Build hip inside of the build environment + docker_build_inside_image( hip_build_image, inside_args, nvcc_ver, "-DHIP_NVCC_FLAGS=--Wno-deprecated-gpu-targets", build_config, source_hip_rel, build_hip_rel ) + + // Clean docker image + docker_clean_images( 'hip', docker_build_image_name( ) ) + } } diff --git a/docs/markdown/CUBLAS_API_supported_by_HIP.md b/docs/markdown/CUBLAS_API_supported_by_HIP.md index 932092612b..750703f4e1 100644 --- a/docs/markdown/CUBLAS_API_supported_by_HIP.md +++ b/docs/markdown/CUBLAS_API_supported_by_HIP.md @@ -35,9 +35,9 @@ | enum |***`cublasAtomicsMode_t`*** | | | 0 |*`CUBLAS_ATOMICS_NOT_ALLOWED`* | | | 1 |*`CUBLAS_ATOMICS_ALLOWED`* | | -| enum |***`cublasAtomicsMode_t`*** | | -| -1 |*`CUBLAS_GEMM_DFALT`* | | -| -1 |*`CUBLAS_GEMM_DEFAULT`* | | +| enum |***`cublasGemmAlgo_t`*** |***`hipblasGemmAlgo_t`*** | +| -1 |*`CUBLAS_GEMM_DFALT`* |*`HIPBLAS_GEMM_DEFAULT`* | 160 | +| -1 |*`CUBLAS_GEMM_DEFAULT`* |*`HIPBLAS_GEMM_DEFAULT`* | 160 | | 0 |*`CUBLAS_GEMM_ALGO0`* | | | 1 |*`CUBLAS_GEMM_ALGO1`* | | | 2 |*`CUBLAS_GEMM_ALGO2`* | | @@ -391,7 +391,7 @@ |`cublasZgemm3m` | | |`cublasHgemm` |`hipblasHgemm` | |`cublasSgemmEx` | | -|`cublasGemmEx` | | +|`cublasGemmEx` |`hipblasGemmEx` | |`cublasCgemmEx` | | |`cublasUint8gemmBias` | | |`cublasSsyrk` | | diff --git a/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md b/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md index dca2683b12..7f87981000 100644 --- a/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -93,10 +93,10 @@ ## **7. Occupancy** -| **CUDA** | **HIP** | -|-----------------------------------------------------------|-------------------------------| +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-----------------------------------------------| | `cudaOccupancyMaxActiveBlocksPerMultiprocessor` | `hipOccupancyMaxActiveBlocksPerMultiprocessor`| -| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | | +| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | | ## **8. Execution Control [deprecated since 7.0]** @@ -326,13 +326,13 @@ ## **25. Texture Object Management** -| **CUDA** | **HIP** | -|-----------------------------------------------------------|-------------------------------| -| `cudaCreateTextureObject` |`hipCreateTextureObject` | -| `cudaDestroyTextureObject` |`hipDestroyTextureObject` | -| `cudaGetTextureObjectResourceDesc` |`hipGetTextureObjectResourceDesc` | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|--------------------------------------| +| `cudaCreateTextureObject` |`hipCreateTextureObject` | +| `cudaDestroyTextureObject` |`hipDestroyTextureObject` | +| `cudaGetTextureObjectResourceDesc` |`hipGetTextureObjectResourceDesc` | | `cudaGetTextureObjectResourceViewDesc` |`hipGetTextureObjectResourceViewDesc` | -| `cudaGetTextureObjectTextureDesc` |`hipGetTextureObjectTextureDesc` | +| `cudaGetTextureObjectTextureDesc` |`hipGetTextureObjectTextureDesc` | ## **26. Surface Object Management** @@ -352,36 +352,36 @@ ## **28. C++ API Routines** *(7.0 contains, 7.5 doesn’t)* -| **CUDA** | **HIP** | -|-----------------------------------------------------------|-------------------------------| -| `cudaBindSurfaceToArray` | | -| `cudaBindTexture` | `hipBindTexture` | -| `cudaBindTexture2D` | | -| `cudaBindTextureToArray` | | -| `cudaBindTextureToMipmappedArray` | | -| `cudaCreateChannelDesc` | `hipCreateChannelDesc` | -| `cudaFuncGetAttributes` | | -| `cudaFuncSetCacheConfig` | | -| `cudaGetSymbolAddress` | | -| `cudaGetSymbolSize` | | -| `cudaGetTextureAlignmentOffset` | | -| `cudaLaunch` | | -| `cudaLaunchKernel` | | -| `cudaMallocHost` | | -| `cudaMallocManaged` | | -| `cudaMemcpyFromSymbol` | | -| `cudaMemcpyFromSymbolAsync` | | -| `cudaMemcpyToSymbol` | | -| `cudaMemcpyToSymbolAsync` | | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|------------------------------------------------| +| `cudaBindSurfaceToArray` | | +| `cudaBindTexture` | `hipBindTexture` | +| `cudaBindTexture2D` | | +| `cudaBindTextureToArray` | | +| `cudaBindTextureToMipmappedArray` | | +| `cudaCreateChannelDesc` | `hipCreateChannelDesc` | +| `cudaFuncGetAttributes` | | +| `cudaFuncSetCacheConfig` | | +| `cudaGetSymbolAddress` | | +| `cudaGetSymbolSize` | | +| `cudaGetTextureAlignmentOffset` | | +| `cudaLaunch` | | +| `cudaLaunchKernel` | | +| `cudaMallocHost` | | +| `cudaMallocManaged` | | +| `cudaMemcpyFromSymbol` | | +| `cudaMemcpyFromSymbolAsync` | | +| `cudaMemcpyToSymbol` | | +| `cudaMemcpyToSymbolAsync` | | | `cudaOccupancyMaxActiveBlocksPerMultiprocessor` | `hipOccupancyMaxActiveBlocksPerMultiprocessor` | -| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | | -| `cudaOccupancyMaxPotentialBlockSize` | `hipOccupancyMaxPotentialBlockSize` | -| `cudaOccupancyMaxPotentialBlockSizeVariableSMem` | | -| `cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags` | | -| `cudaOccupancyMaxPotentialBlockSizeWithFlags` | | -| `cudaSetupArgument` | | -| `cudaStreamAttachMemAsync` | | -| `cudaUnbindTexture` | `hipUnbindTexture` | +| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | | +| `cudaOccupancyMaxPotentialBlockSize` | `hipOccupancyMaxPotentialBlockSize` | +| `cudaOccupancyMaxPotentialBlockSizeVariableSMem` | | +| `cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags` | | +| `cudaOccupancyMaxPotentialBlockSizeWithFlags` | | +| `cudaSetupArgument` | | +| `cudaStreamAttachMemAsync` | | +| `cudaUnbindTexture` | `hipUnbindTexture` | ## **30. Profiler Control** @@ -395,8 +395,8 @@ ## **31. Data types** -| **type** | **CUDA** | **HIP** | -|-------------:|-----------------------------------------------|------------------------------------------------------| +| **type** | **CUDA** | **HIP** |**HIP value** (if differs) | +|-------------:|-----------------------------------------------|------------------------------------------------------|---------------------------| | struct | `cudaChannelFormatDesc` | `hipChannelFormatDesc` | | struct | `cudaDeviceProp` | `hipDeviceProp_t` | | struct | `cudaExtent` | `hipExtent` | @@ -790,3 +790,19 @@ | define | `cudaTextureType1DLayered` | `hipTextureType1DLayered` | | define | `cudaTextureType2DLayered` | `hipTextureType2DLayered` | | define | `cudaTextureTypeCubemapLayered` | `hipTextureTypeCubemapLayered` | +| enum |***`cudaDataType_t`*** |***`hipblasDatatype_t`*** | +| enum |***`cudaDataType`*** |***`hipblasDatatype_t`*** | +| 2 |*`CUDA_R_16F`* |*`HIPBLAS_R_16F`* | 150 | +| 6 |*`CUDA_C_16F`* |*`HIPBLAS_C_16F`* | 153 | +| 0 |*`CUDA_R_32F`* |*`HIPBLAS_R_32F`* | 151 | +| 4 |*`CUDA_C_32F`* |*`HIPBLAS_C_32F`* | 154 | +| 1 |*`CUDA_R_64F`* |*`HIPBLAS_R_64F`* | 152 | +| 5 |*`CUDA_C_64F`* |*`HIPBLAS_C_64F`* | 155 | +| 3 |*`CUDA_R_8I`* | | +| 7 |*`CUDA_C_8I`* | | +| 8 |*`CUDA_R_8U`* | | +| 9 |*`CUDA_C_8U`* | | +| 10 |*`CUDA_R_32I`* | | +| 11 |*`CUDA_C_32I`* | | +| 12 |*`CUDA_R_32U`* | | +| 13 |*`CUDA_C_32U`* | | diff --git a/docs/markdown/CUDNN_API_supported_by_HIP.md b/docs/markdown/CUDNN_API_supported_by_HIP.md index ec640fecdd..ffdeb881e5 100644 --- a/docs/markdown/CUDNN_API_supported_by_HIP.md +++ b/docs/markdown/CUDNN_API_supported_by_HIP.md @@ -239,8 +239,8 @@ |`cudnnSetTensor` |`hipdnnSetTensor` | |`cudnnScaleTensor` |`hipdnnScaleTensor` | |`cudnnCreateFilterDescriptor` |`hipdnnCreateFilterDescriptor` | -|`cudnnSetFilter4dDescriptor` | | -|`cudnnGetFilter4dDescriptor` | | +|`cudnnSetFilter4dDescriptor` |`hipdnnSetFilter4dDescriptor` | +|`cudnnGetFilter4dDescriptor` |`hipdnnGetFilter4dDescriptor` | |`cudnnSetFilterNdDescriptor` |`hipdnnSetFilterNdDescriptor` | |`cudnnGetFilterNdDescriptor` |`hipdnnGetFilterNdDescriptor` | |`cudnnDestroyFilterDescriptor` |`hipdnnDestroyFilterDescriptor` | diff --git a/docs/markdown/hip_programming_guide.md b/docs/markdown/hip_programming_guide.md index 9313eb22e1..52d250cab5 100644 --- a/docs/markdown/hip_programming_guide.md +++ b/docs/markdown/hip_programming_guide.md @@ -91,8 +91,22 @@ Setting HCC_UNPINNED_COPY_MODE = 3, forces all unpinned transfer to use direct m Following environment variables can be used to control the transfer thresholds: -- HCC_H2D_STAGING_THRESHOLD - Threshold in KB for H2D copy. For sizes smaller than threshold direct copy logic would be used else staging buffers logic. By default it is set to 64. +- HCC_H2D_STAGING_THRESHOLD - Threshold in KB for H2D copy. For sizes smaller than threshold direct copy logic would be used else staging buffers logic. By default it is set to 64. - HCC_H2D_PININPLACE_THRESHOLD - Threshold in KB for H2D copy. For sizes smaller than threshold staging buffers logic would be used else PinInPlace logic. By default it is set to 4096. - HCC_D2H_PININPLACE_THRESHOLD - Threshold in KB for D2H copy. For sizes smaller than threshold staging buffer logic would be used else PinInPlace logic. By default it is set to 1024. + +## Device-Side Malloc + +hip-hcc and hip-clang supports device-side malloc and free. Users can allocate +memory dynamically in a kernel. The allocated memory are in global address +space, however, different threads get different memory allocations for the same +call of malloc. The allocated memory can be accessed or freed by other threads +or other kernels. It persists in the life time of the HIP program until it is +freed. + +The memory are allocated in pages. Users can define macro +`__HIP_SIZE_OF_PAGE` for controlling the page size in bytes and macro +`__HIP_NUM_PAGES` for controlling the total number of pages that can be +allocated. \ No newline at end of file diff --git a/hip-config.cmake.in b/hip-config-clang.cmake.in similarity index 97% rename from hip-config.cmake.in rename to hip-config-clang.cmake.in index d5dc6803fc..240f01f60e 100644 --- a/hip-config.cmake.in +++ b/hip-config-clang.cmake.in @@ -48,9 +48,6 @@ set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" ) set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc") set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig") -if(HIP_COMPILER STREQUAL "hcc") - find_dependency(hcc) -endif() include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" ) set( hip_LIBRARIES hip::host hip::device) diff --git a/hip-config-hcc.cmake.in b/hip-config-hcc.cmake.in new file mode 100644 index 0000000000..efcdf708bb --- /dev/null +++ b/hip-config-hcc.cmake.in @@ -0,0 +1,65 @@ +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro OPTIONAL RESULT_VARIABLE _CMakeFindDependencyMacro_FOUND) +if (NOT _CMakeFindDependencyMacro_FOUND) + macro(find_dependency dep) + if (NOT ${dep}_FOUND) + set(cmake_fd_version) + if (${ARGC} GREATER 1) + set(cmake_fd_version ${ARGV1}) + endif() + set(cmake_fd_exact_arg) + if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION_EXACT) + set(cmake_fd_exact_arg EXACT) + endif() + set(cmake_fd_quiet_arg) + if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY) + set(cmake_fd_quiet_arg QUIET) + endif() + set(cmake_fd_required_arg) + if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED) + set(cmake_fd_required_arg REQUIRED) + endif() + find_package(${dep} ${cmake_fd_version} + ${cmake_fd_exact_arg} + ${cmake_fd_quiet_arg} + ${cmake_fd_required_arg} + ) + string(TOUPPER ${dep} cmake_dep_upper) + if (NOT ${dep}_FOUND AND NOT ${cmake_dep_upper}_FOUND) + set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.") + set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False) + return() + endif() + set(cmake_fd_version) + set(cmake_fd_required_arg) + set(cmake_fd_quiet_arg) + set(cmake_fd_exact_arg) + endif() + endmacro() +endif() + + +set_and_check( hip_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@" ) +set_and_check( hip_INCLUDE_DIRS "${hip_INCLUDE_DIR}" ) +set_and_check( hip_LIB_INSTALL_DIR "@PACKAGE_LIB_INSTALL_DIR@" ) +set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" ) + +set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc") +set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig") + +find_dependency(hcc) +include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" ) + +set( hip_LIBRARIES hip::host hip::device) +set( hip_LIBRARY ${hip_LIBRARIES}) + +set(HIP_INCLUDE_DIR ${hip_INCLUDE_DIR}) +set(HIP_INCLUDE_DIRS ${hip_INCLUDE_DIRS}) +set(HIP_LIB_INSTALL_DIR ${hip_LIB_INSTALL_DIR}) +set(HIP_BIN_INSTALL_DIR ${hip_BIN_INSTALL_DIR}) +set(HIP_LIBRARIES ${hip_LIBRARIES}) +set(HIP_LIBRARY ${hip_LIBRARY}) +set(HIP_HIPCC_EXECUTABLE ${hip_HIPCC_EXECUTABLE}) +set(HIP_HIPCONFIG_EXECUTABLE ${hip_HIPCONFIG_EXECUTABLE}) + diff --git a/hipify-clang/CMakeLists.txt b/hipify-clang/CMakeLists.txt index 5d9070be28..e2703356ee 100644 --- a/hipify-clang/CMakeLists.txt +++ b/hipify-clang/CMakeLists.txt @@ -89,7 +89,8 @@ if (HIPIFY_CLANG_TESTS) if ((CUDA_VERSION VERSION_LESS "7.0") OR (LLVM_PACKAGE_VERSION VERSION_LESS "3.8") OR (CUDA_VERSION VERSION_GREATER "7.5" AND LLVM_PACKAGE_VERSION VERSION_LESS "4.0") OR (CUDA_VERSION VERSION_GREATER "8.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "6.0") OR - (CUDA_VERSION VERSION_GREATER "9.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "7.0")) + (CUDA_VERSION VERSION_GREATER "9.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "7.0") OR + CUDA_VERSION VERSION_EQUAL "10.0") message(SEND_ERROR "CUDA ${CUDA_VERSION} is not supported by clang ${LLVM_PACKAGE_VERSION}.") if (CUDA_VERSION VERSION_LESS "7.0") message(STATUS "Please install CUDA 7.0 or higher.") diff --git a/hipify-clang/README.md b/hipify-clang/README.md index 7f55dea800..02bc9cfed0 100644 --- a/hipify-clang/README.md +++ b/hipify-clang/README.md @@ -30,11 +30,16 @@ ## Dependencies `hipify-clang` requires: -1. LLVM+CLANG of at least version 3.8.0, latest stable and recommended release is 6.0.1. -2. CUDA at least version 7.5, latest supported release is 9.0. +1. LLVM+CLANG of at least version 3.8.0, latest stable and recommended releases: + + 6.0.1 (linux and windows), -| **LLVM release version** | **CUDA latest supported version** | -|:------------------------:|:---------------------------------:| + 7.0.0 (linux only). + +2. CUDA at least version 7.5, latest supported release is 9.2. + +| **LLVM release version** | **CUDA latest supported version** | **Comments** | +|:------------------------:|:---------------------------------:|:------------:| | 3.8.0 | 7.5 | | 3.8.1 | 7.5 | | 3.9.0 | 7.5 | @@ -46,6 +51,8 @@ | 5.0.2 | 8.0 | | 6.0.0 | 9.0 | | 6.0.1 | 9.0 | +| 7.0.0 | 9.2 | linux only | +| | 10.0 | not yet supported | In most cases, you can get a suitable version of LLVM+CLANG with your package manager. diff --git a/hipify-clang/src/CUDA2HipMap.cpp b/hipify-clang/src/CUDA2HipMap.cpp index 4bd7263bad..01f24a6d43 100644 --- a/hipify-clang/src/CUDA2HipMap.cpp +++ b/hipify-clang/src/CUDA2HipMap.cpp @@ -1417,22 +1417,23 @@ const std::map CUDA_IDENTIFIER_MAP{ /////////////////////////////// CUDA RT API /////////////////////////////// // Data types - {"cudaDataType_t", {"hipDataType_t", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"cudaDataType", {"hipDataType_t", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"CUDA_R_16F", {"hipR16F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"CUDA_C_16F", {"hipC16F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"CUDA_R_32F", {"hipR32F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"CUDA_C_32F", {"hipC32F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"CUDA_R_64F", {"hipR64F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"CUDA_C_64F", {"hipC64F", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"CUDA_R_8I", {"hipR8I", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"CUDA_C_8I", {"hipC8I", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"CUDA_R_8U", {"hipR8U", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"CUDA_C_8U", {"hipC8U", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"CUDA_R_32I", {"hipR32I", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"CUDA_C_32I", {"hipC32I", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"CUDA_R_32U", {"hipR32U", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"CUDA_C_32U", {"hipC32U", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, + // TODO: rename hipblasDatatype_t to hipDataType_t and move from hipBLAS to HIP + {"cudaDataType_t", {"hipblasDatatype_t", CONV_TYPE, API_RUNTIME}}, + {"cudaDataType", {"hipblasDatatype_t", CONV_TYPE, API_RUNTIME}}, + {"CUDA_R_16F", {"HIPBLAS_R_16F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 // 150 + {"CUDA_C_16F", {"HIPBLAS_C_16F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 6 // 153 + {"CUDA_R_32F", {"HIPBLAS_R_32F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 // 151 + {"CUDA_C_32F", {"HIPBLAS_C_32F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 4 // 154 + {"CUDA_R_64F", {"HIPBLAS_R_64F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 // 152 + {"CUDA_C_64F", {"HIPBLAS_C_64F", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 5 // 155 + {"CUDA_R_8I", {"HIPBLAS_R_8I", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 // + {"CUDA_C_8I", {"HIPBLAS_C_8I", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 7 // + {"CUDA_R_8U", {"HIPBLAS_R_8U", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 8 // + {"CUDA_C_8U", {"HIPBLAS_C_8U", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 9 // + {"CUDA_R_32I", {"HIPBLAS_R_32I", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 10 // + {"CUDA_C_32I", {"HIPBLAS_C_32I", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 11 // + {"CUDA_R_32U", {"HIPBLAS_R_32U", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 12 // + {"CUDA_C_32U", {"HIPBLAS_C_32U", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 13 // // Library property types // IMPORTANT: no cuda prefix @@ -2143,11 +2144,7 @@ const std::map CUDA_IDENTIFIER_MAP{ {"CUBLAS_ATOMICS_ALLOWED", {"HIPBLAS_ATOMICS_ALLOWED", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, // Blas Data Type - {"cublasDataType_t", {"hipblasDataType_t", CONV_TYPE, API_BLAS, HIP_UNSUPPORTED}}, - {"CUBLAS_DATA_FLOAT", {"HIPBLAS_DATA_FLOAT", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, - {"CUBLAS_DATA_DOUBLE", {"HIPBLAS_DATA_DOUBLE", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, - {"CUBLAS_DATA_HALF", {"HIPBLAS_DATA_HALF", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, - {"CUBLAS_DATA_INT8", {"HIPBLAS_DATA_INT8", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, + {"cublasDataType_t", {"hipblasDatatype_t", CONV_TYPE, API_BLAS}}, // Blas Math mode/tensor operation {"cublasMath_t", {"hipblasMath_t", CONV_TYPE, API_BLAS, HIP_UNSUPPORTED}}, @@ -2155,9 +2152,9 @@ const std::map CUDA_IDENTIFIER_MAP{ {"CUBLAS_TENSOR_OP_MATH", {"HIPBLAS_TENSOR_OP_MATH", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, // Blass different GEMM algorithms - {"cublasGemmAlgo_t", {"hipblasGemmAlgo_t", CONV_TYPE, API_BLAS, HIP_UNSUPPORTED}}, - {"CUBLAS_GEMM_DFALT", {"HIPBLAS_GEMM_DFALT", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, // -1 - {"CUBLAS_GEMM_DEFAULT", {"HIPBLAS_GEMM_DEFAULT", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, // -1 + {"cublasGemmAlgo_t", {"hipblasGemmAlgo_t", CONV_TYPE, API_BLAS}}, + {"CUBLAS_GEMM_DFALT", {"HIPBLAS_GEMM_DEFAULT", CONV_NUMERIC_LITERAL, API_BLAS}}, // -1 // 160 + {"CUBLAS_GEMM_DEFAULT", {"HIPBLAS_GEMM_DEFAULT", CONV_NUMERIC_LITERAL, API_BLAS}}, // -1 // 160 {"CUBLAS_GEMM_ALGO0", {"HIPBLAS_GEMM_ALGO0", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, // 0 {"CUBLAS_GEMM_ALGO1", {"HIPBLAS_GEMM_ALGO1", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, // 1 {"CUBLAS_GEMM_ALGO2", {"HIPBLAS_GEMM_ALGO2", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, // 2 @@ -2715,7 +2712,7 @@ const std::map CUDA_IDENTIFIER_MAP{ //IO in FP16 / FP32, computation in float {"cublasSgemmEx", {"hipblasSgemmEx", CONV_MATH_FUNC, API_BLAS, HIP_UNSUPPORTED}}, - {"cublasGemmEx", {"hipblasGemmEx", CONV_MATH_FUNC, API_BLAS, HIP_UNSUPPORTED}}, + {"cublasGemmEx", {"hipblasGemmEx", CONV_MATH_FUNC, API_BLAS}}, {"cublasGemmBatchedEx", {"hipblasGemmBatchedEx", CONV_MATH_FUNC, API_BLAS, HIP_UNSUPPORTED}}, {"cublasGemmStridedBatchedEx", {"hipblasGemmStridedBatchedEx", CONV_MATH_FUNC, API_BLAS, HIP_UNSUPPORTED}}, // IO in Int8 complex/cuComplex, computation in cuComplex diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h index 508a301344..b319f26e03 100644 --- a/include/hip/hcc_detail/device_functions.h +++ b/include/hip/hcc_detail/device_functions.h @@ -27,11 +27,12 @@ THE SOFTWARE. #include "math_fwd.h" #include +#include + + #include #include #include -#include - /* Integer Intrinsics */ @@ -1036,4 +1037,5 @@ static inline __device__ void* memset(void* ptr, int val, size_t size) { unsigned char val8 = static_cast(val); return __hip_hc_memset(ptr, val8, size); } + #endif diff --git a/include/hip/hcc_detail/hip_fp16.h b/include/hip/hcc_detail/hip_fp16.h index 68f0e35f5f..5a6e650069 100644 --- a/include/hip/hcc_detail/hip_fp16.h +++ b/include/hip/hcc_detail/hip_fp16.h @@ -29,7 +29,7 @@ THE SOFTWARE. #include #endif -#if defined(__clang__) && (__clang_major__ > 3) +#if defined(__clang__) && (__clang_major__ > 5) typedef _Float16 _Float16_2 __attribute__((ext_vector_type(2))); struct __half_raw { diff --git a/include/hip/hcc_detail/hip_fp16_math_fwd.h b/include/hip/hcc_detail/hip_fp16_math_fwd.h index fccbcfbfdc..83fd1349b1 100644 --- a/include/hip/hcc_detail/hip_fp16_math_fwd.h +++ b/include/hip/hcc_detail/hip_fp16_math_fwd.h @@ -26,6 +26,8 @@ THE SOFTWARE. // Half Math Functions // */ +#include "host_defines.h" + extern "C" { __device__ __attribute__((const)) _Float16 __ocml_ceil_f16(_Float16); diff --git a/include/hip/hcc_detail/hip_memory.h b/include/hip/hcc_detail/hip_memory.h new file mode 100644 index 0000000000..739e488ca3 --- /dev/null +++ b/include/hip/hcc_detail/hip_memory.h @@ -0,0 +1,109 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H + +// Implementation of malloc and free device functions. +// HIP heap is implemented as a global array with fixed size. Users may define +// __HIP_SIZE_OF_PAGE and __HIP_NUM_PAGES to have a larger heap. + +// Size of page in bytes. +#ifndef __HIP_SIZE_OF_PAGE +#define __HIP_SIZE_OF_PAGE 64 +#endif + +// Total number of pages +#ifndef __HIP_NUM_PAGES +#define __HIP_NUM_PAGES (16 * 64 * 64) +#endif + +#define __HIP_SIZE_OF_HEAP (__HIP_NUM_PAGES * __HIP_SIZE_OF_PAGE) + +#if __HCC__ || __HIP__ + +__attribute__((weak)) __device__ char __hip_device_heap[__HIP_SIZE_OF_HEAP]; +__attribute__((weak)) __device__ + uint32_t __hip_device_page_flag[__HIP_NUM_PAGES]; + +extern "C" inline __device__ void* __hip_malloc(size_t size) { + char* heap = (char*)__hip_device_heap; + if (size > __HIP_SIZE_OF_HEAP) { + return (void*)nullptr; + } + uint32_t totalThreads = + hipBlockDim_x * hipGridDim_x * hipBlockDim_y + * hipGridDim_y * hipBlockDim_z * hipGridDim_z; + uint32_t currentWorkItem = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x + + (hipThreadIdx_y + hipBlockDim_y * hipBlockIdx_y) * hipBlockDim_x + + (hipThreadIdx_z + hipBlockDim_z * hipBlockIdx_z) * hipBlockDim_x + * hipBlockDim_y; + + uint32_t numHeapsPerWorkItem = __HIP_NUM_PAGES / totalThreads; + uint32_t heapSizePerWorkItem = __HIP_SIZE_OF_HEAP / totalThreads; + + uint32_t stride = size / __HIP_SIZE_OF_PAGE; + uint32_t start = numHeapsPerWorkItem * currentWorkItem; + + uint32_t k = 0; + + while (__hip_device_page_flag[k] > 0) { + k++; + } + + for (uint32_t i = 0; i < stride - 1; i++) { + __hip_device_page_flag[i + start + k] = 1; + } + + __hip_device_page_flag[start + stride - 1 + k] = 2; + + void* ptr = (void*)(heap + + heapSizePerWorkItem * currentWorkItem + k * __HIP_SIZE_OF_PAGE); + + return ptr; +} + +extern "C" inline __device__ void* __hip_free(void* ptr) { + if (ptr == nullptr) { + return nullptr; + } + + uint32_t offsetByte = (uint64_t)ptr - (uint64_t)__hip_device_heap; + uint32_t offsetPage = offsetByte / __HIP_SIZE_OF_PAGE; + + while (__hip_device_page_flag[offsetPage] != 0) { + if (__hip_device_page_flag[offsetPage] == 2) { + __hip_device_page_flag[offsetPage] = 0; + offsetPage++; + break; + } else { + __hip_device_page_flag[offsetPage] = 0; + offsetPage++; + } + } + + return nullptr; +} + +#endif + +#endif // HIP_INCLUDE_HIP_HCC_DETAIL_HIP_MEMORY_H diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index 9759a9bdf8..cf4125be31 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -128,7 +128,7 @@ extern int HIP_TRACE_API; // Feature tests: -#if defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0) +#if (defined(__HCC_ACCELERATOR__) && (__HCC_ACCELERATOR__ != 0)) || __HIP_DEVICE_COMPILE__ // Device compile and not host compile: // 32-bit Atomics: @@ -245,11 +245,11 @@ static constexpr Coordinates threadIdx; #endif // defined __HCC__ #if __HCC_OR_HIP_CLANG__ -extern "C" __device__ void* __hip_hc_malloc(size_t); -extern "C" __device__ void* __hip_hc_free(void* ptr); +extern "C" __device__ void* __hip_malloc(size_t); +extern "C" __device__ void* __hip_free(void* ptr); -static inline __device__ void* malloc(size_t size) { return __hip_hc_malloc(size); } -static inline __device__ void* free(void* ptr) { return __hip_hc_free(ptr); } +static inline __device__ void* malloc(size_t size) { return __hip_malloc(size); } +static inline __device__ void* free(void* ptr) { return __hip_free(ptr); } #if defined(__HCC_ACCELERATOR__) && defined(HC_FEATURE_PRINTF) template @@ -331,13 +331,13 @@ extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, gri typedef int hipLaunchParm; template -inline void hipLaunchKernelGGL(F kernelName, const dim3& numblocks, const dim3& numthreads, +inline void hipLaunchKernelGGL(F&& kernelName, const dim3& numblocks, const dim3& numthreads, unsigned memperblock, hipStream_t streamId, Args... args) { kernelName<<>>(args...); } template -inline void hipLaunchKernel(F kernel, const dim3& numBlocks, const dim3& dimBlocks, +inline void hipLaunchKernel(F&& kernel, const dim3& numBlocks, const dim3& dimBlocks, std::uint32_t groupMemBytes, hipStream_t stream, Args... args) { hipLaunchKernelGGL(kernel, numBlocks, dimBlocks, groupMemBytes, stream, hipLaunchParm{}, std::move(args)...); @@ -423,6 +423,8 @@ extern const __device__ __attribute__((weak)) __hip_builtin_gridDim_t gridDim; #define hipGridDim_y gridDim.y #define hipGridDim_z gridDim.z +#include + #if __HIP_HCC_COMPAT_MODE__ // Define HCC work item functions in terms of HIP builtin variables. #pragma push_macro("__DEFINE_HCC_FUNC") @@ -462,7 +464,6 @@ hc_get_workitem_absolute_id(int dim) #undef __CUDA__ #pragma pop_macro("__CUDA__") -#include hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, @@ -474,4 +475,6 @@ hipError_t hipHccModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, #endif // defined(__clang__) && defined(__HIP__) +#include + #endif // HIP_HCC_DETAIL_RUNTIME_H diff --git a/include/hip/hcc_detail/math_functions.h b/include/hip/hcc_detail/math_functions.h index c7a4d0e88f..faf3804719 100644 --- a/include/hip/hcc_detail/math_functions.h +++ b/include/hip/hcc_detail/math_functions.h @@ -22,6 +22,7 @@ THE SOFTWARE. #pragma once +#include "hip_fp16_math_fwd.h" #include "math_fwd.h" #include @@ -1159,6 +1160,16 @@ long long llabs(long long x) #endif // END INTEGER +__DEVICE__ +inline _Float16 fma(_Float16 x, _Float16 y, _Float16 z) { + return __ocml_fma_f16(x, y, z); +} + +__DEVICE__ +inline float fma(float x, float y, float z) { + return fmaf(x, y, z); +} + #pragma push_macro("__DEF_FLOAT_FUN") #pragma push_macro("__DEF_FLOAT_FUN2") #pragma push_macro("__DEF_FLOAT_FUN2I") @@ -1352,10 +1363,10 @@ __DEVICE__ inline static unsigned long long max(long long arg1, unsigned long lo return max((unsigned long long) arg1, arg2); }*/ #else -__DEVICE__ inline static int min(int arg1, int arg2) { +__DEVICE__ inline int min(int arg1, int arg2) { return (arg1 < arg2) ? arg1 : arg2; } -__DEVICE__ inline static int max(int arg1, int arg2) { +__DEVICE__ inline int max(int arg1, int arg2) { return (arg1 > arg2) ? arg1 : arg2; } diff --git a/include/hip/nvcc_detail/hip_runtime.h b/include/hip/nvcc_detail/hip_runtime.h index fe0b19bf8c..19d740a1ee 100644 --- a/include/hip/nvcc_detail/hip_runtime.h +++ b/include/hip/nvcc_detail/hip_runtime.h @@ -118,6 +118,9 @@ typedef int hipLaunchParm; } #endif +#define __clock() clock() +#define __clock64() clock64() + #endif #endif diff --git a/install.sh b/install.sh index f8ad640798..c7dc48e1a8 100755 --- a/install.sh +++ b/install.sh @@ -25,7 +25,7 @@ popd () { function setupENV() { sudo apt-get update - sudo apt-get install dpkg-dev rpm doxygen libelf-dev + sudo apt-get install dpkg-dev rpm doxygen libelf-dev rename } function buildHIP() diff --git a/src/device_util.cpp b/src/device_util.cpp index 2e78dea009..c86e52617b 100644 --- a/src/device_util.cpp +++ b/src/device_util.cpp @@ -27,70 +27,6 @@ THE SOFTWARE. #include "hip/hip_runtime.h" #include -//================================================================================================= -/* - Implementation of malloc and free device functions. - - This is the best place to put them because the device - global variables need to be initialized at the start. -*/ -__device__ char gpuHeap[SIZE_OF_HEAP]; -__device__ uint32_t gpuFlags[NUM_PAGES]; - -__device__ void* __hip_hc_malloc(size_t size) { - char* heap = (char*)gpuHeap; - if (size > SIZE_OF_HEAP) { - return (void*)nullptr; - } - uint32_t totalThreads = - blockDim.x * gridDim.x * blockDim.y * gridDim.y * blockDim.z * gridDim.z; - uint32_t currentWorkItem = threadIdx.x + blockDim.x * blockIdx.x; - - uint32_t numHeapsPerWorkItem = NUM_PAGES / totalThreads; - uint32_t heapSizePerWorkItem = SIZE_OF_HEAP / totalThreads; - - uint32_t stride = size / SIZE_OF_PAGE; - uint32_t start = numHeapsPerWorkItem * currentWorkItem; - - uint32_t k = 0; - - while (gpuFlags[k] > 0) { - k++; - } - - for (uint32_t i = 0; i < stride - 1; i++) { - gpuFlags[i + start + k] = 1; - } - - gpuFlags[start + stride - 1 + k] = 2; - - void* ptr = (void*)(heap + heapSizePerWorkItem * currentWorkItem + k * SIZE_OF_PAGE); - - return ptr; -} - -__device__ void* __hip_hc_free(void* ptr) { - if (ptr == nullptr) { - return nullptr; - } - - uint32_t offsetByte = (uint64_t)ptr - (uint64_t)gpuHeap; - uint32_t offsetPage = offsetByte / SIZE_OF_PAGE; - - while (gpuFlags[offsetPage] != 0) { - if (gpuFlags[offsetPage] == 2) { - gpuFlags[offsetPage] = 0; - offsetPage++; - break; - } else { - gpuFlags[offsetPage] = 0; - offsetPage++; - } - } - - return nullptr; -} - // abort __device__ void abort() { return hc::abort(); } diff --git a/src/device_util.h b/src/device_util.h index 8fa96da9d9..84dbbf71ed 100644 --- a/src/device_util.h +++ b/src/device_util.h @@ -29,14 +29,6 @@ THE SOFTWARE. Heap size computation for malloc and free device functions. */ -#define NUM_PAGES_PER_THREAD 16 -#define SIZE_OF_PAGE 64 -#define NUM_THREADS_PER_CU 64 -#define NUM_CUS_PER_GPU 64 // Specific for r9 Nano -#define NUM_PAGES NUM_PAGES_PER_THREAD* NUM_THREADS_PER_CU* NUM_CUS_PER_GPU -#define SIZE_MALLOC NUM_PAGES* SIZE_OF_PAGE -#define SIZE_OF_HEAP SIZE_MALLOC - #define HIP_SQRT_2 1.41421356237 #define HIP_SQRT_PI 1.77245385091 @@ -62,9 +54,6 @@ THE SOFTWARE. #define HIP_PI 3.14159265358979323846 -__device__ void* __hip_hc_malloc(size_t size); -__device__ void* __hip_hc_free(void* ptr); - __device__ float __hip_erfinvf(float x); __device__ double __hip_erfinv(double x); diff --git a/src/hip_device.cpp b/src/hip_device.cpp index 72150c3f54..2aae7cf2a8 100644 --- a/src/hip_device.cpp +++ b/src/hip_device.cpp @@ -99,7 +99,7 @@ hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) { return ihipLogStatus(hipErrorInvalidValue); } if (limit == hipLimitMallocHeapSize) { - *pValue = (size_t)SIZE_OF_HEAP; + *pValue = (size_t)__HIP_SIZE_OF_HEAP; return ihipLogStatus(hipSuccess); } else { return ihipLogStatus(hipErrorUnsupportedLimit); diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index dc5390f014..bda6ad2650 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -1543,6 +1543,8 @@ hipError_t ihipMemset(void* dst, int value, size_t sizeBytes, hipStream_t strea { hipError_t e = hipSuccess; + if (sizeBytes == 0) return e; + if (stream && (dst != NULL)) { if(copyDataType == ihipMemsetDataTypeChar){ if ((sizeBytes & 0x3) == 0) { diff --git a/tests/src/deviceLib/hipDeviceMalloc.cpp b/tests/src/deviceLib/hipDeviceMalloc.cpp new file mode 100644 index 0000000000..4ec10077c5 --- /dev/null +++ b/tests/src/deviceLib/hipDeviceMalloc.cpp @@ -0,0 +1,188 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +/* HIT_START + * BUILD: %t %s NVCC_OPTIONS -std=c++11 + * RUN: %t EXCLUDE_HIP_PLATFORM nvcc + * HIT_END + */ +#include "test_common.h" +#include +#include + +// Tolerance for error +const double tolerance = 1e-6; +const bool verbose = false; + +#define BLKDIM_X 64 +#define BLKDIM_Y 1 +#define BLKDIM_Z 1 +#define NUM_BLK_X 1 +#define NUM_BLK_Y 1 +#define NUM_BLK_Z 1 + +#define LEN (BLKDIM_X * BLKDIM_Y * BLKDIM_Z * NUM_BLK_X * NUM_BLK_Y * NUM_BLK_Z) + +#define ALL_FUN \ + OP(add) \ + OP(sub) \ + OP(mul) \ + OP(div) + +#define OP(x) CK_##x, +enum CalcKind { + ALL_FUN +}; +#undef OP + +#define OP(x) case CK_##x: return #x; +std::string getName(enum CalcKind CK) { + switch(CK){ + ALL_FUN + } +} +#undef OP + +// Calculates function. +// If the function has one argument, B is ignored. +#define ONE_ARG(func) \ + case CK_##func: \ + return std::func(A); + +template +__device__ __host__ FloatT calc(FloatT A, FloatT B, enum CalcKind CK) { + switch (CK) { + case CK_add: + return A + B; + case CK_sub: + return A - B; + case CK_mul: + return A * B; + case CK_div: + return A / B; + } +} + +// Allocate memory in kernel and save the address to pA and pB. +// Copy value from A, B to allocated memory. +template +__global__ void kernel_alloc(FloatT* A, FloatT* B, FloatT** pA, FloatT** pB) { + int tx = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x + + (hipThreadIdx_y + hipBlockDim_y * hipBlockIdx_y) * hipBlockDim_x + + (hipThreadIdx_z + hipBlockDim_z * hipBlockIdx_z) * hipBlockDim_x + * hipBlockDim_y; + if (tx == 0) { + *pA = (FloatT*)malloc(sizeof(FloatT) * LEN); + *pB = (FloatT*)malloc(sizeof(FloatT) * LEN); + for (int i = 0; i < LEN; i++) { + (*pA)[i] = A[i]; + (*pB)[i] = B[i]; + } + } +} + +// Do calculation using values saved in allocated memmory. pA, pB are buffers +// containing the address of the device-side allocated array. +template +__global__ void kernel_free(FloatT** pA, FloatT** pB, FloatT* C, enum CalcKind CK) { + int tx = hipThreadIdx_x + hipBlockDim_x * hipBlockIdx_x + + (hipThreadIdx_y + hipBlockDim_y * hipBlockIdx_y) * hipBlockDim_x + + (hipThreadIdx_z + hipBlockDim_z * hipBlockIdx_z) * hipBlockDim_x + * hipBlockDim_y; + C[tx] = calc((*pA)[tx], (*pB)[tx], CK); + if (tx == 0) { + free(*pA); + free(*pB); + } +} + +template +void test() { + FloatT *A, *Ad, *B, *Bd, *C, *Cd, *D; + A = new FloatT[LEN]; + B = new FloatT[LEN]; + C = new FloatT[LEN]; + D = new FloatT[LEN]; + hipMalloc((void**)&Ad, sizeof(FloatT) * LEN); + hipMalloc((void**)&Bd, sizeof(FloatT) * LEN); + hipMalloc((void**)&Cd, sizeof(FloatT) * LEN); + + for (uint32_t i = 0; i < LEN; i++) { + A[i] = (i + 1) * 1.0f; + B[i] = A[i]; + C[i] = A[i]; + } + hipMemcpy(Ad, A, sizeof(FloatT) * LEN, hipMemcpyHostToDevice); + hipMemcpy(Bd, B, sizeof(FloatT) * LEN, hipMemcpyHostToDevice); + + // Run kernel for a calculation kind and verify by comparing with host + // calculation result. Returns false if fails. + auto test_fun = [&](enum CalcKind CK) { + // kernel_alloc allocates memory on device side and initialize it. + // kernel_free uses allocated memory from kernel_alloc and does the + // calculation then free the memory. + // pA and pB are buffers to pass the device-side allocated memory address + // from kernel_alloc to kernel_free. + FloatT **pA, **pB; + hipMalloc((FloatT***)&pA, sizeof(FloatT*)); + hipMalloc((FloatT***)&pB, sizeof(FloatT*)); + dim3 blkDim(BLKDIM_X, BLKDIM_Y, BLKDIM_Z); + dim3 numBlk(NUM_BLK_X, NUM_BLK_Y, NUM_BLK_Z); + hipLaunchKernelGGL(kernel_alloc, numBlk, blkDim, 0, 0, + Ad, Bd, pA, pB); + hipDeviceSynchronize(); + hipLaunchKernelGGL(kernel_free, numBlk, blkDim, 0, 0, + pA, pB, Cd, CK); + hipMemcpy(C, Cd, sizeof(FloatT) * LEN, hipMemcpyDeviceToHost); + hipFree(pA); + hipFree(pB); + for (int i = 0; i < LEN; i++) { + FloatT Expected = calc(A[i], B[i], CK); + FloatT error = std::abs(C[i] - Expected); + if (std::abs(Expected) > tolerance) error /= std::abs(Expected); + bool pass = error < tolerance; + if (verbose || !pass) { + std::cout << "Function: " << getName(CK) << " Operands: " << A[i] << " " << B[i] + << " Result: " << C[i] << " Expected: " << Expected << " Error: " << error + << " Pass: " << pass << std::endl; + } + if (!pass) + return false; + } + return true; + }; + +#define OP(x) assert(test_fun(CK_##x)); + ALL_FUN +#undef OP + + hipFree(Ad); + hipFree(Bd); + hipFree(Cd); + delete[] A; + delete[] B; + delete[] C; + delete[] D; +} + +int main() { + test(); + test(); + passed(); + return 0; +} diff --git a/tests/src/deviceLib/hipFloatMath.cpp b/tests/src/deviceLib/hipFloatMath.cpp index 403a77f703..fb7e4451dc 100644 --- a/tests/src/deviceLib/hipFloatMath.cpp +++ b/tests/src/deviceLib/hipFloatMath.cpp @@ -40,10 +40,10 @@ __global__ void floatMath(hipLaunchParm lp, float* In, float* Out) { Out[tid] = __exp10f(Out[tid]); Out[tid] = __expf(Out[tid]); Out[tid] = __frsqrt_rn(Out[tid]); - Out[tid] = __fsqrt_rd(Out[tid]); - Out[tid] = __fsqrt_rn(Out[tid]); - Out[tid] = __fsqrt_ru(Out[tid]); - Out[tid] = __fsqrt_rz(Out[tid]); + //Out[tid] = __fsqrt_rd(Out[tid]); + //Out[tid] = __fsqrt_rn(Out[tid]); + //Out[tid] = __fsqrt_ru(Out[tid]); + //Out[tid] = __fsqrt_rz(Out[tid]); Out[tid] = __log10f(Out[tid]); Out[tid] = __log2f(Out[tid]); Out[tid] = __logf(Out[tid]); diff --git a/tests/src/deviceLib/hipSimpleAtomicsTest.cpp b/tests/src/deviceLib/hipSimpleAtomicsTest.cpp index 4890ed0f97..ec38a5d327 100644 --- a/tests/src/deviceLib/hipSimpleAtomicsTest.cpp +++ b/tests/src/deviceLib/hipSimpleAtomicsTest.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11 --gpu-architecture=sm_60 * RUN: %t * HIT_END */ @@ -334,4 +334,4 @@ int main(int argc, char** argv) { hipDeviceReset(); printf("%s completed, returned %s\n", sampleName, testResult ? "OK" : "ERROR!"); exit(testResult ? EXIT_SUCCESS : EXIT_FAILURE); -} \ No newline at end of file +} diff --git a/tests/src/deviceLib/hipTestClock.cpp b/tests/src/deviceLib/hipTestClock.cpp index 46f64e35a3..ee6dca8a42 100644 --- a/tests/src/deviceLib/hipTestClock.cpp +++ b/tests/src/deviceLib/hipTestClock.cpp @@ -33,8 +33,6 @@ THE SOFTWARE. #define LEN 512 #define SIZE 2048 -struct TestClock { - static __global__ void kernel1(int* Ad) { int tid = threadIdx.x + blockIdx.x * blockDim.x; Ad[tid] = clock() + clock64() + __clock() + __clock64(); @@ -61,9 +59,8 @@ struct TestClock { assert(0 != A[i]); } } -}; int main() { - TestClock().run(); + run(); passed(); } diff --git a/tests/src/deviceLib/hipTestFMA.cpp b/tests/src/deviceLib/hipTestFMA.cpp new file mode 100644 index 0000000000..6350999084 --- /dev/null +++ b/tests/src/deviceLib/hipTestFMA.cpp @@ -0,0 +1,140 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * RUN: %t + * HIT_END + */ + +#include "test_common.h" +#include +#include +#include +#include + +#define HIP_ASSERT(status) assert(status == hipSuccess) + +#define LEN 50 +#define SIZE (LEN * sizeof(bool)) + + __global__ void kernelTestFMA(bool *Ad) { + float f = 1.0f / 3.0f; + double d = f; + int i = 0; + auto Check = [&](bool Cond) { Ad[i++] = Cond; }; + // f * f + 3.0f will be different if promoted to double. + float floatResult = fma(f, f, 3.0f); + double doubleResult = fma(d, d, 3.0); + Check(floatResult != doubleResult); + + // check promote to float. + Check(fma(f, f, 3) == floatResult); + Check(fma(f, f, (char)3) == floatResult); + Check(fma(f, f, (unsigned char)3) == floatResult); + Check(fma(f, f, (short)3) == floatResult); + Check(fma(f, f, (unsigned short)3) == floatResult); + Check(fma(f, f, (int)3) == floatResult); + Check(fma(f, f, (unsigned int)3) == floatResult); + Check(fma(f, f, (long)3) == floatResult); + Check(fma(f, f, (unsigned long)3) == floatResult); + Check(fma(f, f, true) == fma(f, f, 1.0f)); + + // check promote to double. + Check(fma(d, (double)f, 3) == doubleResult); + Check(fma(d, (double)f, (char)3) == doubleResult); + Check(fma(d, (double)f, (unsigned char)3) == doubleResult); + Check(fma(d, (double)f, (short)3) == doubleResult); + Check(fma(d, (double)f, (unsigned short)3) == doubleResult); + Check(fma(d, (double)f, (int)3) == doubleResult); + Check(fma(d, (double)f, (unsigned int)3) == doubleResult); + Check(fma(d, (double)f, (long)3) == doubleResult); + Check(fma(d, (double)f, (unsigned long)3) == doubleResult); + Check(fma(d, (double)f, true) == fma((double)f, (double)f, 1.0)); + + while (i < LEN) + Check(true); + } + + void runTestFMA() { + bool *Ad; + bool A[LEN]; + for (unsigned i = 0; i < LEN; i++) { + A[i] = 0; + } + + HIP_ASSERT(hipMalloc((void **)&Ad, SIZE)); + hipLaunchKernelGGL(kernelTestFMA, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, Ad); + HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost)); + + for (unsigned i = 0; i < LEN; i++) { + assert(A[i]); + } + } + + __global__ void kernelTestHalfFMA(bool *Ad) { + _Float16 h = (_Float16)(1.0f/3.0f); + float f = h; + double d = f; + int i = 0; + auto Check = [&](bool Cond) { Ad[i++] = Cond; }; + // h * h + 3 will be different if promoted to float. + _Float16 halfResult = fma(h, h, (_Float16)3); + float floatResult = fma(f, f, 3.0f); + double doubleResult = fma(d, d, 3.0); + Check(halfResult != floatResult); + Check(halfResult != doubleResult); + + // check promote to half. + Check(fma(h, h, 3) == halfResult); + Check(fma(h, h, (char)3) == halfResult); + Check(fma(h, h, (unsigned char)3) == halfResult); + Check(fma(h, h, (short)3) == halfResult); + Check(fma(h, h, (unsigned short)3) == halfResult); + Check(fma(h, h, (int)3) == halfResult); + Check(fma(h, h, (unsigned int)3) == halfResult); + Check(fma(h, h, (long)3) == halfResult); + Check(fma(h, h, (unsigned long)3) == halfResult); + Check(fma(h, h, true) == fma(h, h, (_Float16)1)); + + while (i < LEN) + Check(true); + } + + void runTestHalfFMA() { + bool *Ad; + bool A[LEN]; + for (unsigned i = 0; i < LEN; i++) { + A[i] = 0; + } + + HIP_ASSERT(hipMalloc((void **)&Ad, SIZE)); + hipLaunchKernelGGL(kernelTestHalfFMA, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, Ad); + HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost)); + + for (unsigned i = 0; i < LEN; i++) { + assert(A[i]); + } + } + +int main() { + runTestFMA(); + runTestHalfFMA(); + passed(); +} diff --git a/tests/src/deviceLib/hipTestIncludeMath.cpp b/tests/src/deviceLib/hipTestIncludeMath.cpp index 6063eee76c..92b93eda36 100644 --- a/tests/src/deviceLib/hipTestIncludeMath.cpp +++ b/tests/src/deviceLib/hipTestIncludeMath.cpp @@ -30,6 +30,7 @@ THE SOFTWARE. // Incorrect implementation causes compilation failure due to conflict // declartions. +#include #include // Test __HIP_DEVICE_COMPILE__ is defined after math_functions.h @@ -45,14 +46,6 @@ __device__ __host__ inline void throw_std_bad_alloc() #endif } -// Test __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ and __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ -// is defined. Eigen HIP/hcc/Half.h __ldg depends on this. -#if !defined(__HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__) || \ - !defined(__HIP_ARCH_HAS_DYNAMIC_PARALLEL__) -#error \ - "__HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ or __HIP_ARCH_HAS_DYNAMIC_PARALLEL__ not defined" -#endif - #include #include "test_common.h" diff --git a/tests/src/deviceLib/hipTestNew.cpp b/tests/src/deviceLib/hipTestNew.cpp index 60774ff21d..d644f8b483 100644 --- a/tests/src/deviceLib/hipTestNew.cpp +++ b/tests/src/deviceLib/hipTestNew.cpp @@ -33,7 +33,6 @@ THE SOFTWARE. #define LEN 512 #define SIZE 2048 -struct TestPlacementNew { class A { public: __device__ A() { @@ -63,9 +62,8 @@ struct TestPlacementNew { assert(i == A[i]); } } -}; int main() { - TestPlacementNew().run(); + run(); passed(); } diff --git a/tests/src/deviceLib/hip_trig.cpp b/tests/src/deviceLib/hip_trig.cpp index e9bc1dcf13..59eaeacadb 100644 --- a/tests/src/deviceLib/hip_trig.cpp +++ b/tests/src/deviceLib/hip_trig.cpp @@ -34,21 +34,26 @@ THE SOFTWARE. #include +#define HIP_ASSERT(x) (assert((x) == hipSuccess)) + #define LEN 512 #define SIZE LEN << 2 +#define TEST_DEBUG (0) + __global__ void kernel_trig(hipLaunchParm lp, float* In, float* sin_d, float* cos_d, float* tan_d, float* sin_pd, float* cos_pd) { int tid = threadIdx.x + blockIdx.x * blockDim.x; - sin_d[tid] = __sinf(In[tid]); - cos_d[tid] = __cosf(In[tid]); - tan_d[tid] = __tanf(In[tid]); - __sincosf(In[tid], &sin_pd[tid], &cos_pd[tid]); + sin_d[tid] = sinf(In[tid]); + cos_d[tid] = cosf(In[tid]); + tan_d[tid] = tanf(In[tid]); + sincosf(In[tid], &sin_pd[tid], &cos_pd[tid]); } int main() { float *In, *sin_h, *cos_h, *tan_h, *sin_ph, *cos_ph; float *In_d, *sin_d, *cos_d, *tan_d, *sin_pd, *cos_pd; + int errors = 0; In = new float[LEN]; sin_h = new float[LEN]; cos_h = new float[LEN]; @@ -63,14 +68,16 @@ int main() { sin_ph[i] = 0.0f; cos_ph[i] = 0.0f; } - hipMalloc((void**)&In_d, SIZE); - hipMalloc((void**)&sin_d, SIZE); - hipMalloc((void**)&cos_d, SIZE); - hipMalloc((void**)&tan_d, SIZE); - hipMalloc((void**)&sin_pd, SIZE); - hipMalloc((void**)&cos_pd, SIZE); + HIP_ASSERT(hipMalloc((void**)&In_d, SIZE)); + HIP_ASSERT(hipMalloc((void**)&sin_d, SIZE)); + HIP_ASSERT(hipMalloc((void**)&cos_d, SIZE)); + HIP_ASSERT(hipMalloc((void**)&tan_d, SIZE)); + HIP_ASSERT(hipMalloc((void**)&sin_pd, SIZE)); + HIP_ASSERT(hipMalloc((void**)&cos_pd, SIZE)); + hipMemcpy(In_d, In, SIZE, hipMemcpyHostToDevice); - hipLaunchKernel(kernel_trig, dim3(LEN, 1, 1), dim3(1, 1, 1), 0, 0, In_d, sin_d, cos_d, tan_d, + hipLaunchKernel(kernel_trig, dim3(LEN, 1, 1), dim3(1, 1, 1), 0, 0, + In_d, sin_d, cos_d, tan_d, sin_pd, cos_pd); hipMemcpy(sin_h, sin_d, SIZE, hipMemcpyDeviceToHost); hipMemcpy(cos_h, cos_d, SIZE, hipMemcpyDeviceToHost); @@ -79,8 +86,28 @@ int main() { hipMemcpy(cos_ph, cos_pd, SIZE, hipMemcpyDeviceToHost); for (int i = 0; i < LEN; i++) { if (sin_h[i] != sin_ph[i] || cos_h[i] != cos_ph[i] || tan_h[i] * cos_h[i] != sin_h[i]) { - std::cout << "Failed!" << std::endl; + errors++; +#if TEST_DEBUG + std::cout << "Check Failed!" << std::endl; + std::cout << " sin_h: " << sin_h[i] << " sin_ph: " << sin_ph[i] << "\n" + << " cos_h: " << cos_h[i] << " cos_ph:" << cos_ph[i] << "\n" + << " tan_h * cos_h: " << tan_h[i] * cos_h[i] << " sin_h[i]: " << sin_h[i] << "\n"; +#endif } } - passed(); + + HIP_ASSERT(hipFree(In_d)); + HIP_ASSERT(hipFree(sin_d)); + HIP_ASSERT(hipFree(cos_d)); + HIP_ASSERT(hipFree(tan_d)); + HIP_ASSERT(hipFree(sin_pd)); + HIP_ASSERT(hipFree(cos_pd)); + + if (errors != 0) { + std::cout << "hip_trig FAILED!" << std::endl; + return -1; + } else { + std::cout << "hip_trig PASSED!" << std::endl; + } + return errors; } diff --git a/tests/src/hipEnvVarDriver.cpp b/tests/src/hipEnvVarDriver.cpp index 599b138565..e52aa46063 100644 --- a/tests/src/hipEnvVarDriver.cpp +++ b/tests/src/hipEnvVarDriver.cpp @@ -38,7 +38,10 @@ int getDeviceNumber() { string str; std::this_thread::sleep_for(std::chrono::milliseconds(10)); if (!(in = popen("./directed_tests/hipEnvVar -c", "r"))) { - return 1; + // Check at same level + if (!(in = popen("./hipEnvVar -c", "r"))) { + return 1; + } } while (fgets(buff, 512, in) != NULL) { cout << buff; @@ -54,7 +57,11 @@ void getDevicePCIBusNumRemote(int deviceID, char* pciBusID) { str += std::to_string(deviceID); std::this_thread::sleep_for(std::chrono::milliseconds(10)); if (!(in = popen(str.c_str(), "r"))) { - exit(1); + // Check at same level + if (!(in = popen("./hipEnvVar -d ", "r"))) { + exit(1); + } + } while (fgets(pciBusID, 100, in) != NULL) { cout << pciBusID; diff --git a/tests/src/kernel/hipTestGlobalVariable.cpp b/tests/src/kernel/hipTestGlobalVariable.cpp new file mode 100644 index 0000000000..8ab8bef9c2 --- /dev/null +++ b/tests/src/kernel/hipTestGlobalVariable.cpp @@ -0,0 +1,91 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp + * RUN: %t + * HIT_END + */ + +#include +#include +#include +#include "test_common.h" + +#define HIP_ASSERT(status) assert(status == hipSuccess) + +#define LEN 512 +#define SIZE 2048 + + __constant__ int ConstantGlobalVar = 123; + + static __global__ void kernel(int* Ad) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + Ad[tid] = ConstantGlobalVar; + } + + void runTestConstantGlobalVar() { + int *A, *Ad; + A = new int[LEN]; + for (unsigned i = 0; i < LEN; i++) { + A[i] = 0; + } + + HIP_ASSERT(hipMalloc((void**)&Ad, SIZE)); + hipLaunchKernelGGL(kernel, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad); + HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost)); + + for (unsigned i = 0; i < LEN; i++) { + assert(123 == A[i]); + } + } + + __device__ int GlobalArray[LEN]; + + static __global__ void kernelWrite() { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + GlobalArray[tid] = tid; + } + static __global__ void kernelRead(int* Ad) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + Ad[tid] = GlobalArray[tid]; + } + + void runTestGlobalArray() { + int *A, *Ad; + A = new int[LEN]; + for (unsigned i = 0; i < LEN; i++) { + A[i] = 0; + } + + HIP_ASSERT(hipMalloc((void**)&Ad, SIZE)); + hipLaunchKernelGGL(kernelWrite, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0); + hipLaunchKernelGGL(kernelRead, dim3(1, 1, 1), dim3(LEN, 1, 1), 0, 0, Ad); + HIP_ASSERT(hipMemcpy(A, Ad, SIZE, hipMemcpyDeviceToHost)); + + for (unsigned i = 0; i < LEN; i++) { + assert(i == A[i]); + } + } + +int main() { + runTestConstantGlobalVar(); + runTestGlobalArray(); + passed(); +} diff --git a/tests/src/runtimeApi/event/record_event.cpp b/tests/src/runtimeApi/event/record_event.cpp index afd6bef2ef..3eb54e1735 100644 --- a/tests/src/runtimeApi/event/record_event.cpp +++ b/tests/src/runtimeApi/event/record_event.cpp @@ -170,8 +170,8 @@ void runTests(int64_t numElements) { // for (int waitStart=0; waitStart<2; waitStart++) { for (int waitStart = 1; waitStart >= 0; waitStart--) { unsigned W = waitStart ? 0x1000 : 0; - test(W | 0x01, C_d, C_h, numElements, 0, waitStart, syncNone); - test(W | 0x02, C_d, C_h, numElements, stream, waitStart, syncNone); + test(W | 0x01, C_d, C_h, numElements, 0, 0, syncNone); + test(W | 0x02, C_d, C_h, numElements, stream, 0, syncNone); test(W | 0x04, C_d, C_h, numElements, 0, waitStart, syncStream); test(W | 0x08, C_d, C_h, numElements, stream, waitStart, syncStream); test(W | 0x10, C_d, C_h, numElements, 0, waitStart, syncStopEvent); diff --git a/tests/src/texture/hipBindTexRef1DFetch.cpp b/tests/src/texture/hipBindTexRef1DFetch.cpp index 5d9a1b103b..fa63efc1bb 100644 --- a/tests/src/texture/hipBindTexRef1DFetch.cpp +++ b/tests/src/texture/hipBindTexRef1DFetch.cpp @@ -56,7 +56,7 @@ int runTest() { int testResult = 1; float *texBuf; float val[N], output[N]; - size_t size = 0; + size_t offset = 0; float *devBuf; for (int i = 0; i < N; i++) { val[i] = (float)i; @@ -74,7 +74,8 @@ int runTest() { tex.filterMode = hipFilterModePoint; tex.normalized = 0; - HIPCHECK(hipBindTexture(&size, tex, (void *)texBuf, chanDesc, N * sizeof(float))); + HIPCHECK(hipBindTexture(&offset, tex, (void *)texBuf, chanDesc, N * sizeof(float))); + HIPCHECK(hipGetTextureAlignmentOffset(&offset,&tex)); dim3 dimBlock(64, 1, 1); dim3 dimGrid(N / dimBlock.x, 1, 1); diff --git a/tests/src/texture/hipGetAlgntoffset2D.cpp b/tests/src/texture/hipGetAlgntoffset2D.cpp deleted file mode 100644 index 3a4cc7dfc8..0000000000 --- a/tests/src/texture/hipGetAlgntoffset2D.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/* -Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -/* HIT_START - * BUILD: %t %s ../test_common.cpp - * RUN: %t - * HIT_END - */ -#include -#include "test_common.h" - -using namespace std; -#define R 8 //rows, height -#define C 8 //columns, width - -texture tex; - -bool runTest(void); - -int main(int argc, char** argv) { - bool testResult=runTest(); - - if (testResult) { - passed(); - } else { - exit(EXIT_FAILURE); - } -} - -bool runTest() -{ -int val[R][C],i,j; -size_t offset; - -for(i=0;i