diff --git a/CMakeLists.txt b/CMakeLists.txt index d1a2b133c5..b8e2c6947b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,6 +61,21 @@ add_to_config(_versionInfo HIP_VERSION_MAJOR) add_to_config(_versionInfo HIP_VERSION_MINOR) add_to_config(_versionInfo HIP_VERSION_PATCH) +set (HIP_LIB_VERSION_MAJOR ${HIP_VERSION_MAJOR}) +set (HIP_LIB_VERSION_MINOR ${HIP_VERSION_MINOR}) +if (${ROCM_PATCH_VERSION} ) + set (HIP_LIB_VERSION_PATCH ${ROCM_PATCH_VERSION}) +else () + set (HIP_LIB_VERSION_PATCH ${HIP_VERSION_PATCH}) +endif () +set (HIP_LIB_VERSION_STRING "${HIP_LIB_VERSION_MAJOR}.${HIP_LIB_VERSION_MINOR}.${HIP_LIB_VERSION_PATCH}") + +if (DEFINED ENV{ROCM_RPATH}) + set (CMAKE_INSTALL_RPATH "$ENV{ROCM_RPATH}") + set (CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) + set (CMAKE_SKIP_BUILD_RPATH TRUE) +endif () + if(CMAKE_CXX_COMPILER MATCHES ".*hcc") set(HIP_COMPILER "hcc" CACHE STRING "HIP Compiler") set(HIP_PLATFORM "hcc" CACHE STRING "HIP Platform") @@ -114,6 +129,8 @@ if(NOT DEFINED ENV{HIP_RUNTIME}) set(HIP_RUNTIME "HCC" CACHE STRING "HIP Runtime") elseif (HIP_PLATFORM STREQUAL "vdi") set(HIP_RUNTIME "VDI" CACHE STRING "HIP Runtime") + elseif (HIP_PLATFORM STREQUAL "nvcc") + set(HIP_RUNTIME "CUDA" CACHE STRING "HIP Runtime") endif() endif() add_to_config(_buildInfo HIP_RUNTIME) @@ -207,19 +224,6 @@ if (NOT CPACK_SET_DESTDIR) set(CPACK_PACKAGING_INSTALL_PREFIX "/opt/rocm/hip" CACHE PATH "Default installation path of hcc installer package") endif (NOT CPACK_SET_DESTDIR) -# Check if we need to enable ATP marker -if(NOT DEFINED COMPILE_HIP_ATP_MARKER) - if(NOT DEFINED ENV{COMPILE_HIP_ATP_MARKER}) - set(COMPILE_HIP_ATP_MARKER 0) - else() - set(COMPILE_HIP_ATP_MARKER $ENV{COMPILE_HIP_ATP_MARKER}) - message(WARNING "HIP Markers are deprecated, please use roctracer/rocTX marker APIs.") - endif() -else() - message(WARNING "HIP Markers are deprecated, please use roctracer/rocTX marker APIs.") -endif() -add_to_config(_buildInfo COMPILE_HIP_ATP_MARKER) - ############################# # 
Profiling API support ############################# @@ -309,10 +313,6 @@ message(STATUS "\nHSA runtime in: " ${HSA_PATH}) if(HIP_PLATFORM STREQUAL "hcc") include_directories(${PROJECT_SOURCE_DIR}/include) set(HIP_HCC_BUILD_FLAGS) - if(COMPILE_HIP_ATP_MARKER) - include_directories(/opt/rocm/profiler/CXLActivityLogger/include) - set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -DCOMPILE_HIP_ATP_MARKER=1") - endif() # Add HIP_VERSION to CMAKE__FLAGS set(HIP_HCC_BUILD_FLAGS "${HIP_HCC_BUILD_FLAGS} -DHIP_VERSION_MAJOR=${HIP_VERSION_MAJOR} -DHIP_VERSION_MINOR=${HIP_VERSION_MINOR} -DHIP_VERSION_PATCH=${HIP_VERSION_GITDATE}") @@ -328,7 +328,6 @@ if(HIP_PLATFORM STREQUAL "hcc") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${HIP_HCC_BUILD_FLAGS}") set(SOURCE_FILES_RUNTIME - src/code_object_bundle.cpp src/program_state.cpp src/hip_clang.cpp src/hip_hcc.cpp @@ -349,23 +348,6 @@ if(HIP_PLATFORM STREQUAL "hcc") src/env.cpp src/h2f.cpp) - set ( HIP_LIB_VERSION_MAJOR ${HIP_VERSION_MAJOR} ) - set ( HIP_LIB_VERSION_MINOR ${HIP_VERSION_MINOR} ) - if ( ${ROCM_PATCH_VERSION} ) - set ( HIP_LIB_VERSION_PATCH ${ROCM_PATCH_VERSION}) - else () - set ( HIP_LIB_VERSION_PATCH ${HIP_VERSION_PATCH} ) - endif () - set ( HIP_LIB_VERSION_STRING "${HIP_LIB_VERSION_MAJOR}.${HIP_LIB_VERSION_MINOR}.${HIP_LIB_VERSION_PATCH}" ) - - if ( DEFINED ENV{ROCM_RPATH} ) - set (CMAKE_INSTALL_RPATH "$ENV{ROCM_RPATH}" ) - set (CMAKE_BUILD_WITH_INSTALL_RPATH TRUE ) - set (CMAKE_SKIP_BUILD_RPATH TRUE ) - endif () - if(COMPILE_HIP_ATP_MARKER) - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L/opt/rocm/profiler/CXLActivityLogger/bin/x86_64 -lCXLActivityLogger") - endif() add_library(hip_hcc SHARED ${SOURCE_FILES_RUNTIME}) add_library(hip_hcc_static STATIC ${SOURCE_FILES_RUNTIME}) @@ -376,7 +358,7 @@ if(HIP_PLATFORM STREQUAL "hcc") target_link_libraries(hip_hcc PRIVATE hc_am) target_link_libraries(hip_hcc_static PRIVATE hc_am) - add_library(hiprtc SHARED src/hiprtc.cpp src/code_object_bundle.cpp) + 
add_library(hiprtc SHARED src/hiprtc.cpp) target_compile_options(hiprtc PRIVATE -DDISABLE_REDUCED_GPU_BLOB_COPY) set_property ( TARGET hiprtc PROPERTY VERSION "${HIP_LIB_VERSION_STRING}" ) set_property ( TARGET hiprtc PROPERTY SOVERSION "${HIP_LIB_VERSION_MAJOR}" ) @@ -387,6 +369,9 @@ if(HIP_PLATFORM STREQUAL "hcc") set_target_properties(hip_hcc PROPERTIES CXX_VISIBILITY_PRESET hidden) set_target_properties(hip_hcc PROPERTIES VISIBILITY_INLINES_HIDDEN 1) + set_target_properties(hiprtc PROPERTIES CXX_VISIBILITY_PRESET hidden) + set_target_properties(hiprtc PROPERTIES VISIBILITY_INLINES_HIDDEN 1) + if(HIP_PLATFORM STREQUAL "hcc") find_package(amd_comgr REQUIRED CONFIG @@ -419,7 +404,9 @@ if(HIP_PLATFORM STREQUAL "hcc") file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo}) endif() +if(HIP_PLATFORM STREQUAL "hcc" OR HIP_PLATFORM STREQUAL "vdi") add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/lpl_ca) +endif() # Generate .hipVersion file(WRITE "${PROJECT_BINARY_DIR}/.hipVersion" ${_versionInfo}) @@ -448,7 +435,12 @@ endif() ############################# # Install hip_hcc if platform is hcc if(HIP_PLATFORM STREQUAL "hcc") - install(TARGETS hip_hcc_static hip_hcc hiprtc DESTINATION lib) + install(TARGETS hip_hcc_static hip_hcc hiprtc DESTINATION lib) +endif() + +# Install .hipInfo +if(HIP_PLATFORM STREQUAL "hcc" OR HIP_PLATFORM STREQUAL "vdi") + install(FILES ${PROJECT_BINARY_DIR}/.hipInfo DESTINATION lib) endif() # Install .hipInfo @@ -478,21 +470,12 @@ elseif( HIP_PLATFORM STREQUAL "vdi") endif() include(CMakePackageConfigHelpers) - if(HIP_COMPILER STREQUAL "hcc") - configure_package_config_file( - hip-config-hcc.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/hip-config.cmake - INSTALL_DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} - PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR - ) - elseif(HIP_COMPILER STREQUAL "clang") - configure_package_config_file( - hip-config-clang.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/hip-config.cmake - INSTALL_DESTINATION 
${CONFIG_PACKAGE_INSTALL_DIR} - PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR - ) - endif() + configure_package_config_file( + hip-config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/hip-config.cmake + INSTALL_DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} + PATH_VARS LIB_INSTALL_DIR INCLUDE_INSTALL_DIR BIN_INSTALL_DIR + ) write_basic_package_version_file( ${CMAKE_CURRENT_BINARY_DIR}/hip-config-version.cmake @@ -657,8 +640,12 @@ if(${RUN_HIT} EQUAL 0) include_directories(${HIP_SRC_PATH}/tests/src) hit_add_directory_recursive(${HIP_SRC_PATH}/tests/src "directed_tests") + # Add unit tests + include_directories(${HIP_SRC_PATH}/tests/unit) + hit_add_directory_recursive(${HIP_SRC_PATH}/tests/unit "unit_tests") + # Add top-level tests to build_tests - add_custom_target(build_tests DEPENDS directed_tests) + add_custom_target(build_tests DEPENDS directed_tests unit_tests) # Add custom target: check add_custom_target(check COMMAND "${CMAKE_COMMAND}" --build . --target test DEPENDS build_tests) diff --git a/Jenkinsfile b/Jenkinsfile index e38f7824d2..a7cec9fd1b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -295,13 +295,13 @@ def docker_upload_dockerhub( String local_org, String image_name, String remote_ String build_config = 'Release' String job_name = env.JOB_NAME.toLowerCase( ) -// The following launches 3 builds in parallel: rocm-head, rocm-3.1.x and cuda-10.x -parallel rocm_3_1: +// The following launches 3 builds in parallel: rocm-head, rocm-3.3.x and cuda-10.x +parallel rocm_3_3: { node('hip-rocm') { - String hcc_ver = 'rocm-3.1.x' - String from_image = 'ci_test_nodes/rocm-3.1.x/ubuntu-16.04:latest' + String hcc_ver = 'rocm-3.3.x' + String from_image = 'ci_test_nodes/rocm-3.3.x/ubuntu-16.04:latest' String inside_args = '--device=/dev/kfd --device=/dev/dri --group-add=video' // Checkout source code, dependencies and version files diff --git a/LICENSE b/LICENSE.txt similarity index 85% rename from LICENSE rename to LICENSE.txt index 586fbd5a39..e44ba39fd0 100644 
--- a/LICENSE +++ b/LICENSE.txt @@ -1,5 +1,4 @@ -/* -Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2008-2020 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -13,11 +12,9 @@ all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - diff --git a/README.md b/README.md index 2bffd12162..13e7c8f335 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,13 @@ ## What is this repository for? ### -HIP allows developers to convert CUDA code to portable C++. The same source code can be compiled to run on NVIDIA or AMD GPUs. +**HIP is a C++ Runtime API and Kernel Language that allows developers to create portable applications for AMD and NVIDIA GPUs from single source code.** + Key features include: * HIP is very thin and has little or no performance impact over coding directly in CUDA or hcc "HC" mode. * HIP allows coding in a single-source C++ programming language including features such as templates, C++11 lambdas, classes, namespaces, and more. * HIP allows developers to use the "best" development environment and tools on each target platform. -* The [HIPIFY](hipify-clang/README.md) tools automatically convert source from CUDA to HIP. 
+* The [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) tools automatically convert source from CUDA to HIP. * Developers can specialize for the platform (CUDA or hcc) to tune for performance or handle tricky cases New projects can be developed directly in the portable HIP C++ language and can run on either NVIDIA or AMD platforms. Additionally, HIP provides porting tools which make it easy to port existing CUDA codes to the HIP layer, with no loss of performance as compared to the original CUDA application. HIP is not intended to be a drop-in replacement for CUDA, and developers should expect to do some manual coding and performance tuning work to complete the port. @@ -36,7 +37,7 @@ HIP releases are typically of two types. The tag naming convention is different - [HIP Profiling ](docs/markdown/hip_profiling.md) - [HIP Debugging](docs/markdown/hip_debugging.md) - [HIP Terminology](docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenCL) -- [HIPIFY](hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - Supported CUDA APIs: * [Runtime API](docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md) * [Driver API](docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md) @@ -114,7 +115,7 @@ provides source portability to either platform. 
HIP provides the _hipcc_ compi ## Examples and Getting Started: -* A sample and [blog](http://gpuopen.com/hip-to-be-squared-an-introductory-hip-tutorial) that uses any of [HIPIFY](hipify-clang/README.md) tools to convert a simple app from CUDA to HIP: +* A sample and [blog](http://gpuopen.com/hip-to-be-squared-an-introductory-hip-tutorial) that uses any of [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) tools to convert a simple app from CUDA to HIP: ```shell diff --git a/bin/extractkernel b/bin/extractkernel new file mode 100755 index 0000000000..81760f50de --- /dev/null +++ b/bin/extractkernel @@ -0,0 +1,249 @@ +#!/usr/bin/perl +use strict; +use File::Copy; +use File::Spec; +use File::Basename; +use File::Which; +use Cwd 'realpath'; +use Getopt::Std; +use List::Util qw(max); + +sub usage { + print("Usage: $0 [OPTION]... -i \n"); + print("Extract the device kernels from an hcc executable.\n\n"); + print("-h \t\t\t\tshow this help message\n"); + print("-i \t\t\t\tinput file\n"); + exit; +} + +my $debug = 0; + +# use clang offload bundler (instead of "dd") +# to extract device object from the bundle +my $use_clang_offload_bundler = 1; + +my %options=(); +getopts('hi:', \%options); + +if (!%options || defined $options{h}) { + usage(); +} + +my $input_file; +defined $options{i} || die("input not specified"); +$input_file = $options{i}; +(-f $input_file) || die("can't find $input_file"); + +# look for llvm-objdump and clang-offload-bundler +my $tools_path_prefix; +my $llvm_objdump; +my $clang_offload_bundler; + +if (defined $ENV{'HCC_HOME'}) { + $tools_path_prefix = File::Spec->catfile($ENV{'HCC_HOME'}, "bin"); + $llvm_objdump = File::Spec->catfile($tools_path_prefix, "llvm-objdump"); + $clang_offload_bundler = File::Spec->catfile($tools_path_prefix, "clang-offload-bundler"); +} +else { + $tools_path_prefix = dirname(realpath($0)); + $llvm_objdump = File::Spec->catfile($tools_path_prefix, "llvm-objdump"); + $clang_offload_bundler = 
File::Spec->catfile($tools_path_prefix, "clang-offload-bundler"); + if (!(-f $llvm_objdump)) { + $tools_path_prefix = realpath($tools_path_prefix."/../../hcc/bin"); + $llvm_objdump = File::Spec->catfile($tools_path_prefix, "llvm-objdump"); + $clang_offload_bundler = File::Spec->catfile($tools_path_prefix, "clang-offload-bundler"); + } +} + +if (!(-f $llvm_objdump)) { + $llvm_objdump = which("llvm-objdump"); + if (!(-f $llvm_objdump)) { + die("Can't find llvm-objdump\n"); + } +} + +if (!(-f $clang_offload_bundler)) { + $clang_offload_bundler = which("clang-offload-bundler"); + if (!(-f $clang_offload_bundler)) { + die("Can't find clang-offload-bundler\n"); + } +} + +# kernel section information for HCC +my $kernel_section_name = ".kernel"; +my $kernel_triple = "hcc-amdgcn-amd-amdhsa--"; +my $kernel_blob_alignment = 1; + +my $kernel_section_size = hex(`objdump -h $input_file | grep $kernel_section_name | awk '{print \$3}'`); +if (!$kernel_section_size) { + + # If there isn't a section created by HCC, + # try to detect a kernel section created by HIP-Clang + $kernel_section_name = ".hip_fatbin"; + $kernel_triple = "hip-amdgcn-amd-amdhsa-"; + $kernel_blob_alignment = 8; + + $kernel_section_size = hex(`objdump -h $input_file | grep $kernel_section_name | awk '{print \$3}'`); + $kernel_section_size or die("No kernel section found\n"); +} + +my $kernel_section_offset = hex(`objdump -h $input_file | grep $kernel_section_name | awk '{print \$6}'`); +my $kernel_section_end = $kernel_section_offset + $kernel_section_size; +if ($debug) { + print "kernel section size: $kernel_section_size\n"; + print "kernel section offset: $kernel_section_offset\n"; + print "kernel section end: $kernel_section_end\n"; +} + +# parse kernel bundle header (3-arg open with low-precedence "or" so a failed open really dies) +open(INPUT_FP, '<', $input_file) or die("can't open $input_file: $!\n"); +binmode INPUT_FP; + +my $current_blob_offset = $kernel_section_offset; +my $num_blobs = 0; +#while ($current_blob_offset < $kernel_section_end) { +while(1) { + + # adjust the offset to the blob alignment + 
$current_blob_offset = int(($current_blob_offset + ($kernel_blob_alignment - 1)) / $kernel_blob_alignment) * $kernel_blob_alignment; + if ($debug) { + print "Current blob offset: $current_blob_offset\n"; + } + + if ($current_blob_offset >= $kernel_section_end) { + if ($debug) { + print "reached end of kernel section\n"; + } + last; + } + + seek(INPUT_FP, $current_blob_offset, 0); + + # skip OFFLOAD_BUNDLER_MAGIC_STR + my $magic_str; + my $read_bytes = read(INPUT_FP, $magic_str, 24); + if (($read_bytes != 24) || ($magic_str ne "__CLANG_OFFLOAD_BUNDLE__")) { + # didn't detect the bundle magic string + if ($debug) { + print "Offload bundle magic string not detected\n"; + } + last; + } + # read number of bundles + my $num_bundles; + $read_bytes = read(INPUT_FP, $num_bundles, 8); + $read_bytes == 8 or die("Fail to parse number of bundles\n"); + $num_bundles = unpack("Q", $num_bundles); + if ($debug) { + print "Blob $num_blobs, number of bundles: $num_bundles\n"; + } + + # detected GPU targets + my @asic_target_array; + + my $last_bundle_offset = 0; + my $last_bundle_size = 0; + + # strings for creating new files + my $file_blob_number = sprintf("%03d", $num_blobs); + my $filename_prefix = "${input_file}-${file_blob_number}"; + + my $clang_offloadbundler_outputs="-outputs=/dev/null"; + my $clang_offloadbundler_targets="-targets=host-x86_64-unknown-linux"; + + for (my $iter = 0; $iter < $num_bundles; $iter++) { + # read bundle offset + my $offset; + $read_bytes = read(INPUT_FP, $offset, 8); + $read_bytes == 8 or die("Fail to parse bundle offset\n"); + $offset = unpack("Q", $offset); + $last_bundle_offset = max($last_bundle_offset, $offset); + + # read bundle size + my $size; + $read_bytes = read(INPUT_FP, $size, 8); + $read_bytes == 8 or die("Fail to parse bundle size\n"); + $size = unpack("Q", $size); + if ($last_bundle_offset == $offset) { + $last_bundle_size = $size; + } + + # read triple size + my $triple_size; + $read_bytes = read(INPUT_FP, $triple_size, 8); + 
$read_bytes == 8 or die("Fail to parse triple size\n"); + $triple_size = unpack("Q", $triple_size); + + # triple + my $triple; + $read_bytes = read(INPUT_FP, $triple, $triple_size); + $read_bytes == $triple_size or die("Fail to parse triple\n"); + + if ($debug) { + print("\t bundle $iter: offset=$offset, size=$size, triple_size=$triple_size, triple=$triple\n"); + } + + # Only process GPU targets, skip host targets + my $triple_pattern = "^$kernel_triple"; + if ($triple =~ /$triple_pattern/) { + my $asic_target = substr($triple, length($kernel_triple)); + + # augment arguments for clang-offload-bundler + my $hsaco_file_name = "${filename_prefix}-${asic_target}.hsaco"; + $clang_offloadbundler_outputs = "${clang_offloadbundler_outputs},${hsaco_file_name}"; + $clang_offloadbundler_targets = "${clang_offloadbundler_targets},${triple}"; + + # add into asic_target_array + $asic_target_array[$#asic_target_array + 1]=$asic_target; + + if (!$use_clang_offload_bundler) { + my $offset_for_hsaco = $current_blob_offset + $offset; + my $dd_command ="dd if=${input_file} of=${hsaco_file_name} skip=$offset_for_hsaco count=$size bs=1 status=none"; + if ($debug) { + print("extract code bundle with dd: $dd_command\n"); + } + system($dd_command) == 0 + or die("Fail to extract code bundle with dd\n"); + } + + } else { + #print("Host target: " . $Triple . 
"\n"); + } + } + + # extract the code blob + my $blob_filename = "${filename_prefix}.bundle"; + my $write_bytes = $last_bundle_offset + $last_bundle_size; + system("dd if=$input_file of=$blob_filename skip=$current_blob_offset count=$write_bytes bs=1 status=none") == 0 + or die("Extracting kernel bundle file failed: $?"); + + if ($use_clang_offload_bundler) { + # use clang-offload-bundler to unbundle HSACO + my $command = "${clang_offload_bundler} -unbundle -type=o -inputs=${blob_filename} ${clang_offloadbundler_outputs} ${clang_offloadbundler_targets}"; + if ($debug) { + print("clang offload bundler command: $command\n"); + } + system($command) == 0 + or die("Fail to execute clang-offload-bundler"); + } + + for (my $iter = 0; $iter <= $#asic_target_array; $iter++) { + my $asic_target = $asic_target_array[$iter]; + my $hsaco_file_name = "${filename_prefix}-${asic_target}.hsaco"; + my $isa_file_name = "${filename_prefix}-${asic_target}.isa"; + + # use llvm-objdump to dump out GCN ISA + system("$llvm_objdump --disassemble --mcpu=$asic_target $hsaco_file_name > $isa_file_name") == 0 or die("Fail to disassemble AMDGPU ISA for target" . $asic_target); + + if ($debug) { + print("Generated GCN ISA for " . $asic_target . " at: " . $isa_file_name . 
"\n"); + } + } + + $current_blob_offset = $current_blob_offset + $last_bundle_offset + $last_bundle_size; + $num_blobs++; +} + +$num_blobs or die("No device code found.\n"); +exit(0); + diff --git a/bin/hipcc b/bin/hipcc index cc3f8ae77c..dcf98d2556 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -85,6 +85,7 @@ $HIP_LIB_PATH=$ENV{'HIP_LIB_PATH'}; $HIP_CLANG_PATH=$ENV{'HIP_CLANG_PATH'}; $DEVICE_LIB_PATH=$ENV{'DEVICE_LIB_PATH'}; $HIP_CLANG_HCC_COMPAT_MODE=$ENV{'HIP_CLANG_HCC_COMPAT_MODE'}; # HCC compatibility mode +$HIP_COMPILE_CXX_AS_HIP=$ENV{'HIP_COMPILE_CXX_AS_HIP'} // "1"; if (defined $HIP_VDI_HOME) { $HIP_INFO_PATH= "$HIP_VDI_HOME/lib/.hipInfo"; @@ -134,6 +135,7 @@ if (defined $HIP_RUNTIME and $HIP_RUNTIME eq "VDI" and !defined $HIP_VDI_HOME) { $HIP_VDI_HOME = $HIP_PATH; # use HIP_PATH } $HIPCXXFLAGS .= "-D__HIP_VDI__"; + $HIPCFLAGS .= "-D__HIP_VDI__"; } if (defined $HIP_VDI_HOME) { @@ -207,12 +209,10 @@ if ($HIP_PLATFORM eq "clang") { } else { $HIPCXXFLAGS .= " -std=c++11"; } - $HIPCXXFLAGS .= " -isystem $HIP_CLANG_INCLUDE_PATH"; + $HIPCXXFLAGS .= " -isystem $HIP_CLANG_INCLUDE_PATH/.."; + $HIPCFLAGS .= " -isystem $HIP_CLANG_INCLUDE_PATH/.."; $HIPLDFLAGS .= " -L$HIP_LIB_PATH"; - if (not $isWindows) { - $HIPLDFLAGS .= " -Wl,--rpath-link=$HIP_LIB_PATH"; - $HIPLDFLAGS .= " -lhip_hcc"; - } else { + if ($isWindows) { $HIPLDFLAGS .= " -lamdhip64"; } if ($HIP_CLANG_HCC_COMPAT_MODE) { @@ -222,8 +222,10 @@ if ($HIP_PLATFORM eq "clang") { $HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa"; $HIPCXXFLAGS .= " -isystem $HSA_PATH/include"; - if (!($HIP_RUNTIME eq "HCC")) { - $HIPCXXFLAGS .= " -D__HIP_VDI__ -fhip-new-launch-api"; + $HIPCFLAGS .= " -isystem $HSA_PATH/include"; + if ($HIP_RUNTIME ne "HCC" ) { + $HIPCXXFLAGS .= " -D__HIP_VDI__"; + $HIPCFLAGS .= " -D__HIP_VDI__"; } } elsif ($HIP_PLATFORM eq "hcc") { @@ -282,8 +284,11 @@ if ($HIP_PLATFORM eq "clang") { } $HIPCXXFLAGS .= " -isystem $HIP_PATH/include/hip/hcc_detail/cuda"; + $HIPCFLAGS .= " -isystem 
$HIP_PATH/include/hip/hcc_detail/cuda"; $HIPCXXFLAGS .= " -isystem $HSA_PATH/include"; + $HIPCFLAGS .= " -isystem $HSA_PATH/include"; $HIPCXXFLAGS .= " -Wno-deprecated-register"; + $HIPCFLAGS .= " -Wno-deprecated-register"; $HIPLDFLAGS .= " -L$HSA_PATH/lib -L$ROCM_PATH/lib -lhsa-runtime64 -lhc_am "; # $HIPLDFLAGS .= " -L$HCC_HOME/compiler/lib -lLLVMAMDGPUDesc -lLLVMAMDGPUUtils -lLLVMMC -lLLVMCore -lLLVMSupport "; @@ -321,6 +326,7 @@ if ($HIP_PLATFORM eq "clang") { $HIPCC="$CUDA_PATH/bin/nvcc"; $HIPCXXFLAGS .= " -Wno-deprecated-gpu-targets "; $HIPCXXFLAGS .= " -isystem $CUDA_PATH/include"; + $HIPCFLAGS .= " -isystem $CUDA_PATH/include"; $HIPLDFLAGS = " -Wno-deprecated-gpu-targets -lcuda -lcudart -L$CUDA_PATH/lib64"; } else { @@ -330,19 +336,30 @@ if ($HIP_PLATFORM eq "clang") { # Add paths to common HIP includes: $HIPCXXFLAGS .= " -isystem $HIP_INCLUDE_PATH" ; +$HIPCFLAGS .= " -isystem $HIP_INCLUDE_PATH" ; my $compileOnly = 0; my $needCXXFLAGS = 0; # need to add CXX flags to compile step +my $needCFLAGS = 0; # need to add C flags to compile step my $needLDFLAGS = 1; # need to add LDFLAGS to compile step. -my $hasC = 0; # options contain a c-style file (NVCC must force recognition as GPU file) +my $hasC = 0; # options contain a c-style file +my $hasCXX = 0; # options contain a cpp-style file (NVCC must force recognition as GPU file) my $hasCU = 0; # options contain a cu-style file (HCC must force recognition as GPU file) +my $hasHIP = 0; # options contain a hip-style file (HIP-Clang must pass offloading options) my $needHipHcc = ($HIP_PLATFORM eq 'hcc'); # set if we need to link hip_hcc.o from src tree. 
(some builds, ie cmake, provide their own) my $printHipVersion = 0; # print HIP version +my $printCXXFlags = 0; # print HIPCXXFLAGS +my $printLDFlags = 0; # print HIPLDFLAGS my $runCmd = 1; my $buildDeps = 0; my $linkType = 1; my $setLinkType = 0; my $coFormatv3 = 1; +if(defined $HIP_COMPILER and $HIP_COMPILER eq "hcc") { + $coFormatv3 = 0; +} +my $funcSupp = 0; # enable function support +my $rdc = 0; # whether -fgpu-rdc is on my @options = (); my @inputs = (); @@ -392,6 +409,7 @@ my $optArg = ""; # -O args my $targetOpt = '--amdgpu-target='; my $targetsStr = ""; my $skipOutputFile = 0; # file followed by -o should not contibute in picking compiler flags +my $prevArg = ""; # previous argument foreach $arg (@ARGV) { @@ -400,12 +418,12 @@ foreach $arg (@ARGV) my $swallowArg = 0; if ($arg eq '-c' or $arg eq '--genco') { $compileOnly = 1; - $needCXXFLAGS = 1; $needLDFLAGS = 0; } if ($skipOutputFile) { $toolArgs .= " $arg"; + $prevArg = $arg; $skipOutputFile = 0; next; } @@ -457,17 +475,27 @@ foreach $arg (@ARGV) $printHipVersion = 1; $runCmd = 0; } + if($trimarg eq '--cxxflags') { + $printCXXFlags = 1; + $runCmd = 0; + } + if($trimarg eq '--ldflags') { + $printLDFlags = 1; + $runCmd = 0; + } if($trimarg eq '-M') { $compileOnly = 1; $buildDeps = 1; } if($trimarg eq '-use_fast_math') { $HIPCXXFLAGS .= " -DHIP_FAST_MATH "; + $HIPCFLAGS .= " -DHIP_FAST_MATH "; } if(($trimarg eq '-use-staticlib') and ($setLinkType eq 0)) { $linkType = 0; $setLinkType = 1; + $swallowArg = 1; } if(($trimarg eq '-use-sharedlib') and ($setLinkType eq 0)) { @@ -590,8 +618,15 @@ foreach $arg (@ARGV) $toolArgs = substr $toolArgs, 0, -8; chomp $toolArgs; } + } elsif ($arg eq 'hip' and $prevArg eq '-x') { + $hasHIP = 1; } elsif ($arg =~ m/^-/) { # options start with - + if ($arg eq '-fgpu-rdc') { + $rdc = 1; + } elsif ($arg eq '-fno-gpu-rdc') { + $rdc = 0; + } # Process HIPCC options here: if ($arg =~ m/^--hipcc/) { @@ -599,31 +634,45 @@ foreach $arg (@ARGV) #if $arg eq "--hipcc_profile") { # 
Example argument here, hipcc # #} + if ($arg eq "--hipcc-func-supp") { + $funcSupp = 1; + } elsif ($arg eq "--hipcc-no-func-supp") { + $funcSupp = 0; + } } else { push (@options, $arg); } #print "O: <$arg>\n"; - } else { + } elsif ($prevArg ne '-o') { # input files and libraries - if (($arg =~ /\.cpp$/) or ($arg =~ /\.cxx$/) or ($arg =~ /\.c$/) or ($arg =~ /\.cc$/) ) { + if ($arg =~ /\.c$/) { $hasC = 1; + $needCFLAGS = 1; + $toolArgs .= " -x c" + } + elsif (($arg =~ /\.cpp$/) or ($arg =~ /\.cxx$/) or ($arg =~ /\.cc$/) ) { $needCXXFLAGS = 1; - if ($HIP_PLATFORM eq 'clang') { - $toolArgs .= " -x hip" + if ($HIP_COMPILE_CXX_AS_HIP eq '0' or $HIP_COMPILER ne "clang") { + $hasCXX = 1; + } else { + $hasHIP = 1; + $toolArgs .= " -x hip"; } } - if (($arg =~ /\.cu$/) or ($arg =~ /\.cuh$/) or ($arg =~ /\.hip$/)) { - $hasCU = 1; + elsif ((($arg =~ /\.cu$/ or $arg =~ /\.cuh$/) and $HIP_COMPILE_CXX_AS_HIP ne '0') or ($arg =~ /\.hip$/)) { $needCXXFLAGS = 1; - if ($HIP_PLATFORM eq 'clang') { - $toolArgs .= " -x hip" + if ($HIP_COMPILER eq "clang") { + $hasHIP = 1; + $toolArgs .= " -x hip"; + } else { + $hasCU = 1; } } - push (@inputs, $arg); #print "I: <$arg>\n"; } $toolArgs .= " $arg" unless $swallowArg; + $prevArg = $arg; } if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){ @@ -657,9 +706,9 @@ if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){ my $archMacro = ' -D__HIP_ARCH_' . uc($val) . '__=1 '; # Add the arch option and macro to the compiler options. $GPU_ARCH_ARG = $GPU_ARCH_OPT . 
$val; - $HIPLDFLAGS .= $GPU_ARCH_ARG; + $HIPLDARCHFLAGS .= $GPU_ARCH_ARG; $HIPCXXFLAGS .= $archMacro; - if ($HIP_PLATFORM eq 'clang') { + if ($HIP_PLATFORM eq 'clang' and $hasHIP) { $HIPCXXFLAGS .= $GPU_ARCH_ARG; } @@ -685,7 +734,7 @@ if ($coFormatv3 and $HIP_PLATFORM eq 'hcc') { $HIPCXXFLAGS .= " -mcode-object-v3"; } -if ($hasC and $HIP_PLATFORM eq 'nvcc') { +if ($hasCXX and $HIP_PLATFORM eq 'nvcc') { $HIPCXXFLAGS .= " -x cu"; } if ($hasCU and $HIP_PLATFORM eq 'hcc') { @@ -694,15 +743,17 @@ if ($hasCU and $HIP_PLATFORM eq 'hcc') { if ($buildDeps and $HIP_PLATFORM eq 'nvcc') { $HIPCXXFLAGS .= " -M -D__CUDACC__"; + $HIPCFLAGS .= " -M -D__CUDACC__"; } if ($buildDeps and $HIP_PLATFORM eq 'clang') { $HIPCXXFLAGS .= " --cuda-host-only"; } -# Add --hip-link only if there are no source files. -if (!$needCXXFLAGS and $HIP_PLATFORM eq 'clang') { +# Add --hip-link only when linking (not compile-only) and -fgpu-rdc is on. +if ($rdc and !$compileOnly and $HIP_PLATFORM eq 'clang') { $HIPLDFLAGS .= " --hip-link"; + $HIPLDFLAGS .= $HIPLDARCHFLAGS; } if ($setStdLib eq 0 and $HIP_PLATFORM eq 'hcc') @@ -727,35 +778,55 @@ if ($HIP_PLATFORM eq "clang") { # Set default optimization level to -O3 for hip-clang. if ($optArg eq "") { $HIPCXXFLAGS .= " -O3"; + $HIPCFLAGS .= " -O3"; $HIPLDFLAGS .= " -O3"; } # Do not pass -mllvm on Windows since there is a clang bug causing duplicate -mllvm options in clang -cc1 on Windows. # ToDo : remove restriction for Windows after clang bug is fixed. 
- if ($optArg ne "-O0" and not $isWindows) { + if (!$funcSupp and $optArg ne "-O0" and not $isWindows and $hasHIP) { $HIPCXXFLAGS .= " -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false"; if ($needLDFLAGS and not $needCXXFLAGS) { $HIPLDFLAGS .= " -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false"; } } $HIP_DEVLIB_FLAGS = " --hip-device-lib-path=$DEVICE_LIB_PATH"; - $HIPCXXFLAGS .= " $HIP_DEVLIB_FLAGS"; + if ($hasHIP) { + $HIPCXXFLAGS .= " $HIP_DEVLIB_FLAGS"; + if ($HIP_RUNTIME ne "HCC") { + $HIPCXXFLAGS .= " -fhip-new-launch-api"; + } + } if (not $isWindows) { $HIPLDFLAGS .= " -lgcc_s -lgcc -lpthread -lm"; } -} + if (not $isWindows and not $compileOnly) { + if ($linkType eq 0) { + $toolArgs .= " -L$HIP_LIB_PATH -lamdhip64_static -L$ROCM_PATH/lib -lhsa-runtime64 -ldl "; + } else { + $toolArgs .= " -Wl,--enable-new-dtags -Wl,--rpath=$HIP_LIB_PATH:$ROCM_PATH/lib -lhip_hcc "; + } + } +} if ($HIPCC_COMPILE_FLAGS_APPEND) { $HIPCXXFLAGS .= " $HIPCC_COMPILE_FLAGS_APPEND"; + $HIPCFLAGS .= " $HIPCC_COMPILE_FLAGS_APPEND"; } if ($HIPCC_LINK_FLAGS_APPEND) { $HIPLDFLAGS .= " $HIPCC_LINK_FLAGS_APPEND"; } my $CMD="$HIPCC"; + +if ($needCFLAGS) { + $CMD .= " $HIPCFLAGS"; +} + if ($needCXXFLAGS) { $CMD .= " $HIPCXXFLAGS"; } + if ($needLDFLAGS and not $compileOnly) { $CMD .= " $HIPLDFLAGS"; } @@ -771,6 +842,12 @@ if ($printHipVersion) { } print $HIP_VERSION, "\n"; } +if ($printCXXFlags) { + print $HIPCXXFLAGS; +} +if ($printLDFlags) { + print $HIPLDFLAGS; +} if ($runCmd) { if ($HIP_PLATFORM eq "hcc" and exists($hipConfig{'HCC_VERSION'}) and $HCC_VERSION ne $hipConfig{'HCC_VERSION'}) { print ("HIP ($HIP_PATH) was built using hcc $hipConfig{'HCC_VERSION'}, but you are using $HCC_HOME/hcc with version $HCC_VERSION from hipcc. 
Please rebuild HIP including cmake or update HCC_HOME variable.\n") ; diff --git a/bin/hipconfig b/bin/hipconfig index c56b56ecd8..2dcc81fa76 100755 --- a/bin/hipconfig +++ b/bin/hipconfig @@ -1,7 +1,7 @@ #!/usr/bin/perl -w $HIP_BASE_VERSION_MAJOR = "3"; -$HIP_BASE_VERSION_MINOR = "2"; +$HIP_BASE_VERSION_MINOR = "5"; # Need perl > 5.10 to use logic-defined or use 5.006; use v5.10.1; @@ -19,6 +19,7 @@ GetOptions( ,"path|p" => \$p_path ,"compiler|c" => \$p_compiler ,"platform|P" => \$p_platform + ,"runtime|r" => \$p_runtime ,"cpp_config|cxx_config|C" => \$p_cpp_config ,"full|f|info" => \$p_full, ,"version|v" => \$p_version, @@ -30,8 +31,9 @@ if ($p_help) { print "usage: hipconfig [OPTIONS]\n"; print " --path, -p : print HIP_PATH (use env var if set, else determine from hipconfig path)\n"; print " --cpp_config, -C : print C++ compiler options\n"; - print " --compiler, -c : print compiler (hcc or nvcc)\n"; + print " --compiler, -c : print compiler (hcc or clang or nvcc)\n"; print " --platform, -P : print platform (hcc or nvcc)\n"; + print " --runtime, -r : print runtime (HCC or VDI)\n"; print " --full, -f : print full config\n"; print " --version, -v : print hip version\n"; print " --check : check configuration\n"; @@ -82,13 +84,22 @@ if (-e "$HIP_PATH/../.info/version") { $CUDA_PATH=$ENV{'CUDA_PATH'} // '/usr/local/cuda'; $HCC_HOME=$ENV{'HCC_HOME'} // "$ROCM_PATH/hcc"; $HSA_PATH=$ENV{'HSA_PATH'} // "$ROCM_PATH/hsa"; +$HIP_CLANG_PATH=$ENV{'HIP_CLANG_PATH'} // "$ROCM_PATH/llvm/bin"; #--- #HIP_PLATFORM controls whether to use NVCC or HCC for compilation: $HIP_PLATFORM=$ENV{'HIP_PLATFORM'}; +# Read .hipInfo +my %hipInfo = (); +parse_config_file("$HIP_PATH/lib/.hipInfo", \%hipInfo); +$HIP_COMPILER = $hipInfo{'HIP_COMPILER'} // "hcc"; +$HIP_RUNTIME = $hipInfo{'HIP_RUNTIME'} // "HCC"; + if (not defined $HIP_PLATFORM) { if (can_run("$HCC_HOME/bin/hcc") or can_run("hcc")) { $HIP_PLATFORM = "hcc"; + } elsif (can_run("$HIP_CLANG_PATH/clang++") or can_run("clang++")) { + 
$HIP_PLATFORM = "hcc"; } elsif (can_run("$CUDA_PATH/bin/nvcc") or can_run("nvcc")) { $HIP_PLATFORM = "nvcc"; } else { @@ -97,8 +108,15 @@ if (not defined $HIP_PLATFORM) { } } -if ($HIP_PLATFORM eq "hcc") { - $CPP_CONFIG= " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HCC_HOME/include -I$HSA_PATH/include"; +if ($HIP_COMPILER eq "hcc") { + $CPP_CONFIG= " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HCC_HOME/include -I$HSA_PATH/include"; +} +if ($HIP_COMPILER eq "clang") { + $HIP_CLANG_VERSION = `$HIP_CLANG_PATH/clang++ --version`; + $HIP_CLANG_VERSION=~/.*clang version ([^ ]+).*/; + $HIP_CLANG_VERSION=$1; + + $CPP_CONFIG= " -D__HIP_PLATFORM_HCC__= -I$HIP_PATH/include -I$HIP_CLANG_PATH/../lib/clang/$HIP_CLANG_VERSION -I$HSA_PATH/include"; } if ($HIP_PLATFORM eq "nvcc") { $CPP_CONFIG = " -D__HIP_PLATFORM_NVCC__= -I$HIP_PATH/include -I$CUDA_PATH/include"; @@ -118,18 +136,26 @@ if ($p_path) { $printed = 1; } - if ($p_cpp_config) { print $CPP_CONFIG; $printed = 1; } +if ($p_compiler) { + print $HIP_COMPILER; + $printed = 1; +} -if ($p_compiler or $p_platform) { +if ($p_platform) { print $HIP_PLATFORM; $printed = 1; } +if ($p_runtime) { + print $HIP_RUNTIME; + $printed = 1; +} + if ($p_version) { print $HIP_VERSION; $printed = 1; @@ -139,21 +165,41 @@ if (!$printed or $p_full) { print "HIP version : ", $HIP_VERSION, "\n\n"; print "== hipconfig\n"; print "HIP_PATH : ", $HIP_PATH, "\n"; + print "HIP_COMPILER : ", $HIP_COMPILER, "\n"; print "HIP_PLATFORM : ", $HIP_PLATFORM, "\n"; + print "HIP_RUNTIME : ", $HIP_RUNTIME, "\n"; print "CPP_CONFIG : ", $CPP_CONFIG, "\n"; if ($HIP_PLATFORM eq "hcc") { print "\n" ; - print "== hcc\n"; - print ("HSA_PATH : $HSA_PATH\n"); - print ("HCC_HOME : $HCC_HOME\n"); - system("$HCC_HOME/bin/hcc --version"); - system("$HCC_HOME/bin/llc --version"); - print ("HCC-cxxflags : "); - system("$HCC_HOME/bin/hcc-config --cxxflags"); - print ("HCC-ldflags : "); - system("$HCC_HOME/bin/hcc-config --ldflags"); - printf("\n"); + if ($HIP_COMPILER eq 
"hcc") + { + print "== hcc\n"; + print ("HSA_PATH : $HSA_PATH\n"); + print ("HCC_HOME : $HCC_HOME\n"); + system("$HCC_HOME/bin/hcc --version"); + system("$HCC_HOME/bin/llc --version"); + print ("HCC-cxxflags : "); + system("$HCC_HOME/bin/hcc-config --cxxflags"); + printf("\n"); + print ("HCC-ldflags : "); + system("$HCC_HOME/bin/hcc-config --ldflags"); + printf("\n"); + } + if ($HIP_COMPILER eq "clang") + { + print "== hip-clang\n"; + print ("HSA_PATH : $HSA_PATH\n"); + print ("HIP_CLANG_PATH : $HIP_CLANG_PATH\n"); + system("$HIP_CLANG_PATH/clang++ --version"); + system("$HIP_CLANG_PATH/llc --version"); + print ("hip-clang-cxxflags : "); + system("$HIP_PATH/bin/hipcc --cxxflags"); + printf("\n"); + print ("hip-clang-ldflags : "); + system("$HIP_PATH/bin/hipcc --ldflags"); + printf("\n"); + } } if ($HIP_PLATFORM eq "nvcc") { print "\n" ; diff --git a/bin/hipify-perl b/bin/hipify-perl index 62cd8436f8..83e72ee711 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -341,8 +341,8 @@ sub simpleSubstitutions { $ft{'execution'} += s/\bcudaLaunchCooperativeKernelMultiDevice\b/hipLaunchCooperativeKernelMultiDevice/g; $ft{'execution'} += s/\bcudaLaunchKernel\b/hipLaunchKernel/g; $ft{'execution'} += s/\bcudaSetupArgument\b/hipSetupArgument/g; - $ft{'occupancy'} += s/\bcuOccupancyMaxActiveBlocksPerMultiprocessor\b/hipOccupancyMaxActiveBlocksPerMultiprocessor/g; - $ft{'occupancy'} += s/\bcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags\b/hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags/g; + $ft{'occupancy'} += s/\bcuOccupancyMaxActiveBlocksPerMultiprocessor\b/hipDrvOccupancyMaxActiveBlocksPerMultiprocessor/g; + $ft{'occupancy'} += s/\bcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags\b/hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags/g; $ft{'occupancy'} += s/\bcuOccupancyMaxPotentialBlockSize\b/hipOccupancyMaxPotentialBlockSize/g; $ft{'occupancy'} += s/\bcudaOccupancyMaxActiveBlocksPerMultiprocessor\b/hipOccupancyMaxActiveBlocksPerMultiprocessor/g; 
$ft{'occupancy'} += s/\bcudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags\b/hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags/g; @@ -742,8 +742,12 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcurandSetQuasiRandomGeneratorDimensions\b/hiprandSetQuasiRandomGeneratorDimensions/g; $ft{'library'} += s/\bcurandSetStream\b/hiprandSetStream/g; $ft{'library'} += s/\bcusparseCaxpyi\b/hipsparseCaxpyi/g; + $ft{'library'} += s/\bcusparseCbsrmv\b/hipsparseCbsrmv/g; $ft{'library'} += s/\bcusparseCcsr2csc\b/hipsparseCcsr2csc/g; $ft{'library'} += s/\bcusparseCcsr2hyb\b/hipsparseCcsr2hyb/g; + $ft{'library'} += s/\bcusparseCcsrgeam\b/hipsparseCcsrgeam/g; + $ft{'library'} += s/\bcusparseCcsrgeam2\b/hipsparseCcsrgeam2/g; + $ft{'library'} += s/\bcusparseCcsrgeam2_bufferSizeExt\b/hipsparseCcsrgeam2_bufferSizeExt/g; $ft{'library'} += s/\bcusparseCcsrgemm\b/hipsparseCcsrgemm/g; $ft{'library'} += s/\bcusparseCcsrgemm2\b/hipsparseCcsrgemm2/g; $ft{'library'} += s/\bcusparseCcsrgemm2_bufferSizeExt\b/hipsparseCcsrgemm2_bufferSizeExt/g; @@ -754,6 +758,9 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseCcsrmm\b/hipsparseCcsrmm/g; $ft{'library'} += s/\bcusparseCcsrmm2\b/hipsparseCcsrmm2/g; $ft{'library'} += s/\bcusparseCcsrmv\b/hipsparseCcsrmv/g; + $ft{'library'} += s/\bcusparseCcsrsm2_analysis\b/hipsparseCcsrsm2_analysis/g; + $ft{'library'} += s/\bcusparseCcsrsm2_bufferSizeExt\b/hipsparseCcsrsm2_bufferSizeExt/g; + $ft{'library'} += s/\bcusparseCcsrsm_solve\b/hipsparseCcsrsm_solve/g; $ft{'library'} += s/\bcusparseCcsrsv2_analysis\b/hipsparseCcsrsv2_analysis/g; $ft{'library'} += s/\bcusparseCcsrsv2_bufferSize\b/hipsparseCcsrsv2_bufferSize/g; $ft{'library'} += s/\bcusparseCcsrsv2_bufferSizeExt\b/hipsparseCcsrsv2_bufferSizeExt/g; @@ -763,17 +770,24 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseCgthr\b/hipsparseCgthr/g; $ft{'library'} += s/\bcusparseCgthrz\b/hipsparseCgthrz/g; $ft{'library'} += s/\bcusparseChybmv\b/hipsparseChybmv/g; + $ft{'library'} += 
s/\bcusparseCnnz\b/hipsparseCnnz/g; + $ft{'library'} += s/\bcusparseCnnz_compress\b/hipsparseCnnz_compress/g; $ft{'library'} += s/\bcusparseCreate\b/hipsparseCreate/g; $ft{'library'} += s/\bcusparseCreateCsrgemm2Info\b/hipsparseCreateCsrgemm2Info/g; $ft{'library'} += s/\bcusparseCreateCsrilu02Info\b/hipsparseCreateCsrilu02Info/g; + $ft{'library'} += s/\bcusparseCreateCsrsm2Info\b/hipsparseCreateCsrsm2Info/g; $ft{'library'} += s/\bcusparseCreateCsrsv2Info\b/hipsparseCreateCsrsv2Info/g; $ft{'library'} += s/\bcusparseCreateHybMat\b/hipsparseCreateHybMat/g; $ft{'library'} += s/\bcusparseCreateIdentityPermutation\b/hipsparseCreateIdentityPermutation/g; $ft{'library'} += s/\bcusparseCreateMatDescr\b/hipsparseCreateMatDescr/g; $ft{'library'} += s/\bcusparseCsctr\b/hipsparseCsctr/g; $ft{'library'} += s/\bcusparseDaxpyi\b/hipsparseDaxpyi/g; + $ft{'library'} += s/\bcusparseDbsrmv\b/hipsparseDbsrmv/g; $ft{'library'} += s/\bcusparseDcsr2csc\b/hipsparseDcsr2csc/g; $ft{'library'} += s/\bcusparseDcsr2hyb\b/hipsparseDcsr2hyb/g; + $ft{'library'} += s/\bcusparseDcsrgeam\b/hipsparseDcsrgeam/g; + $ft{'library'} += s/\bcusparseDcsrgeam2\b/hipsparseDcsrgeam2/g; + $ft{'library'} += s/\bcusparseDcsrgeam2_bufferSizeExt\b/hipsparseDcsrgeam2_bufferSizeExt/g; $ft{'library'} += s/\bcusparseDcsrgemm\b/hipsparseDcsrgemm/g; $ft{'library'} += s/\bcusparseDcsrgemm2\b/hipsparseDcsrgemm2/g; $ft{'library'} += s/\bcusparseDcsrgemm2_bufferSizeExt\b/hipsparseDcsrgemm2_bufferSizeExt/g; @@ -784,6 +798,9 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseDcsrmm\b/hipsparseDcsrmm/g; $ft{'library'} += s/\bcusparseDcsrmm2\b/hipsparseDcsrmm2/g; $ft{'library'} += s/\bcusparseDcsrmv\b/hipsparseDcsrmv/g; + $ft{'library'} += s/\bcusparseDcsrsm2_analysis\b/hipsparseDcsrsm2_analysis/g; + $ft{'library'} += s/\bcusparseDcsrsm2_bufferSizeExt\b/hipsparseDcsrsm2_bufferSizeExt/g; + $ft{'library'} += s/\bcusparseDcsrsm_solve\b/hipsparseDcsrsm_solve/g; $ft{'library'} += 
s/\bcusparseDcsrsv2_analysis\b/hipsparseDcsrsv2_analysis/g; $ft{'library'} += s/\bcusparseDcsrsv2_bufferSize\b/hipsparseDcsrsv2_bufferSize/g; $ft{'library'} += s/\bcusparseDcsrsv2_bufferSizeExt\b/hipsparseDcsrsv2_bufferSizeExt/g; @@ -792,12 +809,15 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseDestroy\b/hipsparseDestroy/g; $ft{'library'} += s/\bcusparseDestroyCsrgemm2Info\b/hipsparseDestroyCsrgemm2Info/g; $ft{'library'} += s/\bcusparseDestroyCsrilu02Info\b/hipsparseDestroyCsrilu02Info/g; + $ft{'library'} += s/\bcusparseDestroyCsrsm2Info\b/hipsparseDestroyCsrsm2Info/g; $ft{'library'} += s/\bcusparseDestroyCsrsv2Info\b/hipsparseDestroyCsrsv2Info/g; $ft{'library'} += s/\bcusparseDestroyHybMat\b/hipsparseDestroyHybMat/g; $ft{'library'} += s/\bcusparseDestroyMatDescr\b/hipsparseDestroyMatDescr/g; $ft{'library'} += s/\bcusparseDgthr\b/hipsparseDgthr/g; $ft{'library'} += s/\bcusparseDgthrz\b/hipsparseDgthrz/g; $ft{'library'} += s/\bcusparseDhybmv\b/hipsparseDhybmv/g; + $ft{'library'} += s/\bcusparseDnnz\b/hipsparseDnnz/g; + $ft{'library'} += s/\bcusparseDnnz_compress\b/hipsparseDnnz_compress/g; $ft{'library'} += s/\bcusparseDroti\b/hipsparseDroti/g; $ft{'library'} += s/\bcusparseDsctr\b/hipsparseDsctr/g; $ft{'library'} += s/\bcusparseGetMatDiagType\b/hipsparseGetMatDiagType/g; @@ -808,8 +828,12 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseGetStream\b/hipsparseGetStream/g; $ft{'library'} += s/\bcusparseGetVersion\b/hipsparseGetVersion/g; $ft{'library'} += s/\bcusparseSaxpyi\b/hipsparseSaxpyi/g; + $ft{'library'} += s/\bcusparseSbsrmv\b/hipsparseSbsrmv/g; $ft{'library'} += s/\bcusparseScsr2csc\b/hipsparseScsr2csc/g; $ft{'library'} += s/\bcusparseScsr2hyb\b/hipsparseScsr2hyb/g; + $ft{'library'} += s/\bcusparseScsrgeam\b/hipsparseScsrgeam/g; + $ft{'library'} += s/\bcusparseScsrgeam2\b/hipsparseScsrgeam2/g; + $ft{'library'} += s/\bcusparseScsrgeam2_bufferSizeExt\b/hipsparseScsrgeam2_bufferSizeExt/g; $ft{'library'} += 
s/\bcusparseScsrgemm\b/hipsparseScsrgemm/g; $ft{'library'} += s/\bcusparseScsrgemm2\b/hipsparseScsrgemm2/g; $ft{'library'} += s/\bcusparseScsrgemm2_bufferSizeExt\b/hipsparseScsrgemm2_bufferSizeExt/g; @@ -820,6 +844,9 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseScsrmm\b/hipsparseScsrmm/g; $ft{'library'} += s/\bcusparseScsrmm2\b/hipsparseScsrmm2/g; $ft{'library'} += s/\bcusparseScsrmv\b/hipsparseScsrmv/g; + $ft{'library'} += s/\bcusparseScsrsm2_analysis\b/hipsparseScsrsm2_analysis/g; + $ft{'library'} += s/\bcusparseScsrsm2_bufferSizeExt\b/hipsparseScsrsm2_bufferSizeExt/g; + $ft{'library'} += s/\bcusparseScsrsm_solve\b/hipsparseScsrsm_solve/g; $ft{'library'} += s/\bcusparseScsrsv2_analysis\b/hipsparseScsrsv2_analysis/g; $ft{'library'} += s/\bcusparseScsrsv2_bufferSize\b/hipsparseScsrsv2_bufferSize/g; $ft{'library'} += s/\bcusparseScsrsv2_bufferSizeExt\b/hipsparseScsrsv2_bufferSizeExt/g; @@ -834,6 +861,8 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseSgthr\b/hipsparseSgthr/g; $ft{'library'} += s/\bcusparseSgthrz\b/hipsparseSgthrz/g; $ft{'library'} += s/\bcusparseShybmv\b/hipsparseShybmv/g; + $ft{'library'} += s/\bcusparseSnnz\b/hipsparseSnnz/g; + $ft{'library'} += s/\bcusparseSnnz_compress\b/hipsparseSnnz_compress/g; $ft{'library'} += s/\bcusparseSroti\b/hipsparseSroti/g; $ft{'library'} += s/\bcusparseSsctr\b/hipsparseSsctr/g; $ft{'library'} += s/\bcusparseXbsrilu02_zeroPivot\b/hipsparseXbsrilu02_zeroPivot/g; @@ -844,15 +873,22 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseXcscsort\b/hipsparseXcscsort/g; $ft{'library'} += s/\bcusparseXcscsort_bufferSizeExt\b/hipsparseXcscsort_bufferSizeExt/g; $ft{'library'} += s/\bcusparseXcsr2coo\b/hipsparseXcsr2coo/g; + $ft{'library'} += s/\bcusparseXcsrgeam2Nnz\b/hipsparseXcsrgeam2Nnz/g; + $ft{'library'} += s/\bcusparseXcsrgeamNnz\b/hipsparseXcsrgeamNnz/g; $ft{'library'} += s/\bcusparseXcsrgemm2Nnz\b/hipsparseXcsrgemm2Nnz/g; $ft{'library'} += s/\bcusparseXcsrgemmNnz\b/hipsparseXcsrgemmNnz/g; 
$ft{'library'} += s/\bcusparseXcsrilu02_zeroPivot\b/hipsparseXcsrilu02_zeroPivot/g; + $ft{'library'} += s/\bcusparseXcsrsm2_zeroPivot\b/hipsparseXcsrsm2_zeroPivot/g; $ft{'library'} += s/\bcusparseXcsrsort\b/hipsparseXcsrsort/g; $ft{'library'} += s/\bcusparseXcsrsort_bufferSizeExt\b/hipsparseXcsrsort_bufferSizeExt/g; $ft{'library'} += s/\bcusparseXcsrsv2_zeroPivot\b/hipsparseXcsrsv2_zeroPivot/g; $ft{'library'} += s/\bcusparseZaxpyi\b/hipsparseZaxpyi/g; + $ft{'library'} += s/\bcusparseZbsrmv\b/hipsparseZbsrmv/g; $ft{'library'} += s/\bcusparseZcsr2csc\b/hipsparseZcsr2csc/g; $ft{'library'} += s/\bcusparseZcsr2hyb\b/hipsparseZcsr2hyb/g; + $ft{'library'} += s/\bcusparseZcsrgeam\b/hipsparseZcsrgeam/g; + $ft{'library'} += s/\bcusparseZcsrgeam2\b/hipsparseZcsrgeam2/g; + $ft{'library'} += s/\bcusparseZcsrgeam2_bufferSizeExt\b/hipsparseZcsrgeam2_bufferSizeExt/g; $ft{'library'} += s/\bcusparseZcsrgemm\b/hipsparseZcsrgemm/g; $ft{'library'} += s/\bcusparseZcsrgemm2\b/hipsparseZcsrgemm2/g; $ft{'library'} += s/\bcusparseZcsrgemm2_bufferSizeExt\b/hipsparseZcsrgemm2_bufferSizeExt/g; @@ -863,6 +899,9 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseZcsrmm\b/hipsparseZcsrmm/g; $ft{'library'} += s/\bcusparseZcsrmm2\b/hipsparseZcsrmm2/g; $ft{'library'} += s/\bcusparseZcsrmv\b/hipsparseZcsrmv/g; + $ft{'library'} += s/\bcusparseZcsrsm2_analysis\b/hipsparseZcsrsm2_analysis/g; + $ft{'library'} += s/\bcusparseZcsrsm2_bufferSizeExt\b/hipsparseZcsrsm2_bufferSizeExt/g; + $ft{'library'} += s/\bcusparseZcsrsm_solve\b/hipsparseZcsrsm_solve/g; $ft{'library'} += s/\bcusparseZcsrsv2_analysis\b/hipsparseZcsrsv2_analysis/g; $ft{'library'} += s/\bcusparseZcsrsv2_bufferSize\b/hipsparseZcsrsv2_bufferSize/g; $ft{'library'} += s/\bcusparseZcsrsv2_bufferSizeExt\b/hipsparseZcsrsv2_bufferSizeExt/g; @@ -872,6 +911,8 @@ sub simpleSubstitutions { $ft{'library'} += s/\bcusparseZgthr\b/hipsparseZgthr/g; $ft{'library'} += s/\bcusparseZgthrz\b/hipsparseZgthrz/g; $ft{'library'} += 
s/\bcusparseZhybmv\b/hipsparseZhybmv/g; + $ft{'library'} += s/\bcusparseZnnz\b/hipsparseZnnz/g; + $ft{'library'} += s/\bcusparseZnnz_compress\b/hipsparseZnnz_compress/g; $ft{'library'} += s/\bcusparseZsctr\b/hipsparseZsctr/g; $ft{'device_library'} += s/\bcurand\b/hiprand/g; $ft{'device_library'} += s/\bcurand_discrete\b/hiprand_discrete/g; @@ -997,6 +1038,8 @@ sub simpleSubstitutions { $ft{'type'} += s/\bcsrgemm2Info\b/csrgemm2Info/g; $ft{'type'} += s/\bcsrgemm2Info_t\b/csrgemm2Info_t/g; $ft{'type'} += s/\bcsrilu02Info_t\b/csrilu02Info_t/g; + $ft{'type'} += s/\bcsrsm2Info\b/csrsm2Info/g; + $ft{'type'} += s/\bcsrsm2Info_t\b/csrsm2Info_t/g; $ft{'type'} += s/\bcsrsv2Info_t\b/csrsv2Info_t/g; $ft{'type'} += s/\bcuComplex\b/hipComplex/g; $ft{'type'} += s/\bcuDoubleComplex\b/hipDoubleComplex/g; @@ -1130,6 +1173,7 @@ sub simpleSubstitutions { $ft{'type'} += s/\bcurandStatus_t\b/hiprandStatus_t/g; $ft{'type'} += s/\bcusparseAction_t\b/hipsparseAction_t/g; $ft{'type'} += s/\bcusparseDiagType_t\b/hipsparseDiagType_t/g; + $ft{'type'} += s/\bcusparseDirection_t\b/hipsparseDirection_t/g; $ft{'type'} += s/\bcusparseFillMode_t\b/hipsparseFillMode_t/g; $ft{'type'} += s/\bcusparseHandle_t\b/hipsparseHandle_t/g; $ft{'type'} += s/\bcusparseHybMat_t\b/hipsparseHybMat_t/g; @@ -1398,6 +1442,8 @@ sub simpleSubstitutions { $ft{'numeric_literal'} += s/\bCUSPARSE_ACTION_SYMBOLIC\b/HIPSPARSE_ACTION_SYMBOLIC/g; $ft{'numeric_literal'} += s/\bCUSPARSE_DIAG_TYPE_NON_UNIT\b/HIPSPARSE_DIAG_TYPE_NON_UNIT/g; $ft{'numeric_literal'} += s/\bCUSPARSE_DIAG_TYPE_UNIT\b/HIPSPARSE_DIAG_TYPE_UNIT/g; + $ft{'numeric_literal'} += s/\bCUSPARSE_DIRECTION_COLUMN\b/HIPSPARSE_DIRECTION_COLUMN/g; + $ft{'numeric_literal'} += s/\bCUSPARSE_DIRECTION_ROW\b/HIPSPARSE_DIRECTION_ROW/g; $ft{'numeric_literal'} += s/\bCUSPARSE_FILL_MODE_LOWER\b/HIPSPARSE_FILL_MODE_LOWER/g; $ft{'numeric_literal'} += s/\bCUSPARSE_FILL_MODE_UPPER\b/HIPSPARSE_FILL_MODE_UPPER/g; $ft{'numeric_literal'} += 
s/\bCUSPARSE_HYB_PARTITION_AUTO\b/HIPSPARSE_HYB_PARTITION_AUTO/g; diff --git a/cmake/FindHIP.cmake b/cmake/FindHIP.cmake index 7edf27f3c7..0819a0364c 100644 --- a/cmake/FindHIP.cmake +++ b/cmake/FindHIP.cmake @@ -75,7 +75,6 @@ if(UNIX AND NOT APPLE AND NOT CYGWIN) endif() # And push it back to the cache set(HIP_ROOT_DIR ${HIP_ROOT_DIR} CACHE PATH "HIP installed location" FORCE) - message("Found HIP at ${HIP_ROOT_DIR}") endif() # Find HIPCC executable diff --git a/docs/doxygen-input/mainpage.txt b/docs/doxygen-input/mainpage.txt index 48177198cc..b5dcae3956 100644 --- a/docs/doxygen-input/mainpage.txt +++ b/docs/doxygen-input/mainpage.txt @@ -10,7 +10,7 @@ * - HIP is very thin and has little or no performance impact over coding directly in CUDA NVCC or HCC HC mode. * - HIP allows developers to use the "best" development environment and tools on each target platform. * - HIP allows coding in a single-source C++ programming language including features such as templates, C++11 lambdas,and more. - * - "hipify" tool automatically converts source from CUDA to HIP. + * - "HIPIFY" tools automatically convert CUDA sources to HIP. * - Developers can specialize for CUDA or HCC to tune for performance or handle tricky cases with #ifdef. * - See the @ref API. 
diff --git a/docs/markdown/CUSPARSE_API_supported_by_HIP.md b/docs/markdown/CUSPARSE_API_supported_by_HIP.md index fc7a8ee8cd..2c0a1bea9d 100644 --- a/docs/markdown/CUSPARSE_API_supported_by_HIP.md +++ b/docs/markdown/CUSPARSE_API_supported_by_HIP.md @@ -12,9 +12,9 @@ | enum |***`cusparseAction_t`*** | |***`hipsparseAction_t`*** | | 0 |*`CUSPARSE_ACTION_SYMBOLIC`* | |*`HIPSPARSE_ACTION_SYMBOLIC`* | | 1 |*`CUSPARSE_ACTION_NUMERIC`* | |*`HIPSPARSE_ACTION_NUMERIC`* | -| enum |***`cusparseDirection_t`*** | | | -| 0 |*`CUSPARSE_DIRECTION_ROW`* | | | -| 1 |*`CUSPARSE_DIRECTION_COLUMN`* | | | +| enum |***`cusparseDirection_t`*** | |***`hipsparseDirection_t`*** | +| 0 |*`CUSPARSE_DIRECTION_ROW`* | |*`HIPSPARSE_DIRECTION_ROW`* | +| 1 |*`CUSPARSE_DIRECTION_COLUMN`* | |*`HIPSPARSE_DIRECTION_COLUMN`* | | enum |***`cusparseHybPartition_t`*** | |***`hipsparseHybPartition_t`*** | | 0 |*`CUSPARSE_HYB_PARTITION_AUTO`* | |*`HIPSPARSE_HYB_PARTITION_AUTO`* | | 1 |*`CUSPARSE_HYB_PARTITION_USER`* | |*`HIPSPARSE_HYB_PARTITION_USER`* | @@ -69,8 +69,8 @@ | typedef |`cusparseSolveAnalysisInfo_t` | | | | struct |`csrsv2Info` | | | | typedef |`csrsv2Info_t` | |`csrsv2Info_t` | -| struct |`csrsm2Info` | 9.2 | | -| typedef |`csrsm2Info_t` | | | +| struct |`csrsm2Info` | 9.2 |`csrsm2Info` | +| typedef |`csrsm2Info_t` | |`csrsm2Info_t` | | struct |`bsrsv2Info` | | | | typedef |`bsrsv2Info_t` | | | | struct |`bsrsm2Info` | | | @@ -151,8 +151,8 @@ |`cusparseGetStream` |`hipsparseGetStream` | 8.0 | |`cusparseCreateCsrsv2Info` |`hipsparseCreateCsrsv2Info` | |`cusparseDestroyCsrsv2Info` |`hipsparseDestroyCsrsv2Info` | -|`cusparseCreateCsrsm2Info` | | 9.2 | -|`cusparseDestroyCsrsm2Info` | | 9.2 | +|`cusparseCreateCsrsm2Info` |`hipsparseCreateCsrsm2Info` | 9.2 | +|`cusparseDestroyCsrsm2Info` |`hipsparseDestroyCsrsm2Info` | 9.2 | |`cusparseCreateCsric02Info` | | |`cusparseDestroyCsric02Info` | | |`cusparseCreateCsrilu02Info` |`hipsparseCreateCsrilu02Info` | @@ -203,10 +203,10 @@ | **CUDA** | **HIP** 
|**CUDA version\***| |-----------------------------------------------------------|-------------------------------------------------|:----------------:| -|`cusparseSbsrmv` | | -|`cusparseDbsrmv` | | -|`cusparseCbsrmv` | | -|`cusparseZbsrmv` | | +|`cusparseSbsrmv` |`hipsparseSbsrmv` | +|`cusparseDbsrmv` |`hipsparseDbsrmv` | +|`cusparseCbsrmv` |`hipsparseCbsrmv` | +|`cusparseZbsrmv` |`hipsparseZbsrmv` | |`cusparseSbsrxmv` | | |`cusparseDbsrxmv` | | |`cusparseCbsrxmv` | | @@ -306,19 +306,19 @@ |`cusparseDcsrsm_solve` | | |`cusparseCcsrsm_solve` | | |`cusparseZcsrsm_solve` | | -|`cusparseScsrsm2_bufferSizeExt` | | 9.2 | -|`cusparseDcsrsm2_bufferSizeExt` | | 9.2 | -|`cusparseCcsrsm2_bufferSizeExt` | | 9.2 | -|`cusparseZcsrsm2_bufferSizeExt` | | 9.2 | -|`cusparseScsrsm2_analysis` | | 9.2 | -|`cusparseDcsrsm2_analysis` | | 9.2 | -|`cusparseCcsrsm2_analysis` | | 9.2 | -|`cusparseZcsrsm2_analysis` | | 9.2 | -|`cusparseScsrsm2_solve` | | 9.2 | -|`cusparseDcsrsm2_solve` | | 9.2 | -|`cusparseCcsrsm2_solve` | | 9.2 | -|`cusparseZcsrsm2_solve` | | 9.2 | -|`cusparseXcsrsm2_zeroPivot` | | 9.2 | +|`cusparseScsrsm2_bufferSizeExt` |`hipsparseScsrsm2_bufferSizeExt` | 9.2 | +|`cusparseDcsrsm2_bufferSizeExt` |`hipsparseDcsrsm2_bufferSizeExt` | 9.2 | +|`cusparseCcsrsm2_bufferSizeExt` |`hipsparseCcsrsm2_bufferSizeExt` | 9.2 | +|`cusparseZcsrsm2_bufferSizeExt` |`hipsparseZcsrsm2_bufferSizeExt` | 9.2 | +|`cusparseScsrsm2_analysis` |`hipsparseScsrsm2_analysis` | 9.2 | +|`cusparseDcsrsm2_analysis` |`hipsparseDcsrsm2_analysis` | 9.2 | +|`cusparseCcsrsm2_analysis` |`hipsparseCcsrsm2_analysis` | 9.2 | +|`cusparseZcsrsm2_analysis` |`hipsparseZcsrsm2_analysis` | 9.2 | +|`cusparseScsrsm2_solve` |`hipsparseScsrsm2_solve` | 9.2 | +|`cusparseDcsrsm2_solve` |`hipsparseDcsrsm2_solve` | 9.2 | +|`cusparseCcsrsm2_solve` |`hipsparseCcsrsm2_solve` | 9.2 | +|`cusparseZcsrsm2_solve` |`hipsparseZcsrsm2_solve` | 9.2 | +|`cusparseXcsrsm2_zeroPivot` |`hipsparseXcsrsm2_zeroPivot` | 9.2 | |`cusparseSbsrmm` | | 
|`cusparseDbsrmm` | | |`cusparseCbsrmm` | | @@ -349,20 +349,20 @@ | **CUDA** | **HIP** |**CUDA version\***| |-----------------------------------------------------------|-------------------------------------------------|:----------------:| -|`cusparseXcsrgeamNnz` | | -|`cusparseScsrgeam` | | -|`cusparseDcsrgeam` | | -|`cusparseCcsrgeam` | | -|`cusparseZcsrgeam` | | -|`cusparseXcsrgeam2Nnz` | | 9.2 | -|`cusparseScsrgeam2` | | 9.2 | -|`cusparseDcsrgeam2` | | 9.2 | -|`cusparseCcsrgeam2` | | 9.2 | -|`cusparseZcsrgeam2` | | 9.2 | -|`cusparseScsrgeam2_bufferSizeExt` | | 9.2 | -|`cusparseDcsrgeam2_bufferSizeExt` | | 9.2 | -|`cusparseCcsrgeam2_bufferSizeExt` | | 9.2 | -|`cusparseZcsrgeam2_bufferSizeExt` | | 9.2 | +|`cusparseXcsrgeamNnz` |`hipsparseXcsrgeamNnz` | +|`cusparseScsrgeam` |`hipsparseScsrgeam` | +|`cusparseDcsrgeam` |`hipsparseDcsrgeam` | +|`cusparseCcsrgeam` |`hipsparseCcsrgeam` | +|`cusparseZcsrgeam` |`hipsparseZcsrgeam` | +|`cusparseXcsrgeam2Nnz` |`hipsparseXcsrgeam2Nnz` | 9.2 | +|`cusparseScsrgeam2` |`hipsparseScsrgeam2` | 9.2 | +|`cusparseDcsrgeam2` |`hipsparseDcsrgeam2` | 9.2 | +|`cusparseCcsrgeam2` |`hipsparseCcsrgeam2` | 9.2 | +|`cusparseZcsrgeam2` |`hipsparseZcsrgeam2` | 9.2 | +|`cusparseScsrgeam2_bufferSizeExt` |`hipsparseScsrgeam2_bufferSizeExt` | 9.2 | +|`cusparseDcsrgeam2_bufferSizeExt` |`hipsparseDcsrgeam2_bufferSizeExt` | 9.2 | +|`cusparseCcsrgeam2_bufferSizeExt` |`hipsparseCcsrgeam2_bufferSizeExt` | 9.2 | +|`cusparseZcsrgeam2_bufferSizeExt` |`hipsparseZcsrgeam2_bufferSizeExt` | 9.2 | |`cusparseXcsrgemmNnz` |`hipsparseXcsrgemmNnz` | |`cusparseScsrgemm` |`hipsparseScsrgemm` | |`cusparseDcsrgemm` |`hipsparseDcsrgemm` | @@ -378,7 +378,6 @@ |`cusparseCcsrgemm2_bufferSizeExt` |`hipsparseCcsrgemm2_bufferSizeExt` | |`cusparseZcsrgemm2_bufferSizeExt` |`hipsparseZcsrgemm2_bufferSizeExt` | - ## **7. cuSPARSE Preconditioners Reference** ## ***7.1. 
Incomplete Cholesky Factorization: level 0*** @@ -662,10 +661,10 @@ |`cusparseDhyb2dense` | | |`cusparseChyb2dense` | | |`cusparseZhyb2dense` | | -|`cusparseSnnz` | | -|`cusparseDnnz` | | -|`cusparseCnnz` | | -|`cusparseZnnz` | | +|`cusparseSnnz` |`hipsparseSnnz` | +|`cusparseDnnz` |`hipsparseDnnz` | +|`cusparseCnnz` |`hipsparseCnnz` | +|`cusparseZnnz` |`hipsparseZnnz` | |`cusparseCreateIdentityPermutation` |`hipsparseCreateIdentityPermutation` | |`cusparseXcoosort_bufferSizeExt` |`hipsparseXcoosort_bufferSizeExt` | |`cusparseXcoosortByRow` |`hipsparseXcoosortByRow` | @@ -724,10 +723,10 @@ |`cusparseHpruneCsr2csrNnzByPercentage` | | 9.0 | |`cusparseSpruneCsr2csrNnzByPercentage` | | 9.0 | |`cusparseDpruneCsr2csrNnzByPercentage` | | 9.0 | -|`cusparseSnnz_compress` | | 8.0 | -|`cusparseDnnz_compress` | | 8.0 | -|`cusparseCnnz_compress` | | 8.0 | -|`cusparseZnnz_compress` | | 8.0 | +|`cusparseSnnz_compress` |`hipsparseSnnz_compress` | 8.0 | +|`cusparseDnnz_compress` |`hipsparseDnnz_compress` | 8.0 | +|`cusparseCnnz_compress` |`hipsparseCnnz_compress` | 8.0 | +|`cusparseZnnz_compress` |`hipsparseZnnz_compress` | 8.0 | ## **10. cuSPARSE Generic API Reference** diff --git a/docs/markdown/hip_debugging.md b/docs/markdown/hip_debugging.md index bf877d894e..fde17d410e 100644 --- a/docs/markdown/hip_debugging.md +++ b/docs/markdown/hip_debugging.md @@ -1,13 +1,13 @@ Table of Contents ================= - * [Profiling HIP Code](#profiling-hip-code" aria-hidden="true"> hipThreadIdx_x ). +### Do HIPIFY tools automatically convert all source code? +Typically, HIPIFY tools can automatically convert almost all run-time code, and the coordinate indexing device code ( threadIdx.x -> hipThreadIdx_x ). Most device code needs no additional conversion since HIP and CUDA have similar names for math and built-in functions. The hipify-clang tool will automatically modify the kernel signature as needed (automating a step that used to be done manually). 
-Additional porting may be required to deal with architecture feature queries or with CUDA capabilities that HIP doesn't support. +Additional porting may be required to deal with architecture feature queries or with CUDA capabilities that HIP doesn't support. In general, developers should always expect to perform some platform-specific tuning and optimization. ### What is NVCC? diff --git a/docs/markdown/hip_kernel_language.md b/docs/markdown/hip_kernel_language.md index 04847101f6..395a343764 100644 --- a/docs/markdown/hip_kernel_language.md +++ b/docs/markdown/hip_kernel_language.md @@ -125,7 +125,7 @@ MyKernel<<>> (a,b,c,n); ``` -The hipLaunchKernel macro always starts with the five parameters specified above, followed by the kernel arguments. The Hipify script optionally converts Cuda launch syntax to hipLaunchKernel, including conversion of optional arguments in <<< >>> to the five required hipLaunchKernel parameters. The dim3 constructor accepts zero to three arguments and will by default initialize unspecified dimensions to 1. See [dim3](#dim3). The kernel uses the coordinate built-ins (hipThread*, hipBlock*, hipGrid*) to determine coordinate index and coordinate bounds of the work item that’s currently executing. See [Coordinate Built-Ins](#coordinate-builtins). +The hipLaunchKernel macro always starts with the five parameters specified above, followed by the kernel arguments. HIPIFY tools optionally convert Cuda launch syntax to hipLaunchKernel, including conversion of optional arguments in <<< >>> to the five required hipLaunchKernel parameters. The dim3 constructor accepts zero to three arguments and will by default initialize unspecified dimensions to 1. See [dim3](#dim3). The kernel uses the coordinate built-ins (hipThread*, hipBlock*, hipGrid*) to determine coordinate index and coordinate bounds of the work item that’s currently executing. See [Coordinate Built-Ins](#coordinate-builtins). 
## Kernel-Launch Example @@ -724,7 +724,7 @@ CUDA defines a __launch_bounds which is also designed to control occupancy: __launch_bounds(MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MULTIPROCESSOR) ``` -- The second parameter __launch_bounds parameters must be converted to the format used __hip_launch_bounds, which uses warps and execution-units rather than blocks and multi-processors ( This conversion is performed automatically by the clang hipify tools.) +- The second parameter of __launch_bounds must be converted to the format used by __hip_launch_bounds, which uses warps and execution-units rather than blocks and multi-processors (this conversion is performed automatically by hipify tools). ``` MIN_WARPS_PER_EXECUTION_UNIT = (MIN_BLOCKS_PER_MULTIPROCESSOR * MAX_THREADS_PER_BLOCK) / 32 ``` diff --git a/docs/markdown/hip_porting_driver_api.md b/docs/markdown/hip_porting_driver_api.md index 8e66780add..af70f35bb6 100644 --- a/docs/markdown/hip_porting_driver_api.md +++ b/docs/markdown/hip_porting_driver_api.md @@ -68,8 +68,8 @@ HIP provides a `Ctx` API as a thin layer over the existing Device functions. Thi The current context is implicitly used by other APIs such as `hipStreamCreate`. ### hipify translation of CUDA Driver API -The hipify tool converts CUDA Driver APIs for streams, events, modules, devices, memory management, context, profiler to the equivalent HIP driver calls. For example, `cuEventCreate` will be translated to `hipEventCreate`. -Hipify also converts error code from the Driver namespace and coding convention to the equivalent HIP error code. Thus, HIP unifies the APIs for these common functions. +The HIPIFY tools convert CUDA Driver APIs for streams, events, modules, devices, memory management, context, profiler to the equivalent HIP driver calls. For example, `cuEventCreate` will be translated to `hipEventCreate`. +HIPIFY tools also convert error codes from the Driver namespace and coding convention to the equivalent HIP error codes. 
Thus, HIP unifies the APIs for these common functions. The memory copy API requires additional explanation. The CUDA driver includes the memory direction in the name of the API (ie `cuMemcpyH2D`) while the CUDA driver API provides a single memory copy API with a parameter that specifies the direction and additionally supports a "default" direction where the runtime determines the direction automatically. HIP provides APIs with both styles: for example, `hipMemcpyH2D` as well as `hipMemcpy`. diff --git a/docs/markdown/hip_porting_guide.md b/docs/markdown/hip_porting_guide.md index 4855fa4cea..c291fa8ae6 100644 --- a/docs/markdown/hip_porting_guide.md +++ b/docs/markdown/hip_porting_guide.md @@ -1,7 +1,7 @@ -# HIP Porting Guide +# HIP Porting Guide In addition to providing a portable C++ programming environment for GPUs, HIP is designed to ease the porting of existing CUDA code into the HIP environment. This section describes the available tools -and provides practical suggestions on how to port CUDA code and work through common issues. +and provides practical suggestions on how to port CUDA code and work through common issues. ## Table of Contents @@ -57,13 +57,13 @@ and provides practical suggestions on how to port CUDA code and work through com - Starting the port on a Cuda machine is often the easiest approach, since you can incrementally port pieces of the code to HIP while leaving the rest in Cuda. (Recall that on Cuda machines HIP is just a thin layer over Cuda, so the two code types can interoperate on nvcc platforms.) Also, the HIP port can be compared with the original Cuda code for function and performance. - Once the Cuda code is ported to HIP and is running on the Cuda machine, compile the HIP code using hcc on an AMD machine. - HIP ports can replace Cuda versions: HIP can deliver the same performance as a native Cuda implementation, with the benefit of portability to both Nvidia and AMD architectures as well as a path to future C++ standard support. 
You can handle platform-specific features through conditional compilation or by adding them to the open-source HIP infrastructure. -- Use **[bin/hipconvertinplace.sh](https://github.com/ROCm-Developer-Tools/HIP/blob/master/bin/hipconvertinplace.sh)** to hipify all code files in the Cuda source directory. +- Use **[bin/hipconvertinplace-perl.sh](https://github.com/ROCm-Developer-Tools/HIP/blob/master/bin/hipconvertinplace-perl.sh)** to hipify all code files in the Cuda source directory. ### Scanning existing CUDA code to scope the porting effort -The hipexamine.sh tool will scan a source directory to determine which files contain CUDA code and how much of that code can be automatically hipified, +The hipexamine-perl.sh tool will scan a source directory to determine which files contain CUDA code and how much of that code can be automatically hipified: ``` > cd examples/rodinia_3.0/cuda/kmeans -> $HIP_DIR/bin/hipexamine.sh . +> $HIP_DIR/bin/hipexamine-perl.sh . info: hipify ./kmeans.h =====> info: hipify ./unistd.h =====> info: hipify ./kmeans.c =====> @@ -80,7 +80,7 @@ info: TOTAL-converted 89 CUDA->HIP refs( dev:3 mem:32 kern:2 builtin:37 math:0 s kernels (1 total) : kmeansPoint(1) ``` -hipexamine scans each code file (cpp, c, h, hpp, etc.) found in the specified directory: +hipexamine-perl scans each code file (cpp, c, h, hpp, etc.) found in the specified directory: * Files with no CUDA code (ie kmeans.h) print one line summary just listing the source file name. * Files with CUDA code print a summary of what was found - for example the kmeans_cuda_kernel.cu file: @@ -94,7 +94,7 @@ info: hipify ./kmeans_cuda_kernel.cu =====> * Warning for code that looks like CUDA API but was not converted (0 in this file). * Count Lines-of-Code (LOC) - 185 for this file. -* hipexamine also presents a summary at the end of the process for the statistics collected across all files. 
This has similar format to the per-file reporting, and also includes a list of all kernels which have been called. An example from above: +* hipexamine-perl also presents a summary at the end of the process for the statistics collected across all files. This has a similar format to the per-file reporting, and also includes a list of all kernels which have been called. An example from above: ```shell info: TOTAL-converted 89 CUDA->HIP refs( dev:3 mem:32 kern:2 builtin:37 math:0 stream:0 event:0 err:0 def:0 tex:15 other:0 ) warn:0 LOC:3607 @@ -104,24 +104,24 @@ info: TOTAL-converted 89 CUDA->HIP refs( dev:3 mem:32 kern:2 builtin:37 math:0 s ### Converting a project "in-place" ```shell -> hipify --inplace +> hipify-perl --inplace ``` For each input file FILE, this script will: - - If "FILE.prehip file does not exist, copy the original code to a new file with extension ".prehip". Then Hipify the code file. - - If "FILE.prehip" file exists, hipify FILE.prehip and save to FILE. + - If "FILE.prehip" file does not exist, copy the original code to a new file with extension ".prehip". Then hipify the code file. + - If "FILE.prehip" file exists, hipify FILE.prehip and save to FILE. This is useful for testing improvements to the hipify toolset. -The [hipconvertinplace.sh](https://github.com/ROCm-Developer-Tools/HIP/blob/master/bin/hipconvertinplace.sh) script will perform inplace conversion for all code files in the specified directory. +The [hipconvertinplace-perl.sh](https://github.com/ROCm-Developer-Tools/HIP/blob/master/bin/hipconvertinplace-perl.sh) script will perform inplace conversion for all code files in the specified directory. This can be quite handy when dealing with an existing CUDA code base since the script preserves the existing directory structure and filenames - and includes work. After converting in-place, you can review the code to add additional parameters to directory names. 
```shell -> hipconvertinplace.sh MY_SRC_DIR +> hipconvertinplace-perl.sh MY_SRC_DIR ``` ### Library Equivalents @@ -402,11 +402,11 @@ You can capture the hipconfig output and passed it to the standard compiler; bel CPPFLAGS += $(shell $(HIP_PATH)/bin/hipconfig --cpp_config) ``` -nvcc includes some headers by default. However, HIP does not include default headers, and instead all required files must be explicitly included. -Specifically, files that call HIP run-time APIs or define HIP kernels must explicitly include the appropriate HIP headers. +nvcc includes some headers by default. However, HIP does not include default headers, and instead all required files must be explicitly included. +Specifically, files that call HIP run-time APIs or define HIP kernels must explicitly include the appropriate HIP headers. If the compilation process reports that it cannot find necessary APIs (for example, "error: identifier ‘hipSetDevice’ is undefined"), -ensure that the file includes hip_runtime.h (or hip_runtime_api.h, if appropriate). -The hipify script automatically converts "cuda_runtime.h" to "hip_runtime.h," and it converts "cuda_runtime_api.h" to "hip_runtime_api.h", but it may miss nested headers or macros. +ensure that the file includes hip_runtime.h (or hip_runtime_api.h, if appropriate). +The hipify-perl script automatically converts "cuda_runtime.h" to "hip_runtime.h," and it converts "cuda_runtime_api.h" to "hip_runtime_api.h", but it may miss nested headers or macros. #### cuda.h diff --git a/docs/markdown/hip_terms2.md b/docs/markdown/hip_terms2.md index be859ffb32..3b4661729d 100644 --- a/docs/markdown/hip_terms2.md +++ b/docs/markdown/hip_terms2.md @@ -1,18 +1,18 @@ # Terms used in HIP Documentation -- host, host cpu : Executes the HIP runtime API and is capable of initiating kernel launches to one or more devices. -- default device : Each host thread maintains a "default device". 
+- host, host cpu : Executes the HIP runtime API and is capable of initiating kernel launches to one or more devices. +- default device : Each host thread maintains a "default device". Most HIP runtime APIs (including memory allocation, copy commands, kernel launches) do not use accept an explicit device argument but instead implicitly use the default device. The default device can be set with hipSetDevice. -- "active host thread" - the thread which is running the HIP APIs. +- "active host thread" - the thread which is running the HIP APIs. -- completion_future becomes ready. "Completes" +- completion_future becomes ready. "Completes". -- hcc = Heterogeneous Compute Compiler (https://bitbucket.org/multicoreware/hcc/wiki/Home). +- hcc = Heterogeneous Compute Compiler (https://bitbucket.org/multicoreware/hcc/wiki/Home). -- hipify - tool to convert CUDA(R) code to portable C++ code. +- hipify tools - tools to convert CUDA(R) code to portable C++ code (https://github.com/ROCm-Developer-Tools/HIPIFY). - hipconfig - tool to report various configuration properties of the target platform. - nvcc = nvcc compiler, do not capitalize. 
diff --git a/hip-config-clang.cmake.in b/hip-config-clang.cmake.in deleted file mode 100644 index 7344458ffd..0000000000 --- a/hip-config-clang.cmake.in +++ /dev/null @@ -1,114 +0,0 @@ -@PACKAGE_INIT@ - -include(CMakeFindDependencyMacro OPTIONAL RESULT_VARIABLE _CMakeFindDependencyMacro_FOUND) -if (NOT _CMakeFindDependencyMacro_FOUND) - macro(find_dependency dep) - if (NOT ${dep}_FOUND) - set(cmake_fd_version) - if (${ARGC} GREATER 1) - set(cmake_fd_version ${ARGV1}) - endif() - set(cmake_fd_exact_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION_EXACT) - set(cmake_fd_exact_arg EXACT) - endif() - set(cmake_fd_quiet_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY) - set(cmake_fd_quiet_arg QUIET) - endif() - set(cmake_fd_required_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED) - set(cmake_fd_required_arg REQUIRED) - endif() - find_package(${dep} ${cmake_fd_version} - ${cmake_fd_exact_arg} - ${cmake_fd_quiet_arg} - ${cmake_fd_required_arg} - ) - string(TOUPPER ${dep} cmake_dep_upper) - if (NOT ${dep}_FOUND AND NOT ${cmake_dep_upper}_FOUND) - set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.") - set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False) - return() - endif() - set(cmake_fd_version) - set(cmake_fd_required_arg) - set(cmake_fd_quiet_arg) - set(cmake_fd_exact_arg) - endif() - endmacro() -endif() - -set(HIP_COMPILER "@HIP_COMPILER@") -set(HIP_RUNTIME "@HIP_RUNTIME@") - -set_and_check( hip_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@" ) -set_and_check( hip_INCLUDE_DIRS "${hip_INCLUDE_DIR}" ) -set_and_check( hip_LIB_INSTALL_DIR "@PACKAGE_LIB_INSTALL_DIR@" ) -set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" ) - -set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc") -set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig") - -if(CMAKE_CXX_COMPILER MATCHES ".*hipcc") - execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version - 
OUTPUT_STRIP_TRAILING_WHITESPACE - OUTPUT_VARIABLE HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT) - if(HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT MATCHES "InstalledDir:[\t\r\n][\t\r\n]*([^\t\r\n])") - set(HIP_CLANG_ROOT ${CMAKE_MATCH_1}) - else() - set(HIP_CLANG_ROOT /opt/rocm/llvm) - endif() -else() - get_filename_component(HIP_CLANG_ROOT "${CMAKE_CXX_COMPILER}" PATH) - get_filename_component(HIP_CLANG_ROOT "${HIP_CLANG_ROOT}" PATH) -endif() -file(GLOB HIP_CLANG_INCLUDE_SEARCH_PATHS ${HIP_CLANG_ROOT}/lib/clang/*/include) -find_path(HIP_CLANG_INCLUDE_PATH stddef.h - HINTS - ${HIP_CLANG_INCLUDE_SEARCH_PATHS} - NO_DEFAULT_PATH) -find_dependency(amd_comgr) -find_dependency(AMDDeviceLibs) -set(AMDGPU_TARGETS "gfx900;gfx906" CACHE STRING "AMD GPU targets to compile for") -set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU targets to compile for") - -include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" ) - -set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_COMPILE_OPTIONS -x hip --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib -) - -set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_LINK_LIBRARIES --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib --hip-link -) - -set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}" -) - -set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}" -) - -foreach(GPU_TARGET ${GPU_TARGETS}) - set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_COMPILE_OPTIONS "--cuda-gpu-arch=${GPU_TARGET}" - ) - set_property(TARGET hip::device APPEND PROPERTY - INTERFACE_LINK_LIBRARIES "--cuda-gpu-arch=${GPU_TARGET}" - ) -endforeach() - -set( hip_LIBRARIES hip::host hip::device) -set( hip_LIBRARY ${hip_LIBRARIES}) - -set(HIP_INCLUDE_DIR ${hip_INCLUDE_DIR}) -set(HIP_INCLUDE_DIRS ${hip_INCLUDE_DIRS}) -set(HIP_LIB_INSTALL_DIR ${hip_LIB_INSTALL_DIR}) -set(HIP_BIN_INSTALL_DIR ${hip_BIN_INSTALL_DIR}) -set(HIP_LIBRARIES 
${hip_LIBRARIES}) -set(HIP_LIBRARY ${hip_LIBRARY}) -set(HIP_HIPCC_EXECUTABLE ${hip_HIPCC_EXECUTABLE}) -set(HIP_HIPCONFIG_EXECUTABLE ${hip_HIPCONFIG_EXECUTABLE}) - diff --git a/hip-config-hcc.cmake.in b/hip-config-hcc.cmake.in deleted file mode 100644 index c0ffc6e2af..0000000000 --- a/hip-config-hcc.cmake.in +++ /dev/null @@ -1,68 +0,0 @@ -@PACKAGE_INIT@ - -include(CMakeFindDependencyMacro OPTIONAL RESULT_VARIABLE _CMakeFindDependencyMacro_FOUND) -if (NOT _CMakeFindDependencyMacro_FOUND) - macro(find_dependency dep) - if (NOT ${dep}_FOUND) - set(cmake_fd_version) - if (${ARGC} GREATER 1) - set(cmake_fd_version ${ARGV1}) - endif() - set(cmake_fd_exact_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION_EXACT) - set(cmake_fd_exact_arg EXACT) - endif() - set(cmake_fd_quiet_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY) - set(cmake_fd_quiet_arg QUIET) - endif() - set(cmake_fd_required_arg) - if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED) - set(cmake_fd_required_arg REQUIRED) - endif() - find_package(${dep} ${cmake_fd_version} - ${cmake_fd_exact_arg} - ${cmake_fd_quiet_arg} - ${cmake_fd_required_arg} - ) - string(TOUPPER ${dep} cmake_dep_upper) - if (NOT ${dep}_FOUND AND NOT ${cmake_dep_upper}_FOUND) - set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.") - set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False) - return() - endif() - set(cmake_fd_version) - set(cmake_fd_required_arg) - set(cmake_fd_quiet_arg) - set(cmake_fd_exact_arg) - endif() - endmacro() -endif() - -set(HIP_COMPILER "@HIP_COMPILER@") -set(HIP_RUNTIME "@HIP_RUNTIME@") - -set_and_check( hip_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@" ) -set_and_check( hip_INCLUDE_DIRS "${hip_INCLUDE_DIR}" ) -set_and_check( hip_LIB_INSTALL_DIR "@PACKAGE_LIB_INSTALL_DIR@" ) -set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" ) - -set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc") 
-set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig") - -find_dependency(hcc) -find_dependency(amd_comgr) -include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" ) - -set( hip_LIBRARIES hip::host hip::device) -set( hip_LIBRARY ${hip_LIBRARIES}) - -set(HIP_INCLUDE_DIR ${hip_INCLUDE_DIR}) -set(HIP_INCLUDE_DIRS ${hip_INCLUDE_DIRS}) -set(HIP_LIB_INSTALL_DIR ${hip_LIB_INSTALL_DIR}) -set(HIP_BIN_INSTALL_DIR ${hip_BIN_INSTALL_DIR}) -set(HIP_LIBRARIES ${hip_LIBRARIES}) -set(HIP_LIBRARY ${hip_LIBRARY}) -set(HIP_HIPCC_EXECUTABLE ${hip_HIPCC_EXECUTABLE}) -set(HIP_HIPCONFIG_EXECUTABLE ${hip_HIPCONFIG_EXECUTABLE}) - diff --git a/hip-config.cmake.in b/hip-config.cmake.in new file mode 100644 index 0000000000..859e2fa0fc --- /dev/null +++ b/hip-config.cmake.in @@ -0,0 +1,184 @@ +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro OPTIONAL RESULT_VARIABLE _CMakeFindDependencyMacro_FOUND) +if (NOT _CMakeFindDependencyMacro_FOUND) + macro(find_dependency dep) + if (NOT ${dep}_FOUND) + set(cmake_fd_version) + if (${ARGC} GREATER 1) + set(cmake_fd_version ${ARGV1}) + endif() + set(cmake_fd_exact_arg) + if(${CMAKE_FIND_PACKAGE_NAME}_FIND_VERSION_EXACT) + set(cmake_fd_exact_arg EXACT) + endif() + set(cmake_fd_quiet_arg) + if(${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY) + set(cmake_fd_quiet_arg QUIET) + endif() + set(cmake_fd_required_arg) + if(${CMAKE_FIND_PACKAGE_NAME}_FIND_REQUIRED) + set(cmake_fd_required_arg REQUIRED) + endif() + find_package(${dep} ${cmake_fd_version} + ${cmake_fd_exact_arg} + ${cmake_fd_quiet_arg} + ${cmake_fd_required_arg} + ) + string(TOUPPER ${dep} cmake_dep_upper) + if (NOT ${dep}_FOUND AND NOT ${cmake_dep_upper}_FOUND) + set(${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE "${CMAKE_FIND_PACKAGE_NAME} could not be found because dependency ${dep} could not be found.") + set(${CMAKE_FIND_PACKAGE_NAME}_FOUND False) + return() + endif() + set(cmake_fd_version) + set(cmake_fd_required_arg) + set(cmake_fd_quiet_arg) + set(cmake_fd_exact_arg) + endif() + 
endmacro() +endif() + +set(HIP_COMPILER "@HIP_COMPILER@") +set(HIP_RUNTIME "@HIP_RUNTIME@") + +set_and_check( hip_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@" ) +set_and_check( hip_INCLUDE_DIRS "${hip_INCLUDE_DIR}" ) +set_and_check( hip_LIB_INSTALL_DIR "@PACKAGE_LIB_INSTALL_DIR@" ) +set_and_check( hip_BIN_INSTALL_DIR "@PACKAGE_BIN_INSTALL_DIR@" ) + +set_and_check(hip_HIPCC_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipcc") +set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig") + +if(HIP_COMPILER STREQUAL "clang") + if(NOT HIP_CXX_COMPILER) + set(HIP_CXX_COMPILER ${CMAKE_CXX_COMPILER}) + endif() + if(HIP_CXX_COMPILER MATCHES ".*hipcc") + execute_process(COMMAND ${HIP_CXX_COMPILER} --version + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT) + if(HIP_CLANG_CXX_COMPILER_VERSION_OUTPUT MATCHES "InstalledDir:[ \t]*([^\r\n]*)") # clang prints "InstalledDir: <root>/bin"; capture the whole path after the colon + get_filename_component(HIP_CLANG_ROOT "${CMAKE_MATCH_1}" PATH) # drop the trailing bin directory to obtain the LLVM root + else() + set(HIP_CLANG_ROOT /opt/rocm/llvm) + endif() + elseif (HIP_CXX_COMPILER MATCHES ".*clang\\+\\+") + get_filename_component(HIP_CLANG_ROOT "${HIP_CXX_COMPILER}" PATH) + get_filename_component(HIP_CLANG_ROOT "${HIP_CLANG_ROOT}" PATH) + else() + set(HIP_CLANG_ROOT /opt/rocm/llvm) + endif() + file(GLOB HIP_CLANG_INCLUDE_SEARCH_PATHS ${HIP_CLANG_ROOT}/lib/clang/*/include) + find_path(HIP_CLANG_INCLUDE_PATH stddef.h + HINTS + ${HIP_CLANG_INCLUDE_SEARCH_PATHS} + NO_DEFAULT_PATH) + find_dependency(AMDDeviceLibs) + set(AMDGPU_TARGETS "gfx900;gfx906" CACHE STRING "AMD GPU targets to compile for") + set(GPU_TARGETS "${AMDGPU_TARGETS}" CACHE STRING "GPU targets to compile for") +else() + find_dependency(hcc) +endif() + +find_dependency(amd_comgr) + +include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" ) + +#If HIP is not installed under ROCm, need this to find HSA assuming HSA is under ROCm +if( DEFINED ENV{ROCM_PATH} ) + set(ROCM_PATH "$ENV{ROCM_PATH}") +endif() + +#get_filename_component cannot resolve the symlinks if called from
/opt/rocm/lib/hip +#and do three level up again +get_filename_component(_DIR "${CMAKE_CURRENT_LIST_DIR}" REALPATH) +get_filename_component(_IMPORT_PREFIX "${_DIR}/../../../" REALPATH) + +#if HSA is not under ROCm then provide CMAKE_PREFIX_PATH= +find_path(HSA_HEADER hsa/hsa.h + PATHS + "${_IMPORT_PREFIX}/../include" + /opt/rocm/include +) + +if (NOT HSA_HEADER) # find_path() leaves HSA_HEADER set to "HSA_HEADER-NOTFOUND" (a false value) when hsa/hsa.h is missing + message (FATAL_ERROR "HSA header not found! ROCM_PATH environment not set") +endif() + +# Right now this is only supported for amd platforms +set_target_properties(hip::host PROPERTIES + INTERFACE_COMPILE_DEFINITIONS "__HIP_PLATFORM_HCC__=1" +) + +if(HIP_RUNTIME MATCHES "VDI") + set_target_properties(hip::amdhip64 PROPERTIES + INTERFACE_COMPILE_DEFINITIONS "__HIP_VDI__=1" + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" + ) + set_target_properties(hip::device PROPERTIES + INTERFACE_COMPILE_DEFINITIONS "__HIP_VDI__=1" + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" + ) +else() + set_target_properties(hip::hip_hcc_static PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}") + + set_target_properties(hip::hip_hcc PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" + ) + set_target_properties(hip::device PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" + ) +endif() + +if(HIP_COMPILER STREQUAL "clang") + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_COMPILE_OPTIONS -x hip --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib + ) + + if (HIP_CXX_COMPILER MATCHES ".*clang\\+\\+") +
set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_COMPILE_OPTIONS -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false + ) + endif() + + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_LINK_LIBRARIES --hip-device-lib-path=${AMD_DEVICE_LIBS_PREFIX}/lib --hip-link + ) + + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}/.." + ) + + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${HIP_CLANG_INCLUDE_PATH}/.." + ) + + foreach(GPU_TARGET ${GPU_TARGETS}) + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_COMPILE_OPTIONS "--cuda-gpu-arch=${GPU_TARGET}" + ) + set_property(TARGET hip::device APPEND PROPERTY + INTERFACE_LINK_LIBRARIES "--cuda-gpu-arch=${GPU_TARGET}" + ) + endforeach() +endif() + +set( hip_LIBRARIES hip::host hip::device) +set( hip_LIBRARY ${hip_LIBRARIES}) + +set(HIP_INCLUDE_DIR ${hip_INCLUDE_DIR}) +set(HIP_INCLUDE_DIRS ${hip_INCLUDE_DIRS}) +set(HIP_LIB_INSTALL_DIR ${hip_LIB_INSTALL_DIR}) +set(HIP_BIN_INSTALL_DIR ${hip_BIN_INSTALL_DIR}) +set(HIP_LIBRARIES ${hip_LIBRARIES}) +set(HIP_LIBRARY ${hip_LIBRARY}) +set(HIP_HIPCC_EXECUTABLE ${hip_HIPCC_EXECUTABLE}) +set(HIP_HIPCONFIG_EXECUTABLE ${hip_HIPCONFIG_EXECUTABLE}) + diff --git a/hip_prof_gen.py b/hip_prof_gen.py index d2da7cd4df..475d28186b 100755 --- a/hip_prof_gen.py +++ b/hip_prof_gen.py @@ -43,7 +43,7 @@ def filtr_api_args(args_str): args_str = re.sub(r'\s*$', r'', args_str); args_str = re.sub(r'\s*,\s*', r',', args_str); args_str = re.sub(r'\s+', r' ', args_str); - args_str = re.sub(r'void \*', r'void* ', args_str); + args_str = re.sub(r'\s*(\*+)\s*', r'\1 ', args_str); args_str = re.sub(r'(enum|struct) ', '', args_str); return args_str @@ -306,6 +306,7 @@ def generate_prof_header(f, api_map, opts_map): f.write('// automatically generated sources\n') f.write('#ifndef _HIP_PROF_STR_H\n'); f.write('#define _HIP_PROF_STR_H\n'); + f.write('#define HIP_PROF_VER 
1\n') # Generating dummy macro for non-public API f.write('\n// Dummy API primitives\n') @@ -388,30 +389,29 @@ def generate_prof_header(f, api_map, opts_map): f.write('#define INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data)\n') # Generating the method for the API string, name and parameters - if False: - f.write('\n') - f.write('#if 0\n') - f.write('#include \n'); - f.write('#include \n'); - f.write('// HIP API string method, method name and parameters\n') - f.write('const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {\n') - f.write(' std::ostringstream oss;\n') - f.write(' switch (id) {\n') - for name, args in api_map.items(): - f.write(' case HIP_API_ID_' + name + ':\n') - f.write(' oss << "' + name + '("') - for ind in range(0, len(args)): - arg_tuple = args[ind] - arg_name = arg_tuple[1] - if ind != 0: f.write(' << ","') - f.write('\n << " ' + arg_name + '=" << data->args.' + name + '.' + arg_name) - f.write('\n << ")";\n') - f.write(' break;\n') - f.write(' default: oss << "unknown";\n') - f.write(' };\n') - f.write(' return strdup(oss.str().c_str());\n') - f.write('};\n') - f.write('#endif\n') + f.write('\n') + f.write('#if HIP_PROF_HIP_API_STRING\n') + f.write('#include \n'); + f.write('#include \n'); + f.write('// HIP API string method, method name and parameters\n') + f.write('const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {\n') + f.write(' std::ostringstream oss;\n') + f.write(' switch (id) {\n') + for name, args in api_map.items(): + f.write(' case HIP_API_ID_' + name + ':\n') + f.write(' oss << "' + name + '("') + for ind in range(0, len(args)): + arg_tuple = args[ind] + arg_name = arg_tuple[1] + if ind != 0: f.write(' << ","') + f.write('\n << " ' + arg_name + '=" << data->args.' + name + '.' 
+ arg_name) + f.write('\n << ")";\n') + f.write(' break;\n') + f.write(' default: oss << "unknown";\n') + f.write(' };\n') + f.write(' return strdup(oss.str().c_str());\n') + f.write('};\n') + f.write('#endif // HIP_PROF_HIP_API_STRING\n') f.write('#endif // _HIP_PROF_STR_H\n'); @@ -472,7 +472,8 @@ not_found = 0 if len(opts_map) != 0: for name in api_map.keys(): args_str = api_map[name]; - api_map[name] = list_api_args(args_str) + args_list = list_api_args(args_str) + api_map[name] = args_list if not name in opts_map: error("implementation not found: " + name) not_found += 1 diff --git a/hipify-clang/CMakeLists.txt b/hipify-clang/CMakeLists.txt deleted file mode 100644 index 875b5dad74..0000000000 --- a/hipify-clang/CMakeLists.txt +++ /dev/null @@ -1,198 +0,0 @@ -cmake_minimum_required(VERSION 3.5.1) - -project(hipify-clang) - -if (MSVC AND MSVC_VERSION VERSION_LESS "1900") - message(SEND_ERROR "hipify-clang could be built by Visual Studio 14 2015 or higher.") - return() -endif() - -find_package(LLVM REQUIRED) -message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}:") -message(STATUS " - CMake module path: ${LLVM_CMAKE_DIR}") -message(STATUS " - Include path : ${LLVM_INCLUDE_DIRS}") -message(STATUS " - Binary path : ${LLVM_TOOLS_BINARY_DIR}") - -option(HIPIFY_CLANG_TESTS "Build the tests for hipify-clang, if lit is installed" OFF) - -list(APPEND CMAKE_MODULE_PATH ${LLVM_CMAKE_DIR}) -include(AddLLVM) - -include_directories(${LLVM_INCLUDE_DIRS}) -link_directories(${LLVM_LIBRARY_DIRS}) -add_definitions(${LLVM_DEFINITIONS}) - -file(GLOB_RECURSE HIPIFY_SOURCES src/*.cpp) -file(GLOB_RECURSE HIPIFY_HEADERS src/*.h) -add_llvm_executable(hipify-clang ${HIPIFY_SOURCES} ${HIPIFY_HEADERS}) - -set(CMAKE_CXX_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang++) -set(CMAKE_C_COMPILER ${LLVM_TOOLS_BINARY_DIR}/clang) - -# Link against LLVM and CLANG libraries -target_link_libraries(hipify-clang PRIVATE - clangASTMatchers - clangFrontend - clangTooling - clangParse - clangSerialization - clangSema 
- clangEdit - clangFormat - clangLex - clangAnalysis - clangDriver - clangAST - clangToolingCore - clangRewrite - clangBasic - LLVMProfileData - LLVMSupport - LLVMMCParser - LLVMMC - LLVMBitReader - LLVMOption - LLVMCore) - -if (LLVM_PACKAGE_VERSION VERSION_GREATER "6.0.1") - target_link_libraries(hipify-clang PRIVATE clangToolingInclusions) -endif() - -if (LLVM_PACKAGE_VERSION VERSION_GREATER "9.0.1") - target_link_libraries(hipify-clang PRIVATE LLVMFrontendOpenMP) -endif() - -if (MSVC) - target_link_libraries(hipify-clang PRIVATE version) - target_compile_options(hipify-clang PRIVATE "/Od /GR- /EHs- /EHc-") - set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} /SUBSYSTEM:WINDOWS") - set(StdCpp "/std:c++") -else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -fno-rtti -fvisibility-inlines-hidden") - set(StdCpp "-std=c++") -endif() - -if (LLVM_PACKAGE_VERSION VERSION_GREATER "9.0") - string(APPEND StdCpp "14") -# MSVC starting from 1900 (VS 2015) supports only the following c++ std values: c++14|c++17|c++latest -elseif (MSVC) - set(StdCpp "") -else() - string(APPEND StdCpp "11") -endif() - -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_CFLAGS}") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CFLAGS} ${StdCpp} -DHIPIFY_CLANG_RES=\\\"${LLVM_LIBRARY_DIRS}/clang/${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}\\\"") - -set(INSTALL_PATH_DOC_STRING "Installation path for hipify-clang") -set(HIPIFY_INSTALL_PATH ${CMAKE_INSTALL_PREFIX}) -if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) - if(CMAKE_BUILD_TYPE MATCHES Debug) - set(HIPIFY_INSTALL_PATH "${CMAKE_CURRENT_SOURCE_DIR}/bin" CACHE PATH ${INSTALL_PATH_DOC_STRING} FORCE) - elseif(CMAKE_BUILD_TYPE MATCHES Release) - if (BIN_INSTALL_DIR) - set(HIPIFY_INSTALL_PATH "${BIN_INSTALL_DIR}" CACHE PATH ${INSTALL_PATH_DOC_STRING} FORCE) - else() - set(HIPIFY_INSTALL_PATH "${PROJECT_BINARY_DIR}/bin" CACHE PATH ${INSTALL_PATH_DOC_STRING} FORCE) - endif() - else() - message(FATAL_ERROR "Invalid CMAKE_BUILD_TYPE 
specified. Valid values are Debug and Release") - endif() -elseif(BIN_INSTALL_DIR) - set(HIPIFY_INSTALL_PATH "${BIN_INSTALL_DIR}" CACHE PATH ${INSTALL_PATH_DOC_STRING} FORCE) -endif() - -install(TARGETS hipify-clang DESTINATION ${HIPIFY_INSTALL_PATH}) - -install( - DIRECTORY ${LLVM_DIR}/../../clang/${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}/ - DESTINATION ${HIPIFY_INSTALL_PATH} - COMPONENT clang-resource-headers - FILES_MATCHING - PATTERN "*.h" - PATTERN "*.modulemap" - PATTERN "algorithm" - PATTERN "complex" - PATTERN "new" - PATTERN "ppc_wrappers" EXCLUDE - PATTERN "openmp_wrappers" EXCLUDE) - -if (UNIX) - set(BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/packages/hipify-clang) - configure_file(packaging/hipify-clang.txt ${BUILD_DIR}/CMakeLists.txt @ONLY) - add_custom_target(pkg_hipify-clang COMMAND ${CMAKE_COMMAND} . - COMMAND rm -rf *.deb *.rpm *.tar.gz - COMMAND make package - COMMAND cp *.deb ${PROJECT_BINARY_DIR} - COMMAND cp *.rpm ${PROJECT_BINARY_DIR} - COMMAND cp *.tar.gz ${PROJECT_BINARY_DIR} - WORKING_DIRECTORY ${BUILD_DIR}) - - file(GENERATE OUTPUT ${PROJECT_BINARY_DIR}/fixnames - CONTENT "pwd; for i in *.deb; do mv \"\$i\" \"\${i/.deb/-amd64.deb}\" ; done - for i in *.rpm ; do mv \$i \${i/.rpm/.x86_64.rpm} ; done") - - add_custom_target(package_hipify-clang - COMMAND bash ${PROJECT_BINARY_DIR}/fixnames - WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - DEPENDS pkg_hipify-clang) -endif() - -if (HIPIFY_CLANG_TESTS) - find_package(PythonInterp 2.7 REQUIRED) - - function (require_program PROGRAM_NAME) - find_program(FOUND_${PROGRAM_NAME} ${PROGRAM_NAME}) - if (FOUND_${PROGRAM_NAME}) - message(STATUS "Found ${PROGRAM_NAME}: ${FOUND_${PROGRAM_NAME}}") - else() - message(SEND_ERROR "Can't find ${PROGRAM_NAME}. 
Either set HIPIFY_CLANG_TESTS to OFF to disable hipify tests, or install the missing program.") - endif() - endfunction() - - require_program(lit) - require_program(FileCheck) - - find_package(CUDA REQUIRED) - if ((CUDA_VERSION VERSION_LESS "7.0") OR (LLVM_PACKAGE_VERSION VERSION_LESS "3.8") OR - (CUDA_VERSION VERSION_GREATER "7.5" AND LLVM_PACKAGE_VERSION VERSION_LESS "4.0") OR - (CUDA_VERSION VERSION_GREATER "8.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "6.0") OR - (CUDA_VERSION VERSION_GREATER "9.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "7.0") OR - (CUDA_VERSION VERSION_GREATER "9.2" AND LLVM_PACKAGE_VERSION VERSION_LESS "8.0") OR - (CUDA_VERSION VERSION_GREATER "10.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "9.0") OR - (CUDA_VERSION VERSION_GREATER "10.1" AND LLVM_PACKAGE_VERSION VERSION_LESS "10.0")) - message(SEND_ERROR "CUDA ${CUDA_VERSION} is not supported by LLVM ${LLVM_PACKAGE_VERSION}.") - if (CUDA_VERSION_MAJOR VERSION_LESS "7") - message(STATUS "Please install CUDA 7.0 or higher.") - elseif (CUDA_VERSION_MAJOR VERSION_LESS "8") - message(STATUS "Please install LLVM + clang 3.8 or higher.") - elseif (CUDA_VERSION_MAJOR VERSION_LESS "9") - message(STATUS "Please install LLVM + clang 4.0 or higher.") - elseif (CUDA_VERSION VERSION_EQUAL "9.0") - message(STATUS "Please install LLVM + clang 6.0 or higher.") - elseif (CUDA_VERSION_MAJOR VERSION_LESS "10") - message(STATUS "Please install LLVM + clang 7.0 or higher.") - elseif (CUDA_VERSION VERSION_EQUAL "10.0") - message(STATUS "Please install LLVM + clang 8.0 or higher.") - elseif (CUDA_VERSION VERSION_EQUAL "10.1") - message(STATUS "Please install LLVM + clang 9.0 or higher.") - elseif (CUDA_VERSION VERSION_EQUAL "10.2") - message(STATUS "Please install LLVM + clang 10.0 or higher.") - endif() - endif() - - configure_file( - ${CMAKE_CURRENT_LIST_DIR}/../tests/hipify-clang/lit.site.cfg.in - ${CMAKE_CURRENT_BINARY_DIR}/tests/hipify-clang/lit.site.cfg - @ONLY) - - add_lit_testsuite(test-hipify "Running HIPify 
regression tests" - ${CMAKE_CURRENT_LIST_DIR}/../tests/hipify-clang - PARAMS site_config=${CMAKE_CURRENT_BINARY_DIR}/tests/hipify-clang/lit.site.cfg - ARGS -v - DEPENDS hipify-clang) - - add_custom_target(test-hipify-clang) - add_dependencies(test-hipify-clang test-hipify) - set_target_properties(test-hipify-clang PROPERTIES FOLDER "Tests") -endif() diff --git a/hipify-clang/README.md b/hipify-clang/README.md deleted file mode 100644 index 88d7a72ccd..0000000000 --- a/hipify-clang/README.md +++ /dev/null @@ -1,506 +0,0 @@ -# HIPIFY -### Tools to translate CUDA source code into portable HIP C++ automatically -## Table of Contents - - - -- [hipify-clang](#clang) - * [Dependencies](#dependencies) - * [Usage](#hipify-clang-usage) - * [Building](#building) - * [Testing](#testing) - * [Linux](#linux) - * [Windows](#windows) -- [hipify-perl](#perl) - * [Usage](#hipify-perl-usage) - * [Building](#hipify-perl-building) -- [Supported CUDA APIs](#cuda-apis) -- [Disclaimer](#disclaimer) - - - -## hipify-clang - -`hipify-clang` is a clang-based tool for translation CUDA sources into HIP sources. -It translates CUDA source into an abstract syntax tree, which is being traversed by transformation matchers. -After applying all the matchers, the output HIP source is produced. - -**Advantages:** - -1. It is a translator; thus, any even very complicated constructs will be parsed successfully, or an error will be reported. -2. It supports clang options like [`-I`](https://clang.llvm.org/docs/ClangCommandLineReference.html#cmdoption-clang-i-dir), [`-D`](https://clang.llvm.org/docs/ClangCommandLineReference.html#cmdoption-clang-d-macro), [`--cuda-path`](https://clang.llvm.org/docs/ClangCommandLineReference.html#cmdoption-clang-cuda-path), etc. -3. Seamless support of new CUDA versions as it is clang's responsibility. -4. Ease in support. - -**Disadvantages:** - -1. 
The main advantage is also the main disadvantage: the input CUDA code should be correct; incorrect code wouldn't be translated to HIP. -2. CUDA should be installed and provided in case of multiple installations by `--cuda-path` option. -3. All the includes and defines should be provided to transform code successfully. - -### hipify-clang: dependencies - -`hipify-clang` requires: - -1. [**LLVM+CLANG**](http://releases.llvm.org) of at least version [3.8.0](http://releases.llvm.org/download.html#3.8.0); the latest stable and recommended release: [**9.0.1**](http://releases.llvm.org/download.html#9.0.1), the latest release candidate: [10.0.0-rc3](https://github.com/llvm/llvm-project/releases/tag/llvmorg-10.0.0-rc3). - -2. [**CUDA**](https://developer.nvidia.com/cuda-downloads) of at least version [7.0](https://developer.nvidia.com/cuda-toolkit-70), the latest supported version is [**10.1 Update 2**](https://developer.nvidia.com/cuda-10.1-download-archive-base). -To use the latest CUDA version [10.2](https://developer.nvidia.com/cuda-downloads) please use the latest `LLVM` release candidate: [10.0.0-rc3](https://github.com/llvm/llvm-project/releases/tag/llvmorg-10.0.0-rc3). 
- -| **LLVM release version** | **CUDA latest supported version** | **Windows** | **Linux** | -|:----------------------------------------------------------:|:------------------------------------------------------------------------:|:-----------:|:---------:| -| [3.8.0](http://releases.llvm.org/download.html#3.8.0) | [7.5](https://developer.nvidia.com/cuda-75-downloads-archive) | + | + | -| [3.8.1](http://releases.llvm.org/download.html#3.8.1) | [7.5](https://developer.nvidia.com/cuda-75-downloads-archive) | + | + | -| [3.9.0](http://releases.llvm.org/download.html#3.9.0) | [7.5](https://developer.nvidia.com/cuda-75-downloads-archive) | + | + | -| [3.9.1](http://releases.llvm.org/download.html#3.9.1) | [7.5](https://developer.nvidia.com/cuda-75-downloads-archive) | + | + | -| [4.0.0](http://releases.llvm.org/download.html#4.0.0) | [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive) | + | + | -| [4.0.1](http://releases.llvm.org/download.html#4.0.1) | [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive) | + | + | -| [5.0.0](http://releases.llvm.org/download.html#5.0.0) | [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive) | + | + | -| [5.0.1](http://releases.llvm.org/download.html#5.0.1) | [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive) | + | + | -| [5.0.2](http://releases.llvm.org/download.html#5.0.2) | [8.0](https://developer.nvidia.com/cuda-80-ga2-download-archive) | + | + | -| [6.0.0](http://releases.llvm.org/download.html#6.0.0) | [9.0](https://developer.nvidia.com/cuda-90-download-archive) | + | + | -| [6.0.1](http://releases.llvm.org/download.html#6.0.1) | [9.0](https://developer.nvidia.com/cuda-90-download-archive) | + | + | -| [7.0.0](http://releases.llvm.org/download.html#7.0.0) | [9.2](https://developer.nvidia.com/cuda-92-download-archive) | -
not working due to
the clang's bug [38811](https://bugs.llvm.org/show_bug.cgi?id=38811)
+
[patch](patches/patch_for_clang_7.0.0_bug_38811.zip)*
| -
not working due to
the clang's bug [36384](https://bugs.llvm.org/show_bug.cgi?id=36384) | -| [7.0.1](http://releases.llvm.org/download.html#7.0.1) | [9.2](https://developer.nvidia.com/cuda-92-download-archive) | -
not working due to
the clang's bug [38811](https://bugs.llvm.org/show_bug.cgi?id=38811)
+
[patch](patches/patch_for_clang_7.0.1_bug_38811.zip)*
| -
not working due to
the clang's bug [36384](https://bugs.llvm.org/show_bug.cgi?id=36384) | -| [7.1.0](http://releases.llvm.org/download.html#7.1.0) | [9.2](https://developer.nvidia.com/cuda-92-download-archive) | -
not working due to
the clang's bug [38811](https://bugs.llvm.org/show_bug.cgi?id=38811)
+
[patch](patches/patch_for_clang_7.1.0_bug_38811.zip)*
| -
not working due to
the clang's bug [36384](https://bugs.llvm.org/show_bug.cgi?id=36384) | -| [8.0.0](http://releases.llvm.org/download.html#8.0.0) | [10.0](https://developer.nvidia.com/cuda-10.0-download-archive) | -
not working due to
the clang's bug [38811](https://bugs.llvm.org/show_bug.cgi?id=38811)
+
[patch](patches/patch_for_clang_8.0.0_bug_38811.zip)*
| + | -| [8.0.1](http://releases.llvm.org/download.html#8.0.1) | [10.0](https://developer.nvidia.com/cuda-10.0-download-archive) | -
not working due to
the clang's bug [38811](https://bugs.llvm.org/show_bug.cgi?id=38811)
+
[patch](patches/patch_for_clang_8.0.1_bug_38811.zip)*
| + | -| [9.0.0](http://releases.llvm.org/download.html#9.0.0) | [10.1](https://developer.nvidia.com/cuda-10.1-download-archive-base) | + | + | -| [**9.0.1**](http://releases.llvm.org/download.html#9.0.1) | [**10.1**](https://developer.nvidia.com/cuda-10.1-download-archive-base) | +
**LATEST STABLE RELEASE** | +
**LATEST STABLE RELEASE** | -| [10.0.0-rc3](https://github.com/llvm/llvm-project/releases/tag/llvmorg-10.0.0-rc3) | [10.2](https://developer.nvidia.com/cuda-downloads) | + | + | - -`*` Download the patch and unpack it into your `LLVM` distributive directory; a few header files will be overwritten; rebuilding of `LLVM` is not needed. - -In most cases, you can get a suitable version of `LLVM+CLANG` with your package manager. - -Failing that or having multiple versions of `LLVM`, you can [download a release archive](http://releases.llvm.org/), build or install it, and set -[CMAKE_PREFIX_PATH](https://cmake.org/cmake/help/v3.5/variable/CMAKE_PREFIX_PATH.html) so `cmake` can find it; for instance: `-DCMAKE_PREFIX_PATH=f:\LLVM\9.0.1\dist` - -### hipify-clang: usage - -To process a file, `hipify-clang` needs access to the same headers that would be required to compile it with clang. - -For example: - -```shell -./hipify-clang square.cu --cuda-path=/usr/local/cuda-10.1 -I /usr/local/cuda-10.1/samples/common/inc -``` - -`hipify-clang` arguments are given first, followed by a separator `'--'`, and then the arguments you'd pass to `clang` if you -were compiling the input file. For example: - -```bash -./hipify-clang cpp17.cu --cuda-path=/usr/local/cuda-10.1 -- -std=c++17 -``` - -The [Clang manual for compiling CUDA](https://llvm.org/docs/CompileCudaWithLLVM.html#compiling-cuda-code) may be useful. - -For a list of `hipify-clang` options, run `hipify-clang --help`. - -### hipify-clang: building - -Assuming this repository is at `./HIP`: - -```bash -cd hipify-clang -mkdir build dist -cd build - -cmake \ - -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_BUILD_TYPE=Release \ - .. 
- -make -j install -``` -On Windows, the following option should be specified for `cmake` at first place: `-G "Visual Studio 16 2019 Win64"`; the generated `hipify-clang.sln` should be built by `Visual Studio 16 2019` instead of `make.` -Please, see [hipify-clang: Windows](#windows) for the supported tools for building. - -Debug build type `-DCMAKE_BUILD_TYPE=Debug` is also supported and tested; `LLVM+CLANG` should be built in `Debug` mode as well. -64-bit build mode (`-Thost=x64` on Windows) is also supported; `LLVM+CLANG` should be built in 64-bit mode as well. - -The binary can then be found at `./dist/bin/hipify-clang`. - -### hipify-clang: testing - -`hipify-clang` has unit tests using `LLVM` [`lit`](https://llvm.org/docs/CommandGuide/lit.html)/[`FileCheck`](https://llvm.org/docs/CommandGuide/FileCheck.html). - -`LLVM+CLANG` should be built from sources, pre-built binaries are not exhaustive for testing. - -**LLVM 9.0.1 or older:** - -1. download [`LLVM`](http://releases.llvm.org/9.0.1/llvm-9.0.1.src.tar.xz)+[`CLANG`](http://releases.llvm.org/9.0.1/cfe-9.0.1.src.tar.xz) sources; -2. build [`LLVM+CLANG`](http://releases.llvm.org/9.0.0/docs/CMake.html): - - **Linux**: - ```bash - cmake \ - -DCMAKE_INSTALL_PREFIX=../dist \ - -DLLVM_SOURCE_DIR=../llvm \ - -DLLVM_TARGETS_TO_BUILD="X86;NVPTX" \ - -DCMAKE_BUILD_TYPE=Release \ - ../llvm - make -j install - ``` - **Windows**: - ```shell - cmake \ - -G "Visual Studio 16 2019" \ - -A x64 \ - -DCMAKE_INSTALL_PREFIX=../dist \ - -DLLVM_SOURCE_DIR=../llvm \ - -DLLVM_TARGETS_TO_BUILD="NVPTX" \ - -DCMAKE_BUILD_TYPE=Release \ - -Thost=x64 \ - ../llvm - ``` -Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, build project `INSTALL`. - -**LLVM 10.0.0 or newer:** - -1. download [`LLVM project`](https://github.com/llvm/llvm-project/archive/llvmorg-10.0.0-rc3.tar.gz) sources; -2. 
build [`LLVM project`](http://llvm.org/docs/CMake.html): - - **Linux**: - ```bash - cmake \ - -DCMAKE_INSTALL_PREFIX=../dist \ - -DLLVM_SOURCE_DIR=../llvm-project \ - -DLLVM_TARGETS_TO_BUILD="X86;NVPTX" \ - -DLLVM_ENABLE_PROJECTS="clang" \ - -DLLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN=ON - -DCMAKE_BUILD_TYPE=Release \ - ../llvm-project/llvm - make -j install - ``` - **Windows**: - ```shell - cmake \ - -G "Visual Studio 16 2019" \ - -A x64 \ - -DCMAKE_INSTALL_PREFIX=../dist \ - -DLLVM_SOURCE_DIR=../llvm-project \ - -DLLVM_TARGETS_TO_BUILD="NVPTX" \ - -DLLVM_ENABLE_PROJECTS="clang" \ - -DLLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN=ON - -DCMAKE_BUILD_TYPE=Release \ - -Thost=x64 \ - ../llvm-project/llvm - ``` -Run `Visual Studio 16 2019`, open the generated `LLVM.sln`, build all, build project `INSTALL`. - -3. Ensure [`CUDA`](https://developer.nvidia.com/cuda-toolkit-archive) of minimum version 7.0 is installed. - - * Having multiple CUDA installations to choose a particular version the `DCUDA_TOOLKIT_ROOT_DIR` option should be specified: - - - ***Linux***: `-DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.1` - - - ***Windows***: `-DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1"` - - `-DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v10.1"` - -4. Ensure [`cuDNN`](https://developer.nvidia.com/rdp/cudnn-archive) of the version corresponding to CUDA's version is installed. - - * Path to cuDNN should be specified by the `CUDA_DNN_ROOT_DIR` option: - - - ***Linux***: `-DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.1-v7.6.5.32` - - - ***Windows***: `-DCUDA_DNN_ROOT_DIR=f:/CUDNN/cudnn-10.1-windows10-x64-v7.6.5.32` - -5. Ensure [`CUB`](https://github.com/NVlabs/cub) of the version corresponding to CUDA's version is installed. - - * Path to CUB should be specified by the `CUDA_CUB_ROOT_DIR` option: - - - ***Linux***: `-DCUDA_CUB_ROOT_DIR=/srv/git/CUB` - - - ***Windows***: `-DCUDA_CUB_ROOT_DIR=f:/GIT/cub` - -5. 
Ensure [`python`](https://www.python.org/downloads) of minimum required version 2.7 is installed. - -6. Ensure `lit` and `FileCheck` are installed - these are distributed with `LLVM`. - - * Install `lit` into `python`: - - - ***Linux***: `python /srv/git/LLVM/9.0.1/llvm/utils/lit/setup.py install` - - - ***Windows***: `python f:/LLVM/9.0.1/llvm/utils/lit/setup.py install` - - * Starting with LLVM 6.0.1 path to `llvm-lit` python script should be specified by the `LLVM_EXTERNAL_LIT` option: - - - ***Linux***: `-DLLVM_EXTERNAL_LIT=/srv/git/LLVM/9.0.1/build/bin/llvm-lit` - - - ***Windows***: `-DLLVM_EXTERNAL_LIT=f:/LLVM/9.0.1/build/Release/bin/llvm-lit.py` - - * `FileCheck`: - - - ***Linux***: copy from `/srv/git/LLVM/9.0.1/build/bin/` to `CMAKE_INSTALL_PREFIX/dist/bin` - - - ***Windows***: copy from `f:/LLVM/9.0.1/build/Release/bin` to `CMAKE_INSTALL_PREFIX/dist/bin` - - - Or specify the path to `FileCheck` in `CMAKE_INSTALL_PREFIX` option - -7. Set `HIPIFY_CLANG_TESTS` option turned on: `-DHIPIFY_CLANG_TESTS=1`. - -8. Build and run tests: - -### hipify-clang: Linux - -On Linux the following configurations are tested: - -Ubuntu 14: LLVM 5.0.0 - 6.0.1, CUDA 7.0 - 9.0, cudnn-5.0.5 - cudnn-7.6.5.32 - -Ubuntu 16-18: LLVM 8.0.0 - 10.0.0-rc3, CUDA 8.0 - 10.2, cudnn-5.1.10 - cudnn-7.6.5.32 - -Minimum build system requirements for the above configurations: - -Python 2.7, cmake 3.5.1, GNU C/C++ 5.4.0. - -Here is an example of building `hipify-clang` with testing support on `Ubuntu 16.04`: - -```bash -cmake - -DHIPIFY_CLANG_TESTS=1 \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=/srv/git/LLVM/9.0.1/dist \ - -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda-10.1 \ - -DCUDA_DNN_ROOT_DIR=/srv/CUDNN/cudnn-10.1-v7.6.5.32 \ - -DCUDA_CUB_ROOT_DIR=/srv/git/CUB \ - -DLLVM_EXTERNAL_LIT=/srv/git/LLVM/9.0.1/build/bin/llvm-lit \ - .. 
-``` -*A corresponding successful output:* -```shell --- The C compiler identification is GNU 7.4.0 --- The CXX compiler identification is GNU 7.4.0 --- Check for working C compiler: /usr/bin/cc --- Check for working C compiler: /usr/bin/cc -- works --- Detecting C compiler ABI info --- Detecting C compiler ABI info - done --- Detecting C compile features --- Detecting C compile features - done --- Check for working CXX compiler: /usr/bin/c++ --- Check for working CXX compiler: /usr/bin/c++ -- works --- Detecting CXX compiler ABI info --- Detecting CXX compiler ABI info - done --- Detecting CXX compile features --- Detecting CXX compile features - done --- Found LLVM 9.0.1: --- - CMake module path: /srv/git/LLVM/9.0.1/dist/lib/cmake/llvm --- - Include path : /srv/git/LLVM/9.0.1/dist/include --- - Binary path : /srv/git/LLVM/9.0.1/dist/bin --- Linker detection: GNU ld --- Found PythonInterp: /usr/bin/python2.7 (found suitable version "2.7.12", minimum required is "2.7") --- Found lit: /usr/local/bin/lit --- Found FileCheck: /srv/git/LLVM/9.0.1/dist/bin/FileCheck --- Looking for pthread.h --- Looking for pthread.h - found --- Looking for pthread_create --- Looking for pthread_create - not found --- Looking for pthread_create in pthreads --- Looking for pthread_create in pthreads - not found --- Looking for pthread_create in pthread --- Looking for pthread_create in pthread - found --- Found Threads: TRUE --- Found CUDA: /usr/local/cuda-10.1 (found version "10.1") --- Configuring done --- Generating done --- Build files have been written to: /srv/git/HIP/hipify-clang/build -``` -```shell -make test-hipify -``` -*A corresponding successful output:* -```shell -Running HIPify regression tests -======================================== -CUDA 10.1 - will be used for testing -LLVM 9.0.1 - will be used for testing -x86_64 - Platform architecture -Linux 5.2.0 - Platform OS -64 - hipify-clang binary bitness -64 - python 2.7.12 binary bitness 
-======================================== --- Testing: 67 tests, 12 threads -- -PASS: hipify :: unit_tests/casts/reinterpret_cast.cu (1 of 67) -PASS: hipify :: unit_tests/device/math_functions.cu (2 of 67) -PASS: hipify :: unit_tests/device/atomics.cu (3 of 67) -PASS: hipify :: unit_tests/device/device_symbols.cu (4 of 67) -PASS: hipify :: unit_tests/headers/headers_test_01.cu (5 of 67) -PASS: hipify :: unit_tests/headers/headers_test_02.cu (6 of 67) -PASS: hipify :: unit_tests/headers/headers_test_03.cu (7 of 67) -PASS: hipify :: unit_tests/headers/headers_test_05.cu (8 of 67) -PASS: hipify :: unit_tests/headers/headers_test_04.cu (9 of 67) -PASS: hipify :: unit_tests/headers/headers_test_06.cu (10 of 67) -PASS: hipify :: unit_tests/headers/headers_test_07.cu (11 of 67) -PASS: hipify :: unit_tests/headers/headers_test_10.cu (12 of 67) -PASS: hipify :: unit_tests/headers/headers_test_11.cu (13 of 67) -PASS: hipify :: unit_tests/headers/headers_test_08.cu (14 of 67) -PASS: hipify :: unit_tests/kernel_launch/kernel_launch_01.cu (15 of 67) -PASS: hipify :: unit_tests/headers/headers_test_09.cu (16 of 67) -PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_02.cu (17 of 67) -PASS: hipify :: unit_tests/libraries/CAFFE2/caffe2_01.cu (18 of 67) -PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu (19 of 67) -PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu (20 of 67) -PASS: hipify :: unit_tests/libraries/CUB/cub_03.cu (21 of 67) -PASS: hipify :: unit_tests/libraries/CUB/cub_01.cu (22 of 67) -PASS: hipify :: unit_tests/libraries/CUB/cub_02.cu (23 of 67) -PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_0_based_indexing_rocblas.cu (24 of 67) -PASS: hipify :: unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu (25 of 67) -PASS: hipify :: unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu (26 of 67) -PASS: hipify :: 
unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu (27 of 67) -PASS: hipify :: unit_tests/libraries/cuComplex/cuComplex_Julia.cu (28 of 67) -PASS: hipify :: unit_tests/libraries/cuFFT/simple_cufft.cu (29 of 67) -PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_softmax.cu (30 of 67) -PASS: hipify :: unit_tests/libraries/cuDNN/cudnn_convolution_forward.cu (31 of 67) -PASS: hipify :: unit_tests/libraries/cuRAND/poisson_api_example.cu (32 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_01.cu (33 of 67) -PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_generate.cpp (34 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_02.cu (35 of 67) -PASS: hipify :: unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp (36 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_03.cu (37 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_04.cu (38 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_05.cu (39 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_07.cu (40 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_06.cu (41 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_08.cu (42 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_09.cu (43 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_11.cu (44 of 67) -PASS: hipify :: unit_tests/namespace/ns_kernel_launch.cu (45 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_10.cu (46 of 67) -PASS: hipify :: unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu (47 of 67) -PASS: hipify :: unit_tests/pp/pp_if_else_conditionals.cu (48 of 67) -PASS: hipify :: unit_tests/pp/pp_if_else_conditionals_01.cu (49 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp (50 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp (51 of 67) -PASS: hipify :: 
unit_tests/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp (52 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/13_occupancy/occupancy.cpp (53 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/1_hipEvent/hipEvent.cpp (54 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp (55 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/7_streams/stream.cpp (56 of 67) -PASS: hipify :: unit_tests/samples/2_Cookbook/8_peer2peer/peer2peer.cpp (57 of 67) -PASS: hipify :: unit_tests/samples/MallocManaged.cpp (58 of 67) -PASS: hipify :: unit_tests/samples/allocators.cu (59 of 67) -PASS: hipify :: unit_tests/samples/coalescing.cu (60 of 67) -PASS: hipify :: unit_tests/samples/dynamic_shared_memory.cu (61 of 67) -PASS: hipify :: unit_tests/samples/axpy.cu (62 of 67) -PASS: hipify :: unit_tests/samples/intro.cu (63 of 67) -PASS: hipify :: unit_tests/samples/cudaRegister.cu (64 of 67) -PASS: hipify :: unit_tests/samples/square.cu (65 of 67) -PASS: hipify :: unit_tests/samples/static_shared_memory.cu (66 of 67) -PASS: hipify :: unit_tests/samples/vec_add.cu (67 of 67) -Testing Time: 3.07s - Expected Passes : 67 -[100%] Built target test-hipify -``` -### hipify-clang: Windows - -*Tested configurations:* - -| **LLVM** | **CUDA** | **cuDNN** | **Visual Studio** | **cmake** | **Python** | -|:--------------:|---------:|--------------------:|--------------------------:|----------:|-----------:| -| 5.0.0 - 5.0.2 | 8.0 | 5.1.10 - 7.1.4.18 | 2017.15.5.2 | 3.5.1 | 3.6.4 | -| 6.0.0 - 6.0.1 | 9.0 | 7.0.5.15 - 7.6.5.32 | 2017.15.5.5 | 3.6.0 | 3.7.2 | -| 7.0.0 - 7.1.0 | 9.2 | 7.6.5.32 | 2017.15.9.11 | 3.13.3 | 3.7.3 | -| 8.0.0 - 8.0.1 | 10.0 | 7.6.5.32 | 2017.15.9.15 | 3.14.2 | 3.7.4 | -| 9.0.0 - 9.0.1 | 10.1 | 7.6.5.32 | 2017.15.9.20, 2019.16.4.5 | 3.16.4 | 3.8.0 | -| 10.0.0-rc1-rc3 | 10.2 | 7.6.5.32 | 2017.15.9.20, 2019.16.4.5 | 3.16.4 | 3.8.1 | -| 11.0.0git | 10.2 | 7.6.5.32 | 2017.15.9.20, 2019.16.4.5 | 3.16.5 | 3.8.2 | - -*Building with testing support 
on `Windows 10` by `Visual Studio 16 2019`:* - -```shell -cmake - -G "Visual Studio 16 2019" \ - -A x64 \ - -DHIPIFY_CLANG_TESTS=1 \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=../dist \ - -DCMAKE_PREFIX_PATH=f:/LLVM/9.0.1/dist \ - -DCUDA_TOOLKIT_ROOT_DIR="c:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1" \ - -DCUDA_SDK_ROOT_DIR="c:/ProgramData/NVIDIA Corporation/CUDA Samples/v10.1" \ - -DCUDA_DNN_ROOT_DIR=f:/CUDNN/cudnn-10.1-windows10-x64-v7.6.5.32 \ - -DCUDA_CUB_ROOT_DIR=f:/GIT/cub \ - -DLLVM_EXTERNAL_LIT=f:/LLVM/9.0.1/build/Release/bin/llvm-lit.py \ - -Thost=x64 - .. -``` -*A corresponding successful output:* -```shell --- Found LLVM 9.0.1: --- - CMake module path: F:/LLVM/9.0.1/dist/lib/cmake/llvm --- - Include path : F:/LLVM/9.0.1/dist/include --- - Binary path : F:/LLVM/9.0.1/dist/bin --- Found PythonInterp: C:/Program Files/Python38/python.exe (found suitable version "3.8.2", minimum required is "3.6") --- Found lit: C:/Program Files/Python38/Scripts/lit.exe --- Found FileCheck: F:/LLVM/9.0.1/dist/bin/FileCheck.exe --- Found CUDA: C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.1 (found version "10.1") --- Configuring done --- Generating done --- Build files have been written to: f:/HIP/hipify-clang/build -``` - -Run `Visual Studio 16 2019`, open the generated `hipify-clang.sln`, build project `test-hipify`. - -## hipify-perl - -`hipify-perl` is autogenerated perl-based script which heavily uses regular expressions. - -**Advantages:** - -1. Ease in use. - -2. It doesn't check the input source CUDA code for correctness. - -3. It doesn't have dependencies on 3rd party tools, including CUDA. - -**Disadvantages:** - -1. 
Current disability (and difficulty in implementing) of transforming the following constructs: - - * macros expansion; - - * namespaces: - - - redefines of CUDA entities in user namespaces; - - - using directive; - - * templates (some cases); - - * device/host function calls distinguishing; - - * header files correct injection; - - * complicated argument lists parsing. - -2. Difficulties in supporting. - -### hipify-perl: usage - -```shell -perl hipify-perl square.cu > square.cu.hip -``` - -### hipify-perl: building - -To generate `hipify-perl`, run `hipify-clang --perl`. Output directory for the generated `hipify-perl` file might be specified by `--o-hipify-perl-dir` option. - -## Supported CUDA APIs - -- [Runtime API](../docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md) -- [Driver API](../docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md) -- [cuComplex API](../docs/markdown/cuComplex_API_supported_by_HIP.md) -- [cuBLAS](../docs/markdown/CUBLAS_API_supported_by_HIP.md) -- [cuRAND](../docs/markdown/CURAND_API_supported_by_HIP.md) -- [cuDNN](../docs/markdown/CUDNN_API_supported_by_HIP.md) -- [cuFFT](../docs/markdown/CUFFT_API_supported_by_HIP.md) -- [cuSPARSE](../docs/markdown/CUSPARSE_API_supported_by_HIP.md) - -## Disclaimer - -The information contained herein is for informational purposes only, and is subject to change without notice. While every precaution has been taken in the preparation of this document, it may contain technical inaccuracies, omissions and typographical errors, and AMD is under no obligation to update or otherwise correct this information. Advanced Micro Devices, Inc. makes no representations or warranties with respect to the accuracy or completeness of the contents of this document, and assumes no liability of any kind, including the implied warranties of noninfringement, merchantability or fitness for particular purposes, with respect to the operation or use of AMD hardware, software or other products described herein. 
No license, including implied or arising by estoppel, to any intellectual property rights is granted by this document. Terms and limitations applicable to the purchase or use of AMD's products are as set forth in a signed agreement between the parties or in AMD's Standard Terms and Conditions of Sale. - -AMD, the AMD Arrow logo, and combinations thereof are trademarks of Advanced Micro Devices, Inc. Other product names used in this publication are for identification purposes only and may be trademarks of their respective companies. - -Copyright (c) 2014-2020 Advanced Micro Devices, Inc. All rights reserved. diff --git a/hipify-clang/packaging/hipify-clang.txt b/hipify-clang/packaging/hipify-clang.txt deleted file mode 100644 index b189eff1e6..0000000000 --- a/hipify-clang/packaging/hipify-clang.txt +++ /dev/null @@ -1,58 +0,0 @@ -cmake_minimum_required(VERSION 2.8.3) -project(hipify-clang) - -install(PROGRAMS @HIPIFY_INSTALL_PATH@/hipify-clang DESTINATION bin) -install(DIRECTORY @HIPIFY_INSTALL_PATH@/include DESTINATION bin) - -# Check if .hipversion exists(only exists when hipify-clang is built with HIP) -if(EXISTS "@HIPIFY_INSTALL_PATH@/.hipVersion") - set(HIP_BUILD "TRUE") -else() - set(HIP_BUILD "FALSE") -endif() - -if(${HIP_BUILD}) - file(STRINGS @HIPIFY_INSTALL_PATH@/.hipVersion HipVersion) - foreach(NameAndValue ${HipVersion}) - # Get variable name - string(REGEX MATCH "^[^=]+" Name ${NameAndValue}) - # Get the value - string(REPLACE "${Name}=" "" Value ${NameAndValue}) - # Assign the vale to the variable - set(${Name} "${Value}") - endforeach() -endif() - -############################# -# Packaging steps -############################# -set(CPACK_SET_DESTDIR TRUE) -set(CPACK_INSTALL_PREFIX "/opt/rocm/hip") -set(CPACK_PACKAGE_NAME "hipify-clang") -set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "hipify-clang: a clang-based tool to translate CUDA source code into portable HIP C++ automatically") -set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.") 
-set(CPACK_PACKAGE_CONTACT "Mankov Evgeny ") - -if(${HIP_BUILD}) - set(CPACK_PACKAGE_VERSION ${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_PATCH}) - set(CPACK_PACKAGE_VERSION_MAJOR ${HIP_VERSION_MAJOR}) - set(CPACK_PACKAGE_VERSION_MINOR ${HIP_VERSION_MINOR}) - set(CPACK_PACKAGE_VERSION_PATCH ${HIP_VERSION_PATCH}) -else() - set(CPACK_PACKAGE_VERSION @LLVM_VERSION_MAJOR@.@LLVM_VERSION_MINOR@.@LLVM_VERSION_PATCH@) - set(CPACK_PACKAGE_VERSION_MAJOR @LLVM_VERSION_MAJOR@) - set(CPACK_PACKAGE_VERSION_MINOR @LLVM_VERSION_MINOR@) - set(CPACK_PACKAGE_VERSION_PATCH @LLVM_VERSION_PATCH@) -endif() - -set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}) -set(CPACK_GENERATOR "TGZ;DEB;RPM") -set(CPACK_BINARY_DEB "ON") -set(CPACK_DEBIAN_PACKAGE_DEPENDS "cuda (>= 7.0)") -set(CPACK_BINARY_RPM "ON") -set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") -set(CPACK_RPM_PACKAGE_AUTOREQPROV "NO") -set(CPACK_RPM_PACKAGE_REQUIRES "cuda >= 7.0") -set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt") -set(CPACK_SOURCE_GENERATOR "TGZ") -include(CPack) diff --git a/hipify-clang/patches/patch_for_clang_7.0.0_bug_38811.zip b/hipify-clang/patches/patch_for_clang_7.0.0_bug_38811.zip deleted file mode 100644 index 28205a2645..0000000000 Binary files a/hipify-clang/patches/patch_for_clang_7.0.0_bug_38811.zip and /dev/null differ diff --git a/hipify-clang/patches/patch_for_clang_7.0.1_bug_38811.zip b/hipify-clang/patches/patch_for_clang_7.0.1_bug_38811.zip deleted file mode 100644 index fca5bf52e3..0000000000 Binary files a/hipify-clang/patches/patch_for_clang_7.0.1_bug_38811.zip and /dev/null differ diff --git a/hipify-clang/patches/patch_for_clang_7.1.0_bug_38811.zip b/hipify-clang/patches/patch_for_clang_7.1.0_bug_38811.zip deleted file mode 100644 index 35bde0cce4..0000000000 Binary files a/hipify-clang/patches/patch_for_clang_7.1.0_bug_38811.zip and /dev/null differ diff --git 
a/hipify-clang/patches/patch_for_clang_8.0.0_bug_38811.zip b/hipify-clang/patches/patch_for_clang_8.0.0_bug_38811.zip deleted file mode 100644 index be1cea6a03..0000000000 Binary files a/hipify-clang/patches/patch_for_clang_8.0.0_bug_38811.zip and /dev/null differ diff --git a/hipify-clang/patches/patch_for_clang_8.0.1_bug_38811.zip b/hipify-clang/patches/patch_for_clang_8.0.1_bug_38811.zip deleted file mode 100644 index 16d66a2062..0000000000 Binary files a/hipify-clang/patches/patch_for_clang_8.0.1_bug_38811.zip and /dev/null differ diff --git a/hipify-clang/src/ArgParse.cpp b/hipify-clang/src/ArgParse.cpp deleted file mode 100644 index cbf2f941e8..0000000000 --- a/hipify-clang/src/ArgParse.cpp +++ /dev/null @@ -1,149 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "ArgParse.h" - -cl::OptionCategory ToolTemplateCategory("CUDA to HIP source translator options"); - -cl::opt OutputFilename("o", - cl::desc("Output filename"), - cl::value_desc("filename"), - cl::cat(ToolTemplateCategory)); - -cl::opt OutputDir("o-dir", - cl::desc("Output directory"), - cl::value_desc("directory"), - cl::cat(ToolTemplateCategory)); - -cl::opt GeneratePerl("perl", - cl::desc("Generate hipify-perl"), - cl::value_desc("perl"), - cl::cat(ToolTemplateCategory)); - -cl::opt GeneratePython("python", - cl::desc("Generate hipify-python"), - cl::value_desc("python"), - cl::cat(ToolTemplateCategory)); - -cl::opt OutputHipifyPerlDir("o-hipify-perl-dir", - cl::desc("Output directory for hipify-perl script"), - cl::value_desc("directory"), - cl::cat(ToolTemplateCategory)); - -cl::opt OutputPythonMapDir("o-python-map-dir", - cl::desc("Output directory for Python map"), - cl::value_desc("directory"), - cl::cat(ToolTemplateCategory)); - -cl::opt TemporaryDir("temp-dir", - cl::desc("Temporary directory"), - cl::value_desc("directory"), - cl::cat(ToolTemplateCategory)); - -cl::opt CudaPath("cuda-path", - cl::desc("CUDA installation path"), - cl::value_desc("directory"), - cl::cat(ToolTemplateCategory)); - -cl::opt SaveTemps("save-temps", - cl::desc("Save temporary files"), - cl::value_desc("save-temps"), - cl::cat(ToolTemplateCategory)); - -cl::opt Verbose("v", - cl::desc("Show commands to run and use verbose output"), - cl::value_desc("v"), - cl::cat(ToolTemplateCategory)); - -cl::opt TranslateToRoc("roc", - cl::desc("Translate to roc instead of hip where it is possible"), - cl::value_desc("roc"), - cl::cat(ToolTemplateCategory)); - -cl::opt Inplace("inplace", - cl::desc("Modify input file inplace, replacing input with hipified output, save backup in .prehip file"), - cl::value_desc("inplace"), - cl::cat(ToolTemplateCategory)); - -cl::opt NoBackup("no-backup", - cl::desc("Don't create a backup file for the hipified source"), - 
cl::value_desc("no-backup"), - cl::cat(ToolTemplateCategory)); - -cl::opt NoOutput("no-output", - cl::desc("Don't write any translated output to stdout"), - cl::value_desc("no-output"), - cl::cat(ToolTemplateCategory)); - -cl::opt PrintStats("print-stats", - cl::desc("Print translation statistics"), - cl::value_desc("print-stats"), - cl::cat(ToolTemplateCategory)); - -cl::opt PrintStatsCSV("print-stats-csv", - cl::desc("Print translation statistics in CSV file"), - cl::value_desc("print-stats-csv"), - cl::cat(ToolTemplateCategory)); - -cl::opt OutputStatsFilename("o-stats", - cl::desc("Output filename for statistics"), - cl::value_desc("filename"), - cl::cat(ToolTemplateCategory)); - -cl::opt Examine("examine", - cl::desc("Combines -no-output and -print-stats options"), - cl::value_desc("examine"), - cl::cat(ToolTemplateCategory)); - -cl::opt DashDash(" ", - cl::desc("Separator between hipify-clang and clang options;\ndon't specify if there are no clang options"), - cl::ValueDisallowed, - cl::cat(ToolTemplateCategory)); - -cl::list IncludeDirs("I", - cl::desc("Add directory to include search path"), - cl::value_desc("directory"), - cl::ZeroOrMore, - cl::Prefix, - cl::cat(ToolTemplateCategory)); - -cl::list MacroNames("D", - cl::desc("Define to or 1 if omitted"), - cl::value_desc("macro>= SkipExcludedPPConditionalBlocks("skip-excluded-preprocessor-conditional-blocks", - cl::desc("Enable default preprocessor behaviour by skipping undefined conditional blocks"), - cl::value_desc("skip-excluded-preprocessor-conditional-blocks"), - cl::cat(ToolTemplateCategory)); - -cl::opt CudaGpuArch("cuda-gpu-arch", - cl::desc("CUDA GPU architecture (e.g. 
sm_35);\nmay be specified more than once"), - cl::value_desc("value"), - cl::ZeroOrMore, - cl::Prefix, - cl::cat(ToolTemplateCategory)); - - -cl::extrahelp CommonHelp(ct::CommonOptionsParser::HelpMessage); diff --git a/hipify-clang/src/ArgParse.h b/hipify-clang/src/ArgParse.h deleted file mode 100644 index 84053a036c..0000000000 --- a/hipify-clang/src/ArgParse.h +++ /dev/null @@ -1,55 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#pragma once - -#include "clang/Tooling/CommonOptionsParser.h" -#include "llvm/Support/CommandLine.h" - -namespace cl = llvm::cl; -namespace ct = clang::tooling; - -extern cl::OptionCategory ToolTemplateCategory; -extern cl::opt OutputFilename; -extern cl::opt OutputHipifyPerlDir; -extern cl::opt OutputPythonMapDir; -extern cl::opt OutputDir; -extern cl::opt TemporaryDir; -extern cl::opt CudaPath; -extern cl::list IncludeDirs; -extern cl::list MacroNames; -extern cl::opt Inplace; -extern cl::opt SaveTemps; -extern cl::opt GeneratePerl; -extern cl::opt GeneratePython; -extern cl::opt Verbose; -extern cl::opt NoBackup; -extern cl::opt NoOutput; -extern cl::opt PrintStats; -extern cl::opt PrintStatsCSV; -extern cl::opt OutputStatsFilename; -extern cl::opt Examine; -extern cl::extrahelp CommonHelp; -extern cl::opt TranslateToRoc; -extern cl::opt DashDash; -extern cl::opt SkipExcludedPPConditionalBlocks; -extern cl::opt CudaGpuArch; diff --git a/hipify-clang/src/CUDA2HIP.cpp b/hipify-clang/src/CUDA2HIP.cpp deleted file mode 100644 index 50f4682af2..0000000000 --- a/hipify-clang/src/CUDA2HIP.cpp +++ /dev/null @@ -1,110 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Maps CUDA header names to HIP header names -const std::map CUDA_INCLUDE_MAP{ - // CUDA includes - {"cuda.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_DRIVER}}, - {"cuda_runtime.h", {"hip/hip_runtime.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RUNTIME}}, - {"cuda_runtime_api.h", {"hip/hip_runtime_api.h", "", CONV_INCLUDE, API_RUNTIME}}, - {"channel_descriptor.h", {"hip/channel_descriptor.h", "", CONV_INCLUDE, API_RUNTIME}}, - {"device_functions.h", {"hip/device_functions.h", "", CONV_INCLUDE, API_RUNTIME}}, - {"driver_types.h", {"hip/driver_types.h", "", CONV_INCLUDE, API_RUNTIME}}, - {"cuda_fp16.h", {"hip/hip_fp16.h", "", CONV_INCLUDE, API_RUNTIME}}, - {"cuda_texture_types.h", {"hip/hip_texture_types.h", "", CONV_INCLUDE, API_RUNTIME}}, - {"texture_fetch_functions.h", {"", "", CONV_INCLUDE, API_RUNTIME}}, - {"vector_types.h", {"hip/hip_vector_types.h", "", CONV_INCLUDE, API_RUNTIME}}, - {"cuda_profiler_api.h", {"hip/hip_profile.h", "", CONV_INCLUDE, API_RUNTIME}}, - {"cooperative_groups.h", {"hip/hip_cooperative_groups.h", "", CONV_INCLUDE, API_RUNTIME}}, - // cuComplex includes - {"cuComplex.h", {"hip/hip_complex.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_COMPLEX}}, - // cuBLAS includes - {"cublas.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_H, API_BLAS}}, - {"cublas_v2.h", {"hipblas.h", "rocblas.h", CONV_INCLUDE_CUDA_MAIN_H, API_BLAS}}, - // cuRAND includes - {"curand.h", {"hiprand.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_RAND}}, - {"curand_kernel.h", 
{"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_discrete.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_discrete2.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_globals.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_lognormal.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_mrg32k3a.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_mtgp32.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_mtgp32_host.h", {"hiprand_mtgp32_host.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_mtgp32_kernel.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_mtgp32dc_p_11213.h", {"rocrand_mtgp32_11213.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_normal.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_normal_static.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_philox4x32_x.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_poisson.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_precalc.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - {"curand_uniform.h", {"hiprand_kernel.h", "", CONV_INCLUDE, API_RAND}}, - // cuDNN includes - {"cudnn.h", {"hipDNN.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_DNN}}, - // cuFFT includes - {"cufft.h", {"hipfft.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_FFT}}, - // cuSPARSE includes - {"cusparse.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE}}, - {"cusparse_v2.h", {"hipsparse.h", "", CONV_INCLUDE_CUDA_MAIN_H, API_SPARSE}}, - // CUB includes - {"cub/cub.cuh", {"hipcub/hipcub.hpp", "", CONV_INCLUDE_CUDA_MAIN_H, API_CUB}}, - // CAFFE2 includes - {"caffe2/core/common_gpu.h", {"caffe2/core/hip/common_gpu.h", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}}, - {"caffe2/core/context_gpu.h", {"caffe2/core/hip/context_gpu.h", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}}, - {"caffe2/operators/operator_fallback_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}}, 
- {"caffe2/operators/spatial_batch_norm_op.h", {"caffe2/operators/hip/spatial_batch_norm_op_miopen.hip", "", CONV_INCLUDE, API_CAFFE2}}, - {"caffe2/operators/generate_proposals_op_util_nms_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}}, - {"caffe2/operators/max_pool_with_index_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}}, - {"caffe2/operators/rnn/recurrent_network_executor_gpu.h", {"", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}}, - {"caffe2/utils/math/reduce.cuh", {"caffe2/utils/math/hip/reduce.cuh", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}}, - {"caffe2/operators/gather_op.cuh", {"caffe2/operators/math/gather_op.cuh", "", CONV_INCLUDE, API_CAFFE2, UNSUPPORTED}}, - {"caffe2/core/common_cudnn.h", {"caffe2/core/hip/common_miopen.h", "", CONV_INCLUDE, API_CAFFE2}}, -}; - -const std::map& CUDA_RENAMES_MAP() { - static std::map ret; - if (!ret.empty()) { - return ret; - } - // First run, so compute the union map. - ret.insert(CUDA_DRIVER_TYPE_NAME_MAP.begin(), CUDA_DRIVER_TYPE_NAME_MAP.end()); - ret.insert(CUDA_DRIVER_FUNCTION_MAP.begin(), CUDA_DRIVER_FUNCTION_MAP.end()); - ret.insert(CUDA_RUNTIME_TYPE_NAME_MAP.begin(), CUDA_RUNTIME_TYPE_NAME_MAP.end()); - ret.insert(CUDA_RUNTIME_FUNCTION_MAP.begin(), CUDA_RUNTIME_FUNCTION_MAP.end()); - ret.insert(CUDA_COMPLEX_TYPE_NAME_MAP.begin(), CUDA_COMPLEX_TYPE_NAME_MAP.end()); - ret.insert(CUDA_COMPLEX_FUNCTION_MAP.begin(), CUDA_COMPLEX_FUNCTION_MAP.end()); - ret.insert(CUDA_BLAS_TYPE_NAME_MAP.begin(), CUDA_BLAS_TYPE_NAME_MAP.end()); - ret.insert(CUDA_BLAS_FUNCTION_MAP.begin(), CUDA_BLAS_FUNCTION_MAP.end()); - ret.insert(CUDA_RAND_TYPE_NAME_MAP.begin(), CUDA_RAND_TYPE_NAME_MAP.end()); - ret.insert(CUDA_RAND_FUNCTION_MAP.begin(), CUDA_RAND_FUNCTION_MAP.end()); - ret.insert(CUDA_DNN_TYPE_NAME_MAP.begin(), CUDA_DNN_TYPE_NAME_MAP.end()); - ret.insert(CUDA_DNN_FUNCTION_MAP.begin(), CUDA_DNN_FUNCTION_MAP.end()); - ret.insert(CUDA_FFT_TYPE_NAME_MAP.begin(), CUDA_FFT_TYPE_NAME_MAP.end()); - 
ret.insert(CUDA_FFT_FUNCTION_MAP.begin(), CUDA_FFT_FUNCTION_MAP.end()); - ret.insert(CUDA_SPARSE_TYPE_NAME_MAP.begin(), CUDA_SPARSE_TYPE_NAME_MAP.end()); - ret.insert(CUDA_SPARSE_FUNCTION_MAP.begin(), CUDA_SPARSE_FUNCTION_MAP.end()); - ret.insert(CUDA_CAFFE2_TYPE_NAME_MAP.begin(), CUDA_CAFFE2_TYPE_NAME_MAP.end()); - ret.insert(CUDA_CAFFE2_FUNCTION_MAP.begin(), CUDA_CAFFE2_FUNCTION_MAP.end()); - return ret; -}; diff --git a/hipify-clang/src/CUDA2HIP.h b/hipify-clang/src/CUDA2HIP.h deleted file mode 100644 index b02e7f1f3e..0000000000 --- a/hipify-clang/src/CUDA2HIP.h +++ /dev/null @@ -1,81 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#pragma once - -#include "llvm/ADT/StringRef.h" -#include -#include -#include "Statistics.h" - -// Maps CUDA header names to HIP header names -extern const std::map CUDA_INCLUDE_MAP; -// Maps the names of CUDA DRIVER API types to the corresponding HIP types -extern const std::map CUDA_DRIVER_TYPE_NAME_MAP; -// Maps the names of CUDA DRIVER API functions to the corresponding HIP functions -extern const std::map CUDA_DRIVER_FUNCTION_MAP; -// Maps the names of CUDA RUNTIME API types to the corresponding HIP types -extern const std::map CUDA_RUNTIME_TYPE_NAME_MAP; -// Maps the names of CUDA Complex API types to the corresponding HIP types -extern const std::map CUDA_COMPLEX_TYPE_NAME_MAP; -// Maps the names of CUDA Complex API functions to the corresponding HIP functions -extern const std::map CUDA_COMPLEX_FUNCTION_MAP; -// Maps the names of CUDA RUNTIME API functions to the corresponding HIP functions -extern const std::map CUDA_RUNTIME_FUNCTION_MAP; -// Maps the names of CUDA BLAS API types to the corresponding HIP types -extern const std::map CUDA_BLAS_TYPE_NAME_MAP; -// Maps the names of CUDA BLAS API functions to the corresponding HIP functions -extern const std::map CUDA_BLAS_FUNCTION_MAP; -// Maps the names of CUDA RAND API types to the corresponding HIP types -extern const std::map CUDA_RAND_TYPE_NAME_MAP; -// Maps the names of CUDA RAND API functions to the corresponding HIP functions -extern const std::map CUDA_RAND_FUNCTION_MAP; -// Maps the names of CUDA DNN API types to the corresponding HIP types -extern const std::map CUDA_DNN_TYPE_NAME_MAP; -// Maps the names of CUDA DNN API functions to the corresponding HIP functions -extern const std::map CUDA_DNN_FUNCTION_MAP; -// Maps the names of CUDA FFT API types to the corresponding HIP types -extern const std::map CUDA_FFT_TYPE_NAME_MAP; -// Maps the names of CUDA FFT API functions to the corresponding HIP functions -extern const std::map CUDA_FFT_FUNCTION_MAP; -// Maps the names of CUDA SPARSE API 
types to the corresponding HIP types -extern const std::map CUDA_SPARSE_TYPE_NAME_MAP; -// Maps the names of CUDA SPARSE API functions to the corresponding HIP functions -extern const std::map CUDA_SPARSE_FUNCTION_MAP; -// Maps the names of CUDA CAFFE2 API types to the corresponding HIP types -extern const std::map CUDA_CAFFE2_TYPE_NAME_MAP; -// Maps the names of CUDA CAFFE2 API functions to the corresponding HIP functions -extern const std::map CUDA_CAFFE2_FUNCTION_MAP; -// Maps the names of CUDA Device functions to the corresponding HIP functions -extern const std::map CUDA_DEVICE_FUNC_MAP; -// Maps the names of CUDA CUB API types to the corresponding HIP types -extern const std::map CUDA_CUB_TYPE_NAME_MAP; - -/** - * The union of all the above maps, except includes. - * - * This should be used rarely, but is still needed to convert macro definitions (which can - * contain any combination of the above things). AST walkers can usually get away with just - * looking in the lookup table for the type of element they are processing, however, saving - * a great deal of time. - */ -const std::map& CUDA_RENAMES_MAP(); diff --git a/hipify-clang/src/CUDA2HIP_BLAS_API_functions.cpp b/hipify-clang/src/CUDA2HIP_BLAS_API_functions.cpp deleted file mode 100644 index 8c9d3ea662..0000000000 --- a/hipify-clang/src/CUDA2HIP_BLAS_API_functions.cpp +++ /dev/null @@ -1,671 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "CUDA2HIP.h" - -// Map of all functions -const std::map CUDA_BLAS_FUNCTION_MAP{ - - // Blas management functions - {"cublasInit", {"hipblasInit", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasShutdown", {"hipblasShutdown", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGetVersion", {"hipblasGetVersion", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGetError", {"hipblasGetError", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasAlloc", {"hipblasAlloc", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasFree", {"hipblasFree", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSetKernelStream", {"hipblasSetKernelStream", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGetAtomicsMode", {"hipblasGetAtomicsMode", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSetAtomicsMode", {"hipblasSetAtomicsMode", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGetMathMode", {"hipblasGetMathMode", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSetMathMode", {"hipblasSetMathMode", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Blas logging - {"cublasLogCallback", {"hipblasLogCallback", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasLoggerConfigure", {"hipblasLoggerConfigure", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSetLoggerCallback", {"hipblasSetLoggerCallback", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGetLoggerCallback", {"hipblasGetLoggerCallback", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Blas1 (v1) Routines - {"cublasCreate", {"hipblasCreate", "rocblas_create_handle", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDestroy", {"hipblasDestroy", "rocblas_destroy_handle", CONV_LIB_FUNC, API_BLAS}}, - {"cublasSetStream", {"hipblasSetStream", "rocblas_set_stream", CONV_LIB_FUNC, API_BLAS}}, - {"cublasGetStream", {"hipblasGetStream", "rocblas_get_stream", CONV_LIB_FUNC, API_BLAS}}, - {"cublasSetPointerMode", {"hipblasSetPointerMode", "rocblas_set_pointer_mode", CONV_LIB_FUNC, 
API_BLAS}}, - {"cublasGetPointerMode", {"hipblasGetPointerMode", "rocblas_get_pointer_mode", CONV_LIB_FUNC, API_BLAS}}, - {"cublasSetVector", {"hipblasSetVector", "rocblas_set_vector", CONV_LIB_FUNC, API_BLAS}}, - {"cublasGetVector", {"hipblasGetVector", "rocblas_get_vector", CONV_LIB_FUNC, API_BLAS}}, - {"cublasSetVectorAsync", {"hipblasSetVectorAsync", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGetVectorAsync", {"hipblasGetVectorAsync", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSetMatrix", {"hipblasSetMatrix", "rocblas_set_matrix", CONV_LIB_FUNC, API_BLAS}}, - {"cublasGetMatrix", {"hipblasGetMatrix", "rocblas_get_matrix", CONV_LIB_FUNC, API_BLAS}}, - {"cublasSetMatrixAsync", {"hipblasSetMatrixAsync", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGetMatrixAsync", {"hipblasGetMatrixAsync", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasXerbla", {"hipblasXerbla", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // NRM2 - {"cublasSnrm2", {"hipblasSnrm2", "rocblas_snrm2", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDnrm2", {"hipblasDnrm2", "rocblas_dnrm2", CONV_LIB_FUNC, API_BLAS}}, - {"cublasScnrm2", {"hipblasScnrm2", "rocblas_scnrm2", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDznrm2", {"hipblasDznrm2", "rocblas_dznrm2", CONV_LIB_FUNC, API_BLAS}}, - {"cublasNrm2Ex", {"hipblasNrm2Ex", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // DOT - {"cublasSdot", {"hipblasSdot", "rocblas_sdot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDdot", {"hipblasDdot", "rocblas_ddot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCdotu", {"hipblasCdotu", "rocblas_cdotu", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCdotc", {"hipblasCdotc", "rocblas_cdotc", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZdotu", {"hipblasZdotu", "rocblas_zdotu", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZdotc", {"hipblasZdotc", "rocblas_zdotc", CONV_LIB_FUNC, API_BLAS}}, - - // SCAL - {"cublasSscal", {"hipblasSscal", "rocblas_sscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDscal", {"hipblasDscal", "rocblas_dscal", 
CONV_LIB_FUNC, API_BLAS}}, - {"cublasCscal", {"hipblasCscal", "rocblas_cscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCsscal", {"hipblasCsscal", "rocblas_csscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZscal", {"hipblasZscal", "rocblas_zscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZdscal", {"hipblasZdscal", "rocblas_zdscal", CONV_LIB_FUNC, API_BLAS}}, - - // AXPY - {"cublasSaxpy", {"hipblasSaxpy", "rocblas_saxpy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDaxpy", {"hipblasDaxpy", "rocblas_daxpy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCaxpy", {"hipblasCaxpy", "rocblas_caxpy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZaxpy", {"hipblasZaxpy", "rocblas_zaxpy", CONV_LIB_FUNC, API_BLAS}}, - - // COPY - {"cublasScopy", {"hipblasScopy", "rocblas_scopy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDcopy", {"hipblasDcopy", "rocblas_dcopy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCcopy", {"hipblasCcopy", "rocblas_ccopy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZcopy", {"hipblasZcopy", "rocblas_zcopy", CONV_LIB_FUNC, API_BLAS}}, - - // SWAP - {"cublasSswap", {"hipblasSswap", "rocblas_sswap", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDswap", {"hipblasDswap", "rocblas_dswap", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCswap", {"hipblasCswap", "rocblas_cswap", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZswap", {"hipblasZswap", "rocblas_zswap", CONV_LIB_FUNC, API_BLAS}}, - - // AMAX - {"cublasIsamax", {"hipblasIsamax", "rocblas_isamax", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIdamax", {"hipblasIdamax", "rocblas_idamax", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIcamax", {"hipblasIcamax", "rocblas_icamax", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIzamax", {"hipblasIzamax", "rocblas_izamax", CONV_LIB_FUNC, API_BLAS}}, - - // AMIN - {"cublasIsamin", {"hipblasIsamin", "rocblas_isamin", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIdamin", {"hipblasIdamin", "rocblas_idamin", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIcamin", {"hipblasIcamin", "rocblas_icamin", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIzamin", {"hipblasIzamin", "rocblas_izamin", 
CONV_LIB_FUNC, API_BLAS}}, - - // ASUM - {"cublasSasum", {"hipblasSasum", "rocblas_sasum", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDasum", {"hipblasDasum", "rocblas_dasum", CONV_LIB_FUNC, API_BLAS}}, - {"cublasScasum", {"hipblasScasum", "rocblas_scasum", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDzasum", {"hipblasDzasum", "rocblas_dzasum", CONV_LIB_FUNC, API_BLAS}}, - - // ROT - {"cublasSrot", {"hipblasSrot", "rocblas_srot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDrot", {"hipblasDrot", "rocblas_drot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCrot", {"hipblasCrot", "rocblas_crot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCsrot", {"hipblasCsrot", "rocblas_csrot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZrot", {"hipblasZrot", "rocblas_zrot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZdrot", {"hipblasZdrot", "rocblas_zdrot", CONV_LIB_FUNC, API_BLAS}}, - - // ROTG - {"cublasSrotg", {"hipblasSrotg", "rocblas_srotg", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDrotg", {"hipblasDrotg", "rocblas_drotg", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCrotg", {"hipblasCrotg", "rocblas_crotg", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZrotg", {"hipblasZrotg", "rocblas_zrotg", CONV_LIB_FUNC, API_BLAS}}, - - // ROTM - {"cublasSrotm", {"hipblasSrotm", "rocblas_srotm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDrotm", {"hipblasDrotm", "rocblas_drotm", CONV_LIB_FUNC, API_BLAS}}, - - // ROTMG - {"cublasSrotmg", {"hipblasSrotmg", "rocblas_srotmg", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDrotmg", {"hipblasDrotmg", "rocblas_drotmg", CONV_LIB_FUNC, API_BLAS}}, - - // GEMV - {"cublasSgemv", {"hipblasSgemv", "rocblas_sgemv", CONV_LIB_FUNC, API_BLAS}}, - // NOTE: there is no such a function in CUDA - {"cublasSgemvBatched", {"hipblasSgemvBatched", "rocblas_sgemv_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDgemv", {"hipblasDgemv", "rocblas_dgemv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgemv", {"hipblasCgemv", "rocblas_cgemv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZgemv", {"hipblasZgemv", "rocblas_zgemv", CONV_LIB_FUNC, API_BLAS}}, - - // GBMV - 
{"cublasSgbmv", {"hipblasSgbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDgbmv", {"hipblasDgbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgbmv", {"hipblasCgbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgbmv", {"hipblasZgbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRMV - {"cublasStrmv", {"hipblasStrmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtrmv", {"hipblasDtrmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtrmv", {"hipblasCtrmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrmv", {"hipblasZtrmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TBMV - {"cublasStbmv", {"hipblasStbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtbmv", {"hipblasDtbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtbmv", {"hipblasCtbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtbmv", {"hipblasZtbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TPMV - {"cublasStpmv", {"hipblasStpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtpmv", {"hipblasDtpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtpmv", {"hipblasCtpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtpmv", {"hipblasZtpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRSV - {"cublasStrsv", {"hipblasStrsv", "rocblas_strsv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDtrsv", {"hipblasDtrsv", "rocblas_dtrsv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCtrsv", {"hipblasCtrsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrsv", {"hipblasZtrsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TPSV - {"cublasStpsv", {"hipblasStpsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtpsv", {"hipblasDtpsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtpsv", {"hipblasCtpsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtpsv", {"hipblasZtpsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TBSV - {"cublasStbsv", 
{"hipblasStbsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtbsv", {"hipblasDtbsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtbsv", {"hipblasCtbsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtbsv", {"hipblasZtbsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYMV/HEMV - {"cublasSsymv", {"hipblasSsymv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsymv", {"hipblasDsymv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsymv", {"hipblasCsymv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsymv", {"hipblasZsymv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChemv", {"hipblasChemv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhemv", {"hipblasZhemv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SBMV/HBMV - {"cublasSsbmv", {"hipblasSsbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsbmv", {"hpiblasDsbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChbmv", {"hipblasChbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhbmv", {"hipblasZhbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SPMV/HPMV - {"cublasSspmv", {"hipblasSspmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDspmv", {"hipblasDspmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChpmv", {"hipblasChpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhpmv", {"hipblasZhpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // GER - {"cublasSger", {"hipblasSger", "rocblas_sger", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDger", {"hipblasDger", "rocblas_dger", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgeru", {"hipblasCgeru", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgerc", {"hipblasCgerc", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgeru", {"hipblasZgeru", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgerc", {"hipblasZgerc", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYR/HER - {"cublasSsyr", {"hipblasSsyr", "rocblas_ssyr", 
CONV_LIB_FUNC, API_BLAS}}, - {"cublasDsyr", {"hipblasDsyr", "rocblas_dsyr", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCsyr", {"hipblasCsyr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyr", {"hipblasZsyr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCher", {"hipblasCher", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZher", {"hipblasZher", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SPR/HPR - {"cublasSspr", {"hipblasSspr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDspr", {"hipblasDspr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChpr", {"hipblasChpr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhpr", {"hipblasZhpr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYR2/HER2 - {"cublasSsyr2", {"hipblasSsyr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsyr2", {"hipblasDsyr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsyr2", {"hipblasCsyr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyr2", {"hipblasZsyr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCher2", {"hipblasCher2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZher2", {"hipblasZher2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SPR2/HPR2 - {"cublasSspr2", {"hipblasSspr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDspr2", {"hipblasDspr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChpr2", {"hipblasChpr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhpr2", {"hipblasZhpr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Blas3 (v1) Routines - // GEMM - {"cublasSgemm", {"hipblasSgemm", "rocblas_sgemm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDgemm", {"hipblasDgemm", "rocblas_dgemm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgemm", {"hipblasCgemm", "rocblas_cgemm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZgemm", {"hipblasZgemm", "rocblas_zgemm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasHgemm", {"hipblasHgemm", "rocblas_hgemm", CONV_LIB_FUNC, API_BLAS}}, - - // 
BATCH GEMM - {"cublasSgemmBatched", {"hipblasSgemmBatched", "rocblas_sgemm_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDgemmBatched", {"hipblasDgemmBatched", "rocblas_dgemm_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasHgemmBatched", {"hipblasHgemmBatched", "rocblas_hgemm_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasSgemmStridedBatched", {"hipblasSgemmStridedBatched", "rocblas_sgemm_strided_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDgemmStridedBatched", {"hipblasDgemmStridedBatched", "rocblas_dgemm_strided_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgemmBatched", {"hipblasCgemmBatched", "rocblas_cgemm_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgemm3mBatched", {"hipblasCgemm3mBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgemmBatched", {"hipblasZgemmBatched", "rocblas_zgemm_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgemmStridedBatched", {"hipblasCgemmStridedBatched", "rocblas_cgemm_strided_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgemm3mStridedBatched", {"hipblasCgemm3mStridedBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgemmStridedBatched", {"hipblasZgemmStridedBatched", "rocblas_zgemm_strided_batched", CONV_LIB_FUNC, API_BLAS}}, - {"cublasHgemmStridedBatched", {"hipblasHgemmStridedBatched", "rocblas_hgemm_strided_batched", CONV_LIB_FUNC, API_BLAS}}, - - // SYRK - {"cublasSsyrk", {"hipblasSsyrk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsyrk", {"hipblasDsyrk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsyrk", {"hipblasCsyrk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyrk", {"hipblasZsyrk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // HERK - {"cublasCherk", {"hipblasCherk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZherk", {"hipblasZherk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYR2K - {"cublasSsyr2k", {"hipblasSsyr2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsyr2k", {"hipblasDsyr2k", "", CONV_LIB_FUNC, API_BLAS, 
UNSUPPORTED}}, - {"cublasCsyr2k", {"hipblasCsyr2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyr2k", {"hipblasZsyr2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYRKX - eXtended SYRK - {"cublasSsyrkx", {"hipblasSsyrkx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsyrkx", {"hipblasDsyrkx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsyrkx", {"hipblasCsyrkx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyrkx", {"hipblasZsyrkx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // HER2K - {"cublasCher2k", {"hipblasCher2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZher2k", {"hipblasZher2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // HERKX - eXtended HERK - {"cublasCherkx", {"hipblasCherkx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZherkx", {"hipblasZherkx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYMM - {"cublasSsymm", {"hipblasSsymm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsymm", {"hipblasDsymm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsymm", {"hipblasCsymm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsymm", {"hipblasZsymm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // HEMM - {"cublasChemm", {"hipblasChemm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhemm", {"hipblasZhemm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRSM - {"cublasStrsm", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDtrsm", {"hipblasDtrsm", "rocblas_dtrsm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCtrsm", {"hipblasCtrsm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrsm", {"hipblasZtrsm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRMM - {"cublasStrmm", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, HIP_UNSUPPORTED}}, - {"cublasDtrmm", {"hipblasDtrmm", "rocblas_dtrmm", CONV_LIB_FUNC, API_BLAS, HIP_UNSUPPORTED}}, - {"cublasCtrmm", {"hipblasCtrmm", "", CONV_LIB_FUNC, API_BLAS, 
UNSUPPORTED}}, - {"cublasZtrmm", {"hipblasZtrmm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // ------------------------ CUBLAS BLAS - like extension (cublas_api.h) - // GEAM - {"cublasSgeam", {"hipblasSgeam", "rocblas_sgeam", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDgeam", {"hipblasDgeam", "rocblas_dgeam", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgeam", {"hipblasCgeam", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgeam", {"hipblasZgeam", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // GETRF - Batched LU - {"cublasSgetrfBatched", {"hipblasSgetrfBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDgetrfBatched", {"hipblasDgetrfBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgetrfBatched", {"hipblasCgetrfBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgetrfBatched", {"hipblasZgetrfBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Batched inversion based on LU factorization from getrf - {"cublasSgetriBatched", {"hipblasSgetriBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDgetriBatched", {"hipblasDgetriBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgetriBatched", {"hipblasCgetriBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgetriBatched", {"hipblasZgetriBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Batched solver based on LU factorization from getrf - {"cublasSgetrsBatched", {"hipblasSgetrsBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDgetrsBatched", {"hipblasDgetrsBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgetrsBatched", {"hipblasCgetrsBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgetrsBatched", {"hipblasZgetrsBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRSM - Batched Triangular Solver - {"cublasStrsmBatched", {"hipblasStrsmBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtrsmBatched", {"hipblasDtrsmBatched", "", CONV_LIB_FUNC, API_BLAS, 
UNSUPPORTED}}, - {"cublasCtrsmBatched", {"hipblasCtrsmBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrsmBatched", {"hipblasZtrsmBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // MATINV - Batched - {"cublasSmatinvBatched", {"hipblasSmatinvBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDmatinvBatched", {"hipblasDmatinvBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCmatinvBatched", {"hipblasCmatinvBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZmatinvBatched", {"hipblasZmatinvBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Batch QR Factorization - {"cublasSgeqrfBatched", {"hipblasSgeqrfBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDgeqrfBatched", {"hipblasDgeqrfBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgeqrfBatched", {"hipblasCgeqrfBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgeqrfBatched", {"hipblasZgeqrfBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Least Square Min only m >= n and Non-transpose supported - {"cublasSgelsBatched", {"hipblasSgelsBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDgelsBatched", {"hipblasDgelsBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgelsBatched", {"hipblasCgelsBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgelsBatched", {"hipblasZgelsBatched", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // DGMM - {"cublasSdgmm", {"hipblasSdgmm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDdgmm", {"hipblasDdgmm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCdgmm", {"hipblasCdgmm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZdgmm", {"hipblasZdgmm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TPTTR - Triangular Pack format to Triangular format - {"cublasStpttr", {"hipblasStpttr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtpttr", {"hipblasDtpttr", "", CONV_LIB_FUNC, API_BLAS, 
UNSUPPORTED}}, - {"cublasCtpttr", {"hipblasCtpttr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtpttr", {"hipblasZtpttr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRTTP - Triangular format to Triangular Pack format - {"cublasStrttp", {"hipblasStrttp", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtrttp", {"hipblasDtrttp", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtrttp", {"hipblasCtrttp", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrttp", {"hipblasZtrttp", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Blas2 (v2) Routines - {"cublasCreate_v2", {"hipblasCreate", "rocblas_create_handle", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDestroy_v2", {"hipblasDestroy", "rocblas_destroy_handle", CONV_LIB_FUNC, API_BLAS}}, - {"cublasGetVersion_v2", {"hipblasGetVersion", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGetProperty", {"hipblasGetProperty", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSetStream_v2", {"hipblasSetStream", "rocblas_set_stream", CONV_LIB_FUNC, API_BLAS}}, - {"cublasGetStream_v2", {"hipblasGetStream", "rocblas_get_stream", CONV_LIB_FUNC, API_BLAS}}, - {"cublasGetPointerMode_v2", {"hipblasGetPointerMode", "rocblas_set_pointer_mode", CONV_LIB_FUNC, API_BLAS}}, - {"cublasSetPointerMode_v2", {"hipblasSetPointerMode", "rocblas_get_pointer_mode", CONV_LIB_FUNC, API_BLAS}}, - {"cublasGetCudartVersion", {"hipblasGetCudartVersion", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // GEMV - {"cublasSgemv_v2", {"hipblasSgemv", "rocblas_sgemv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDgemv_v2", {"hipblasDgemv", "rocblas_dgemv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgemv_v2", {"hipblasCgemv", "rocblas_cgemv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZgemv_v2", {"hipblasZgemv", "rocblas_zgemv", CONV_LIB_FUNC, API_BLAS}}, - - // GBMV - {"cublasSgbmv_v2", {"hipblasSgbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDgbmv_v2", {"hipblasDgbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - 
{"cublasCgbmv_v2", {"hipblasCgbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgbmv_v2", {"hipblasZgbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRMV - {"cublasStrmv_v2", {"hipblasStrmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtrmv_v2", {"hipblasDtrmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtrmv_v2", {"hipblasCtrmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrmv_v2", {"hipblasZtrmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TBMV - {"cublasStbmv_v2", {"hipblasStbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtbmv_v2", {"hipblasDtbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtbmv_v2", {"hipblasCtbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtbmv_v2", {"hipblasZtbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TPMV - {"cublasStpmv_v2", {"hipblasStpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtpmv_v2", {"hipblasDtpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtpmv_v2", {"hipblasCtpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtpmv_v2", {"hipblasZtpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRSV - {"cublasStrsv_v2", {"hipblasStrsv", "rocblas_strsv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDtrsv_v2", {"hipblasDtrsv", "rocblas_dtrsv", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCtrsv_v2", {"hipblasCtrsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrsv_v2", {"hipblasZtrsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TPSV - {"cublasStpsv_v2", {"hipblasStpsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtpsv_v2", {"hipblasDtpsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtpsv_v2", {"hipblasCtpsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtpsv_v2", {"hipblasZtpsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TBSV - {"cublasStbsv_v2", {"hipblasStbsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDtbsv_v2", {"hipblasDtbsv", "", 
CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCtbsv_v2", {"hipblasCtbsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtbsv_v2", {"hipblasZtbsv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYMV/HEMV - {"cublasSsymv_v2", {"hipblasSsymv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsymv_v2", {"hipblasDsymv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsymv_v2", {"hipblasCsymv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsymv_v2", {"hipblasZsymv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChemv_v2", {"hipblasChemv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhemv_v2", {"hipblasZhemv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SBMV/HBMV - {"cublasSsbmv_v2", {"hipblasSsbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsbmv_v2", {"hpiblasDsbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChbmv_v2", {"hipblasChbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhbmv_v2", {"hipblasZhbmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SPMV/HPMV - {"cublasSspmv_v2", {"hipblasSspmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDspmv_v2", {"hipblasDspmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChpmv_v2", {"hipblasChpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhpmv_v2", {"hipblasZhpmv", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // GER - {"cublasSger_v2", {"hipblasSger", "rocblas_sger", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDger_v2", {"hipblasDger", "rocblas_dger", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgeru_v2", {"hipblasCgeru", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgerc_v2", {"hipblasCgerc", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgeru_v2", {"hipblasZgeru", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgerc_v2", {"hipblasZgerc", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYR/HER - {"cublasSsyr_v2", {"hipblasSsyr", "rocblas_ssyr", CONV_LIB_FUNC, API_BLAS}}, - 
{"cublasDsyr_v2", {"hipblasDsyr", "rocblas_dsyr", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCsyr_v2", {"hipblasCsyr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyr_v2", {"hipblasZsyr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCher_v2", {"hipblasCher", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZher_v2", {"hipblasZher", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SPR/HPR - {"cublasSspr_v2", {"hipblasSspr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDspr_v2", {"hipblasDspr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChpr_v2", {"hipblasChpr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhpr_v2", {"hipblasZhpr", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYR2/HER2 - {"cublasSsyr2_v2", {"hipblasSsyr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsyr2_v2", {"hipblasDsyr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsyr2_v2", {"hipblasCsyr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyr2_v2", {"hipblasZsyr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCher2_v2", {"hipblasCher2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZher2_v2", {"hipblasZher2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SPR2/HPR2 - {"cublasSspr2_v2", {"hipblasSspr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDspr2_v2", {"hipblasDspr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasChpr2_v2", {"hipblasChpr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhpr2_v2", {"hipblasZhpr2", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // Blas3 (v2) Routines - // GEMM - {"cublasSgemm_v2", {"hipblasSgemm", "rocblas_sgemm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDgemm_v2", {"hipblasDgemm", "rocblas_dgemm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCgemm_v2", {"hipblasCgemm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgemm3m", {"hipblasCgemm3m", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCgemm3mEx", {"hipblasCgemm3mEx", "", 
CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZgemm_v2", {"hipblasZgemm", "rocblas_zgemm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZgemm3m", {"hipblasZgemm3m", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - //IO in FP16 / FP32, computation in float - {"cublasSgemmEx", {"hipblasSgemmEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGemmEx", {"hipblasGemmEx", "rocblas_gemm_ex", CONV_LIB_FUNC, API_BLAS}}, - {"cublasGemmBatchedEx", {"hipblasGemmBatchedEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasGemmStridedBatchedEx", {"hipblasGemmStridedBatchedEx", "rocblas_gemm_strided_batched_ex", CONV_LIB_FUNC, API_BLAS, HIP_UNSUPPORTED}}, - // IO in Int8 complex/cuComplex, computation in cuComplex - {"cublasCgemmEx", {"hipblasCgemmEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasUint8gemmBias", {"hipblasUint8gemmBias", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYRK - {"cublasSsyrk_v2", {"hipblasSsyrk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsyrk_v2", {"hipblasDsyrk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsyrk_v2", {"hipblasCsyrk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyrk_v2", {"hipblasZsyrk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // IO in Int8 complex/cuComplex, computation in cuComplex - {"cublasCsyrkEx", {"hipblasCsyrkEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - // IO in Int8 complex/cuComplex, computation in cuComplex, Gaussian math - {"cublasCsyrk3mEx", {"hipblasCsyrk3mEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // HERK - {"cublasCherk_v2", {"hipblasCherk", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - // IO in Int8 complex/cuComplex, computation in cuComplex - {"cublasCherkEx", {"hipblasCherkEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - // IO in Int8 complex/cuComplex, computation in cuComplex, Gaussian math - {"cublasCherk3mEx", {"hipblasCherk3mEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZherk_v2", {"hipblasZherk", "", CONV_LIB_FUNC, 
API_BLAS, UNSUPPORTED}}, - - // SYR2K - {"cublasSsyr2k_v2", {"hipblasSsyr2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsyr2k_v2", {"hipblasDsyr2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsyr2k_v2", {"hipblasCsyr2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsyr2k_v2", {"hipblasZsyr2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // HER2K - {"cublasCher2k_v2", {"hipblasCher2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZher2k_v2", {"hipblasZher2k", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // SYMM - {"cublasSsymm_v2", {"hipblasSsymm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDsymm_v2", {"hipblasDsymm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasCsymm_v2", {"hipblasCsymm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZsymm_v2", {"hipblasZsymm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // HEMM - {"cublasChemm_v2", {"hipblasChemm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZhemm_v2", {"hipblasZhemm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRSM - {"cublasStrsm_v2", {"hipblasStrsm", "rocblas_strsm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDtrsm_v2", {"hipblasDtrsm", "rocblas_dtrsm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCtrsm_v2", {"hipblasCtrsm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrsm_v2", {"hipblasZtrsm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // TRMM - {"cublasStrmm_v2", {"hipblasStrmm", "rocblas_strmm", CONV_LIB_FUNC, API_BLAS, HIP_UNSUPPORTED}}, - {"cublasDtrmm_v2", {"hipblasDtrmm", "rocblas_dtrmm", CONV_LIB_FUNC, API_BLAS, HIP_UNSUPPORTED}}, - {"cublasCtrmm_v2", {"hipblasCtrmm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasZtrmm_v2", {"hipblasZtrmm", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - // NRM2 - {"cublasSnrm2_v2", {"hipblasSnrm2", "rocblas_snrm2", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDnrm2_v2", {"hipblasDnrm2", "rocblas_dnrm2", CONV_LIB_FUNC, API_BLAS}}, - {"cublasScnrm2_v2", 
{"hipblasScnrm2", "rocblas_scnrm2", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDznrm2_v2", {"hipblasDznrm2", "rocblas_dznrm2", CONV_LIB_FUNC, API_BLAS}}, - - // DOT - {"cublasDotEx", {"hipblasDotEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasDotcEx", {"hipblasDotcEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - - {"cublasSdot_v2", {"hipblasSdot", "rocblas_sdot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDdot_v2", {"hipblasDdot", "rocblas_ddot", CONV_LIB_FUNC, API_BLAS}}, - - {"cublasCdotu_v2", {"hipblasCdotu", "rocblas_cdotu", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCdotc_v2", {"hipblasCdotc", "rocblas_cdotc", CONV_LIB_FUNC, API_BLAS,}}, - {"cublasZdotu_v2", {"hipblasZdotu", "rocblas_zdotu", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZdotc_v2", {"hipblasZdotc", "rocblas_zdotc", CONV_LIB_FUNC, API_BLAS}}, - - // SCAL - {"cublasScalEx", {"hipblasScalEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSscal_v2", {"hipblasSscal", "rocblas_sscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDscal_v2", {"hipblasDscal", "rocblas_dscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCscal_v2", {"hipblasCscal", "rocblas_cscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCsscal_v2", {"hipblasCsscal", "rocblas_csscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZscal_v2", {"hipblasZscal", "rocblas_zscal", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZdscal_v2", {"hipblasZdscal", "rocblas_zdscal", CONV_LIB_FUNC, API_BLAS}}, - - // AXPY - {"cublasAxpyEx", {"hipblasAxpyEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSaxpy_v2", {"hipblasSaxpy", "rocblas_saxpy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDaxpy_v2", {"hipblasDaxpy", "rocblas_daxpy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCaxpy_v2", {"hipblasCaxpy", "rocblas_caxpy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZaxpy_v2", {"hipblasZaxpy", "rocblas_zaxpy", CONV_LIB_FUNC, API_BLAS}}, - - // COPY - {"cublasCopyEx", {"hipblasCopyEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasScopy_v2", {"hipblasScopy", "rocblas_scopy", CONV_LIB_FUNC, API_BLAS}}, - 
{"cublasDcopy_v2", {"hipblasDcopy", "rocblas_dcopy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCcopy_v2", {"hipblasCcopy", "rocblas_ccopy", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZcopy_v2", {"hipblasZcopy", "rocblas_zcopy", CONV_LIB_FUNC, API_BLAS}}, - - // SWAP - {"cublasSwapEx", {"hipblasSwapEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSswap_v2", {"hipblasSswap", "rocblas_sswap", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDswap_v2", {"hipblasDswap", "rocblas_dswap", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCswap_v2", {"hipblasCswap", "rocblas_cswap", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZswap_v2", {"hipblasZswap", "rocblas_zswap", CONV_LIB_FUNC, API_BLAS}}, - - // AMAX - {"cublasIamaxEx", {"hipblasIamaxEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasIsamax_v2", {"hipblasIsamax", "rocblas_isamax", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIdamax_v2", {"hipblasIdamax", "rocblas_idamax", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIcamax_v2", {"hipblasIcamax", "rocblas_icamax", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIzamax_v2", {"hipblasIzamax", "rocblas_izamax", CONV_LIB_FUNC, API_BLAS}}, - - // AMIN - {"cublasIaminEx", {"hipblasIaminEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasIsamin_v2", {"hipblasIsamin", "rocblas_isamin", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIdamin_v2", {"hipblasIdamin", "rocblas_idamin", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIcamin_v2", {"hipblasIcamin", "rocblas_icamin", CONV_LIB_FUNC, API_BLAS}}, - {"cublasIzamin_v2", {"hipblasIzamin", "rocblas_izamin", CONV_LIB_FUNC, API_BLAS}}, - - // ASUM - {"cublasAsumEx", {"hipblasAsumEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSasum_v2", {"hipblasSasum", "rocblas_sasum", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDasum_v2", {"hipblasDasum", "rocblas_dasum", CONV_LIB_FUNC, API_BLAS}}, - {"cublasScasum_v2", {"hipblasScasum", "rocblas_scasum", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDzasum_v2", {"hipblasDzasum", "rocblas_dzasum", CONV_LIB_FUNC, API_BLAS}}, - - // ROT - {"cublasRotEx", 
{"hipblasRotEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSrot_v2", {"hipblasSrot", "rocblas_srot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDrot_v2", {"hipblasDrot", "rocblas_drot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCrot_v2", {"hipblasCrot", "rocblas_crot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCsrot_v2", {"hipblasCsrot", "rocblas_csrot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZrot_v2", {"hipblasZrot", "rocblas_zrot", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZdrot_v2", {"hipblasZdrot", "rocblas_zdrot", CONV_LIB_FUNC, API_BLAS}}, - - // ROTG - {"cublasRotgEx", {"hipblasRotgEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSrotg_v2", {"hipblasSrotg", "rocblas_srotg", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDrotg_v2", {"hipblasDrotg", "rocblas_drotg", CONV_LIB_FUNC, API_BLAS}}, - {"cublasCrotg_v2", {"hipblasCrotg", "rocblas_crotg", CONV_LIB_FUNC, API_BLAS}}, - {"cublasZrotg_v2", {"hipblasZrotg", "rocblas_zrotg", CONV_LIB_FUNC, API_BLAS}}, - - // ROTM - {"cublasRotmEx", {"hipblasRotmEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSrotm_v2", {"hipblasSrotm", "rocblas_srotm", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDrotm_v2", {"hipblasDrotm", "rocblas_drotm", CONV_LIB_FUNC, API_BLAS}}, - - // ROTMG - {"cublasRotmgEx", {"hipblasRotmgEx", "", CONV_LIB_FUNC, API_BLAS, UNSUPPORTED}}, - {"cublasSrotmg_v2", {"hipblasSrotmg", "rocblas_srotmg", CONV_LIB_FUNC, API_BLAS}}, - {"cublasDrotmg_v2", {"hipblasDrotmg", "rocblas_drotmg", CONV_LIB_FUNC, API_BLAS}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_BLAS_API_types.cpp b/hipify-clang/src/CUDA2HIP_BLAS_API_types.cpp deleted file mode 100644 index a747eb0040..0000000000 --- a/hipify-clang/src/CUDA2HIP_BLAS_API_types.cpp +++ /dev/null @@ -1,158 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "CUDA2HIP.h" - -// Map of all functions -const std::map CUDA_BLAS_TYPE_NAME_MAP{ - // Blas defines - {"CUBLAS_VER_MAJOR", {"HIPBLAS_VER_MAJOR", "", CONV_DEFINE, API_BLAS, HIP_UNSUPPORTED}}, - {"CUBLAS_VER_MINOR", {"HIPBLAS_VER_MINOR", "", CONV_DEFINE, API_BLAS, HIP_UNSUPPORTED}}, - {"CUBLAS_VER_PATCH", {"HIPBLAS_VER_PATCH", "", CONV_DEFINE, API_BLAS, HIP_UNSUPPORTED}}, - {"CUBLAS_VER_BUILD", {"HIPBLAS_VER_BUILD", "", CONV_DEFINE, API_BLAS, HIP_UNSUPPORTED}}, - {"CUBLAS_VERSION", {"HIPBLAS_VERSION", "", CONV_DEFINE, API_BLAS, HIP_UNSUPPORTED}}, - - // Blas operations - {"cublasOperation_t", {"hipblasOperation_t", "rocblas_operation", CONV_TYPE, API_BLAS}}, - {"CUBLAS_OP_N", {"HIPBLAS_OP_N", "rocblas_operation_none", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_OP_T", {"HIPBLAS_OP_T", "rocblas_operation_transpose", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_OP_C", {"HIPBLAS_OP_C", "rocblas_operation_conjugate_transpose", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_OP_HERMITAN", {"HIPBLAS_OP_C", "rocblas_operation_conjugate_transpose", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_OP_CONJG", {"HIPBLAS_OP_CONJG", "rocblas_operation_conjugate", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, - - // Blas statuses - {"cublasStatus", {"hipblasStatus_t", "rocblas_status", CONV_TYPE, API_BLAS}}, - {"cublasStatus_t", {"hipblasStatus_t", "rocblas_status", CONV_TYPE, API_BLAS}}, - {"CUBLAS_STATUS_SUCCESS", {"HIPBLAS_STATUS_SUCCESS", "rocblas_status_success", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_NOT_INITIALIZED", {"HIPBLAS_STATUS_NOT_INITIALIZED", "rocblas_status_invalid_handle", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_ALLOC_FAILED", {"HIPBLAS_STATUS_ALLOC_FAILED", "rocblas_status_memory_error", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_INVALID_VALUE", {"HIPBLAS_STATUS_INVALID_VALUE", "rocblas_status_invalid_pointer", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_MAPPING_ERROR", {"HIPBLAS_STATUS_MAPPING_ERROR", 
"rocblas_status_internal_error", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_EXECUTION_FAILED", {"HIPBLAS_STATUS_EXECUTION_FAILED", "rocblas_status_internal_error", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_INTERNAL_ERROR", {"HIPBLAS_STATUS_INTERNAL_ERROR", "rocblas_status_internal_error", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_NOT_SUPPORTED", {"HIPBLAS_STATUS_NOT_SUPPORTED", "rocblas_status_not_implemented", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_ARCH_MISMATCH", {"HIPBLAS_STATUS_ARCH_MISMATCH", "rocblas_status_not_implemented", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_STATUS_LICENSE_ERROR", {"HIPBLAS_STATUS_LICENSE_ERROR", "rocblas_status_not_implemented", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, - - // Blas Fill Modes - {"cublasFillMode_t", {"hipblasFillMode_t", "rocblas_fill", CONV_TYPE, API_BLAS}}, - {"CUBLAS_FILL_MODE_LOWER", {"HIPBLAS_FILL_MODE_LOWER", "rocblas_fill_lower", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_FILL_MODE_UPPER", {"HIPBLAS_FILL_MODE_UPPER", "rocblas_fill_upper", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_FILL_MODE_FULL", {"HIPBLAS_FILL_MODE_FULL", "rocblas_fill_full", CONV_NUMERIC_LITERAL, API_BLAS}}, - - // Blas Diag Types - {"cublasDiagType_t", {"hipblasDiagType_t", "rocblas_diagonal", CONV_TYPE, API_BLAS}}, - {"CUBLAS_DIAG_NON_UNIT", {"HIPBLAS_DIAG_NON_UNIT", "rocblas_diagonal_non_unit", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_DIAG_UNIT", {"HIPBLAS_DIAG_UNIT", "rocblas_diagonal_unit", CONV_NUMERIC_LITERAL, API_BLAS}}, - - // Blas Side Modes - {"cublasSideMode_t", {"hipblasSideMode_t", "rocblas_side", CONV_TYPE, API_BLAS}}, - {"CUBLAS_SIDE_LEFT", {"HIPBLAS_SIDE_LEFT", "rocblas_side_left", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_SIDE_RIGHT", {"HIPBLAS_SIDE_RIGHT", "rocblas_side_right", CONV_NUMERIC_LITERAL, API_BLAS}}, - - // Blas Pointer Modes - {"cublasPointerMode_t", {"hipblasPointerMode_t", "rocblas_pointer_mode", CONV_TYPE, API_BLAS}}, - {"CUBLAS_POINTER_MODE_HOST", 
{"HIPBLAS_POINTER_MODE_HOST", "rocblas_pointer_mode_host", CONV_NUMERIC_LITERAL, API_BLAS}}, - {"CUBLAS_POINTER_MODE_DEVICE", {"HIPBLAS_POINTER_MODE_DEVICE", "rocblas_pointer_mode_device", CONV_NUMERIC_LITERAL, API_BLAS}}, - - // Blas Atomics Modes - {"cublasAtomicsMode_t", {"hipblasAtomicsMode_t", "rocblas_atomics_mode", CONV_TYPE, API_BLAS, HIP_UNSUPPORTED}}, - {"CUBLAS_ATOMICS_NOT_ALLOWED", {"HIPBLAS_ATOMICS_NOT_ALLOWED", "rocblas_atomics_not_allowed", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, - {"CUBLAS_ATOMICS_ALLOWED", {"HIPBLAS_ATOMICS_ALLOWED", "rocblas_atomics_allowed", CONV_NUMERIC_LITERAL, API_BLAS, HIP_UNSUPPORTED}}, - - // Blas Data Type - {"cublasDataType_t", {"hipblasDatatype_t", "rocblas_datatype", CONV_TYPE, API_BLAS}}, - - // Blas Math mode/tensor operation - {"cublasMath_t", {"hipblasMath_t", "", CONV_TYPE, API_BLAS, UNSUPPORTED}}, - {"CUBLAS_DEFAULT_MATH", {"HIPBLAS_DEFAULT_MATH", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, - {"CUBLAS_TENSOR_OP_MATH", {"HIPBLAS_TENSOR_OP_MATH", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, - - // Blass different GEMM algorithms - {"cublasGemmAlgo_t", {"hipblasGemmAlgo_t", "rocblas_gemm_algo", CONV_TYPE, API_BLAS}}, - {"CUBLAS_GEMM_DFALT", {"HIPBLAS_GEMM_DEFAULT", "rocblas_gemm_algo_standard", CONV_NUMERIC_LITERAL, API_BLAS}}, // -1 // 160 // 0b0000000000 - {"CUBLAS_GEMM_DEFAULT", {"HIPBLAS_GEMM_DEFAULT", "rocblas_gemm_algo_standard", CONV_NUMERIC_LITERAL, API_BLAS}}, // -1 // 160 // 0b0000000000 - {"CUBLAS_GEMM_ALGO0", {"HIPBLAS_GEMM_ALGO0", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 0 - {"CUBLAS_GEMM_ALGO1", {"HIPBLAS_GEMM_ALGO1", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 1 - {"CUBLAS_GEMM_ALGO2", {"HIPBLAS_GEMM_ALGO2", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 2 - {"CUBLAS_GEMM_ALGO3", {"HIPBLAS_GEMM_ALGO3", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 3 - {"CUBLAS_GEMM_ALGO4", {"HIPBLAS_GEMM_ALGO4", "", CONV_NUMERIC_LITERAL, API_BLAS, 
UNSUPPORTED}}, // 4 - {"CUBLAS_GEMM_ALGO5", {"HIPBLAS_GEMM_ALGO5", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 5 - {"CUBLAS_GEMM_ALGO6", {"HIPBLAS_GEMM_ALGO6", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 6 - {"CUBLAS_GEMM_ALGO7", {"HIPBLAS_GEMM_ALGO7", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 7 - {"CUBLAS_GEMM_ALGO8", {"HIPBLAS_GEMM_ALGO8", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 8 - {"CUBLAS_GEMM_ALGO9", {"HIPBLAS_GEMM_ALGO9", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 9 - {"CUBLAS_GEMM_ALGO10", {"HIPBLAS_GEMM_ALGO10", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 10 - {"CUBLAS_GEMM_ALGO11", {"HIPBLAS_GEMM_ALGO11", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 11 - {"CUBLAS_GEMM_ALGO12", {"HIPBLAS_GEMM_ALGO12", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 12 - {"CUBLAS_GEMM_ALGO13", {"HIPBLAS_GEMM_ALGO13", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 13 - {"CUBLAS_GEMM_ALGO14", {"HIPBLAS_GEMM_ALGO14", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 14 - {"CUBLAS_GEMM_ALGO15", {"HIPBLAS_GEMM_ALGO15", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 15 - {"CUBLAS_GEMM_ALGO16", {"HIPBLAS_GEMM_ALGO16", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 16 - {"CUBLAS_GEMM_ALGO17", {"HIPBLAS_GEMM_ALGO17", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 17 - {"CUBLAS_GEMM_ALGO18", {"HIPBLAS_GEMM_ALGO18", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 18 - {"CUBLAS_GEMM_ALGO19", {"HIPBLAS_GEMM_ALGO19", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 19 - {"CUBLAS_GEMM_ALGO20", {"HIPBLAS_GEMM_ALGO20", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 20 - {"CUBLAS_GEMM_ALGO21", {"HIPBLAS_GEMM_ALGO21", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 21 - {"CUBLAS_GEMM_ALGO22", {"HIPBLAS_GEMM_ALGO22", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 22 - {"CUBLAS_GEMM_ALGO23", {"HIPBLAS_GEMM_ALGO23", "", CONV_NUMERIC_LITERAL, API_BLAS, 
UNSUPPORTED}}, // 23 - {"CUBLAS_GEMM_DEFAULT_TENSOR_OP", {"HIPBLAS_GEMM_DEFAULT_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 99 - {"CUBLAS_GEMM_DFALT_TENSOR_OP", {"HIPBLAS_GEMM_DFALT_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 99 - {"CUBLAS_GEMM_ALGO0_TENSOR_OP", {"HIPBLAS_GEMM_ALGO0_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 100 - {"CUBLAS_GEMM_ALGO1_TENSOR_OP", {"HIPBLAS_GEMM_ALGO1_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 101 - {"CUBLAS_GEMM_ALGO2_TENSOR_OP", {"HIPBLAS_GEMM_ALGO2_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 102 - {"CUBLAS_GEMM_ALGO3_TENSOR_OP", {"HIPBLAS_GEMM_ALGO3_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 103 - {"CUBLAS_GEMM_ALGO4_TENSOR_OP", {"HIPBLAS_GEMM_ALGO4_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 104 - {"CUBLAS_GEMM_ALGO5_TENSOR_OP", {"HIPBLAS_GEMM_ALGO5_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 105 - {"CUBLAS_GEMM_ALGO6_TENSOR_OP", {"HIPBLAS_GEMM_ALGO6_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 106 - {"CUBLAS_GEMM_ALGO7_TENSOR_OP", {"HIPBLAS_GEMM_ALGO7_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 107 - {"CUBLAS_GEMM_ALGO8_TENSOR_OP", {"HIPBLAS_GEMM_ALGO8_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 108 - {"CUBLAS_GEMM_ALGO9_TENSOR_OP", {"HIPBLAS_GEMM_ALGO9_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 109 - {"CUBLAS_GEMM_ALGO10_TENSOR_OP", {"HIPBLAS_GEMM_ALGO10_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 110 - {"CUBLAS_GEMM_ALGO11_TENSOR_OP", {"HIPBLAS_GEMM_ALGO11_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 111 - {"CUBLAS_GEMM_ALGO12_TENSOR_OP", {"HIPBLAS_GEMM_ALGO12_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 112 - {"CUBLAS_GEMM_ALGO13_TENSOR_OP", {"HIPBLAS_GEMM_ALGO13_TENSOR_OP", "", CONV_NUMERIC_LITERAL, 
API_BLAS, UNSUPPORTED}}, // 113 - {"CUBLAS_GEMM_ALGO14_TENSOR_OP", {"HIPBLAS_GEMM_ALGO14_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 114 - {"CUBLAS_GEMM_ALGO15_TENSOR_OP", {"HIPBLAS_GEMM_ALGO15_TENSOR_OP", "", CONV_NUMERIC_LITERAL, API_BLAS, UNSUPPORTED}}, // 115 - - // TODO: rename hipblasDatatype_t to hipDataType_t and move from hipBLAS to HIP - {"cudaDataType_t", {"hipblasDatatype_t", "rocblas_datatype_", CONV_TYPE, API_RUNTIME}}, - {"cudaDataType", {"hipblasDatatype_t", "rocblas_datatype", CONV_TYPE, API_RUNTIME}}, - {"CUDA_R_16F", {"HIPBLAS_R_16F", "rocblas_datatype_f16_r", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 // 150 - {"CUDA_C_16F", {"HIPBLAS_C_16F", "rocblas_datatype_f16_c", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 6 // 153 - {"CUDA_R_32F", {"HIPBLAS_R_32F", "rocblas_datatype_f32_r", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 // 151 - {"CUDA_C_32F", {"HIPBLAS_C_32F", "rocblas_datatype_f32_c", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 4 // 154 - {"CUDA_R_64F", {"HIPBLAS_R_64F", "rocblas_datatype_f64_r", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 // 152 - {"CUDA_C_64F", {"HIPBLAS_C_64F", "rocblas_datatype_f64_c", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 5 // 155 - {"CUDA_R_8I", {"HIPBLAS_R_8I", "rocblas_datatype_i8_r", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 3 // 160 - {"CUDA_C_8I", {"HIPBLAS_C_8I", "rocblas_datatype_i8_c", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 7 // 164 - {"CUDA_R_8U", {"HIPBLAS_R_8U", "rocblas_datatype_u8_r", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 8 // 161 - {"CUDA_C_8U", {"HIPBLAS_C_8U", "rocblas_datatype_u8_c", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 9 // 165 - {"CUDA_R_32I", {"HIPBLAS_R_32I", "rocblas_datatype_i32_r", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 10 // 162 - {"CUDA_C_32I", {"HIPBLAS_C_32I", "rocblas_datatype_i32_c", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 11 // 166 - {"CUDA_R_32U", {"HIPBLAS_R_32U", "rocblas_datatype_u32_r", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 12 // 163 - {"CUDA_C_32U", 
{"HIPBLAS_C_32U", "rocblas_datatype_u32_c", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 13 // 167 - - {"cublasHandle_t", {"hipblasHandle_t", "rocblas_handle", CONV_TYPE, API_BLAS}}, - // TODO: dereferencing: typedef struct cublasContext *cublasHandle_t; - {"cublasContext", {"hipblasHandle_t", "_rocblas_handle", CONV_TYPE, API_BLAS, HIP_UNSUPPORTED}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_Complex_API_functions.cpp b/hipify-clang/src/CUDA2HIP_Complex_API_functions.cpp deleted file mode 100644 index 6e0c1a54e7..0000000000 --- a/hipify-clang/src/CUDA2HIP_Complex_API_functions.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "CUDA2HIP.h" - -// Maps the names of CUDA Complex API functions to the corresponding HIP functions -const std::map CUDA_COMPLEX_FUNCTION_MAP{ - {"cuCrealf", {"hipCrealf", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCimagf", {"hipCimagf", "", CONV_COMPLEX, API_COMPLEX}}, - {"make_cuFloatComplex", {"make_hipFloatComplex", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuConjf", {"hipConjf", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCaddf", {"hipCaddf", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCsubf", {"hipCsubf", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCmulf", {"hipCmulf", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCdivf", {"hipCdivf", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCabsf", {"hipCabsf", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCreal", {"hipCreal", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCimag", {"hipCimag", "", CONV_COMPLEX, API_COMPLEX}}, - {"make_cuDoubleComplex", {"make_hipDoubleComplex", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuConj", {"hipConj", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCadd", {"hipCadd", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCsub", {"hipCsub", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCmul", {"hipCmul", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCdiv", {"hipCdiv", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCabs", {"hipCabs", "", CONV_COMPLEX, API_COMPLEX}}, - {"make_cuComplex", {"make_hipComplex", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuComplexFloatToDouble", {"hipComplexFloatToDouble", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuComplexDoubleToFloat", {"hipComplexDoubleToFloat", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCfmaf", {"hipCfmaf", "", CONV_COMPLEX, API_COMPLEX}}, - {"cuCfma", {"hipCfma", "", CONV_COMPLEX, API_COMPLEX}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_Complex_API_types.cpp b/hipify-clang/src/CUDA2HIP_Complex_API_types.cpp deleted file mode 100644 index 87016a21a0..0000000000 --- a/hipify-clang/src/CUDA2HIP_Complex_API_types.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Maps the names of CUDA Complex API types to the corresponding HIP types -const std::map CUDA_COMPLEX_TYPE_NAME_MAP{ - {"cuFloatComplex", {"hipFloatComplex", "", CONV_TYPE, API_COMPLEX}}, - {"cuDoubleComplex", {"hipDoubleComplex", "", CONV_TYPE, API_COMPLEX}}, - {"cuComplex", {"hipComplex", "", CONV_TYPE, API_COMPLEX}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_DNN_API_functions.cpp b/hipify-clang/src/CUDA2HIP_DNN_API_functions.cpp deleted file mode 100644 index 765ce78a26..0000000000 --- a/hipify-clang/src/CUDA2HIP_DNN_API_functions.cpp +++ /dev/null @@ -1,299 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "CUDA2HIP.h" - -// Map of all functions -const std::map CUDA_DNN_FUNCTION_MAP{ - - {"cudnnGetVersion", {"hipdnnGetVersion", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetCudartVersion", {"hipdnnGetCudartVersion", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnQueryRuntimeError", {"hipdnnQueryRuntimeError", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetProperty", {"hipdnnGetProperty", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetErrorString", {"hipdnnGetErrorString", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnIm2Col", {"hipdnnIm2Col", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCreate", {"hipdnnCreate", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDestroy", {"hipdnnDestroy", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetStream", {"hipdnnSetStream", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetStream", {"hipdnnGetStream", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetCallback", {"hipdnnSetCallback", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetCallback", {"hipdnnGetCallback", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Tensor functions - {"cudnnCreateTensorDescriptor", {"hipdnnCreateTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetTensor4dDescriptor", {"hipdnnSetTensor4dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetTensor4dDescriptorEx", {"hipdnnSetTensor4dDescriptorEx", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetTensor4dDescriptor", {"hipdnnGetTensor4dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetTensorNdDescriptor", {"hipdnnSetTensorNdDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetTensorNdDescriptorEx", {"hipdnnSetTensorNdDescriptorEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetTensorNdDescriptor", {"hipdnnGetTensorNdDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetTensorSizeInBytes", {"hipdnnGetTensorSizeInBytes", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyTensorDescriptor", {"hipdnnDestroyTensorDescriptor", "", 
CONV_LIB_FUNC, API_DNN}}, - {"cudnnTransformTensor", {"hipdnnTransformTensor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnTransformTensorEx", {"hipdnnTransformTensorEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnInitTransformDest", {"hipdnnInitTransformDest", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCreateTensorTransformDescriptor", {"hipdnnCreateTensorTransformDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetTensorTransformDescriptor", {"hipdnnSetTensorTransformDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetTensorTransformDescriptor", {"hipdnnGetTensorTransformDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyTensorTransformDescriptor", {"hipdnnDestroyTensorTransformDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnAddTensor", {"hipdnnAddTensor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnCreateOpTensorDescriptor", {"hipdnnCreateOpTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetOpTensorDescriptor", {"hipdnnSetOpTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetOpTensorDescriptor", {"hipdnnGetOpTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDestroyOpTensorDescriptor", {"hipdnnDestroyOpTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnOpTensor", {"hipdnnOpTensor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetFoldedConvBackwardDataDescriptors", {"hipdnnGetFoldedConvBackwardDataDescriptors", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Reduce Tensor functions - {"cudnnCreateReduceTensorDescriptor", {"hipdnnCreateReduceTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetReduceTensorDescriptor", {"hipdnnSetReduceTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetReduceTensorDescriptor", {"hipdnnGetReduceTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDestroyReduceTensorDescriptor", {"hipdnnDestroyReduceTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - 
{"cudnnGetReductionIndicesSize", {"hipdnnGetReductionIndicesSize", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetReductionWorkspaceSize", {"hipdnnGetReductionWorkspaceSize", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnReduceTensor", {"hipdnnReduceTensor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetTensor", {"hipdnnSetTensor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnScaleTensor", {"hipdnnScaleTensor", "", CONV_LIB_FUNC, API_DNN}}, - - // cuDNN Filter functions - {"cudnnCreateFilterDescriptor", {"hipdnnCreateFilterDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetFilter4dDescriptor", {"hipdnnSetFilter4dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetFilter4dDescriptor", {"hipdnnGetFilter4dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetFilterNdDescriptor", {"hipdnnSetFilterNdDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetFilterNdDescriptor", {"hipdnnGetFilterNdDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetFilterSizeInBytes", {"hipdnnGetFilterSizeInBytes", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnTransformFilter", {"hipdnnTransformFilter", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyFilterDescriptor", {"hipdnnDestroyFilterDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnReorderFilterAndBias", {"hipdnnReorderFilterAndBias", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Convolution functions - {"cudnnCreateConvolutionDescriptor", {"hipdnnCreateConvolutionDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetConvolutionMathType", {"hipdnnSetConvolutionMathType", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionMathType", {"hipdnnGetConvolutionMathType", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetConvolutionGroupCount", {"hipdnnSetConvolutionGroupCount", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionGroupCount", {"hipdnnGetConvolutionGroupCount", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetConvolutionReorderType", 
{"hipdnnSetConvolutionReorderType", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetConvolutionReorderType", {"hipdnnGetConvolutionReorderType", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetConvolution2dDescriptor", {"hipdnnSetConvolution2dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolution2dDescriptor", {"hipdnnGetConvolution2dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolution2dForwardOutputDim", {"hipdnnGetConvolution2dForwardOutputDim", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetConvolutionNdDescriptor", {"hipdnnSetConvolutionNdDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionNdDescriptor", {"hipdnnGetConvolutionNdDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetConvolutionNdForwardOutputDim", {"hipdnnGetConvolutionNdForwardOutputDim", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyConvolutionDescriptor", {"hipdnnDestroyConvolutionDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionForwardAlgorithmMaxCount", {"hipdnnGetConvolutionForwardAlgorithmMaxCount", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFindConvolutionForwardAlgorithm", {"hipdnnFindConvolutionForwardAlgorithm", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnFindConvolutionForwardAlgorithmEx", {"hipdnnFindConvolutionForwardAlgorithmEx", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionForwardAlgorithm", {"hipdnnGetConvolutionForwardAlgorithm", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionForwardAlgorithm_v7", {"hipdnnGetConvolutionForwardAlgorithm_v7", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetConvolutionForwardWorkspaceSize", {"hipdnnGetConvolutionForwardWorkspaceSize", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnConvolutionForward", {"hipdnnConvolutionForward", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnConvolutionBiasActivationForward", {"hipdnnConvolutionBiasActivationForward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - 
{"cudnnConvolutionBackwardBias", {"hipdnnConvolutionBackwardBias", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionBackwardFilterAlgorithmMaxCount", {"hipdnnGetConvolutionBackwardFilterAlgorithmMaxCount", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFindConvolutionBackwardFilterAlgorithm", {"hipdnnFindConvolutionBackwardFilterAlgorithm", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnFindConvolutionBackwardFilterAlgorithmEx", {"hipdnnFindConvolutionBackwardFilterAlgorithmEx", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionBackwardFilterAlgorithm", {"hipdnnGetConvolutionBackwardFilterAlgorithm", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionBackwardFilterAlgorithm_v7", {"hipdnnGetConvolutionBackwardFilterAlgorithm_v7", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetConvolutionBackwardFilterWorkspaceSize", {"hipdnnGetConvolutionBackwardFilterWorkspaceSize", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnConvolutionBackwardFilter", {"hipdnnConvolutionBackwardFilter", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionBackwardDataAlgorithmMaxCount", {"hipdnnGetConvolutionBackwardDataAlgorithmMaxCount", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFindConvolutionBackwardDataAlgorithm", {"hipdnnFindConvolutionBackwardDataAlgorithm", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnFindConvolutionBackwardDataAlgorithmEx", {"hipdnnFindConvolutionBackwardDataAlgorithmEx", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionBackwardDataAlgorithm", {"hipdnnGetConvolutionBackwardDataAlgorithm", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetConvolutionBackwardDataAlgorithm_v7", {"hipdnnGetConvolutionBackwardDataAlgorithm_v7", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetConvolutionBackwardDataWorkspaceSize", {"hipdnnGetConvolutionBackwardDataWorkspaceSize", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnConvolutionBackwardData", {"hipdnnConvolutionBackwardData", "", CONV_LIB_FUNC, API_DNN}}, - - // cuDNN Sortmax functions - {"cudnnSoftmaxForward", 
{"hipdnnSoftmaxForward", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSoftmaxBackward", {"hipdnnSoftmaxBackward", "", CONV_LIB_FUNC, API_DNN}}, - - // cuDNN Pooling functions - {"cudnnCreatePoolingDescriptor", {"hipdnnCreatePoolingDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetPooling2dDescriptor", {"hipdnnSetPooling2dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetPooling2dDescriptor", {"hipdnnGetPooling2dDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetPoolingNdDescriptor", {"hipdnnSetPoolingNdDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetPoolingNdDescriptor", {"hipdnnGetPoolingNdDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetPoolingNdForwardOutputDim", {"hipdnnGetPoolingNdForwardOutputDim", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetPooling2dForwardOutputDim", {"hipdnnGetPooling2dForwardOutputDim", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDestroyPoolingDescriptor", {"hipdnnDestroyPoolingDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnPoolingForward", {"hipdnnPoolingForward", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnPoolingBackward", {"hipdnnPoolingBackward", "", CONV_LIB_FUNC, API_DNN}}, - - // cuDNN Activation functions - {"cudnnCreateActivationDescriptor", {"hipdnnCreateActivationDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetActivationDescriptor", {"hipdnnSetActivationDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetActivationDescriptor", {"hipdnnGetActivationDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDestroyActivationDescriptor", {"hipdnnDestroyActivationDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnActivationForward", {"hipdnnActivationForward", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnActivationBackward", {"hipdnnActivationBackward", "", CONV_LIB_FUNC, API_DNN}}, - - // cuDNN LRN functions - {"cudnnCreateLRNDescriptor", {"hipdnnCreateLRNDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetLRNDescriptor", {"hipdnnSetLRNDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - 
{"cudnnGetLRNDescriptor", {"hipdnnGetLRNDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDestroyLRNDescriptor", {"hipdnnDestroyLRNDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnLRNCrossChannelForward", {"hipdnnLRNCrossChannelForward", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnLRNCrossChannelBackward", {"hipdnnLRNCrossChannelBackward", "", CONV_LIB_FUNC, API_DNN}}, - - // cuDNN Divisive Normalization functions - {"cudnnDivisiveNormalizationForward", {"hipdnnDivisiveNormalizationForward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDivisiveNormalizationBackward", {"hipdnnDivisiveNormalizationBackward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Batch Normalization functions - {"cudnnDeriveBNTensorDescriptor", {"hipdnnDeriveBNTensorDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnBatchNormalizationForwardTraining", {"hipdnnBatchNormalizationForwardTraining", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnBatchNormalizationForwardTrainingEx", {"hipdnnBatchNormalizationForwardTrainingEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnBatchNormalizationForwardInference", {"hipdnnBatchNormalizationForwardInference", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnBatchNormalizationBackward", {"hipdnnBatchNormalizationBackward", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnBatchNormalizationBackwardEx", {"hipdnnBatchNormalizationBackwardEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize", {"hipdnnGetBatchNormalizationForwardTrainingExWorkspaceSize", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetBatchNormalizationBackwardExWorkspaceSize", {"hipdnnGetBatchNormalizationBackwardExWorkspaceSize", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetBatchNormalizationTrainingExReserveSpaceSize", {"hipdnnGetBatchNormalizationTrainingExReserveSpaceSize", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Spatial Transformer functions - 
{"cudnnCreateSpatialTransformerDescriptor", {"hipdnnCreateSpatialTransformerDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetSpatialTransformerNdDescriptor", {"hipdnnSetSpatialTransformerNdDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroySpatialTransformerDescriptor", {"hipdnnDestroySpatialTransformerDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSpatialTfGridGeneratorForward", {"hipdnnSpatialTfGridGeneratorForward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSpatialTfGridGeneratorBackward", {"hipdnnSpatialTfGridGeneratorBackward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSpatialTfSamplerForward", {"hipdnnSpatialTfSamplerForward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSpatialTfSamplerBackward", {"hipdnnSpatialTfSamplerBackward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Dropout functions - {"cudnnCreateDropoutDescriptor", {"hipdnnCreateDropoutDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDestroyDropoutDescriptor", {"hipdnnDestroyDropoutDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDropoutGetStatesSize", {"hipdnnDropoutGetStatesSize", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDropoutGetReserveSpaceSize", {"hipdnnDropoutGetReserveSpaceSize", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetDropoutDescriptor", {"hipdnnSetDropoutDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetDropoutDescriptor", {"hipdnnGetDropoutDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRestoreDropoutDescriptor", {"hipdnnRestoreDropoutDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDropoutForward", {"hipdnnDropoutForward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDropoutBackward", {"hipdnnDropoutBackward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN RNN functions - {"cudnnCreateRNNDescriptor", {"hipdnnCreateRNNDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - 
{"cudnnDestroyRNNDescriptor", {"hipdnnDestroyRNNDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetRNNForwardInferenceAlgorithmMaxCount", {"hipdnnGetRNNForwardInferenceAlgorithmMaxCount", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFindRNNForwardInferenceAlgorithmEx", {"hipdnnFindRNNForwardInferenceAlgorithmEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetRNNForwardTrainingAlgorithmMaxCount", {"hipdnnGetRNNForwardTrainingAlgorithmMaxCount", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFindRNNForwardTrainingAlgorithmEx", {"hipdnnFindRNNForwardTrainingAlgorithmEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetRNNBackwardDataAlgorithmMaxCount", {"hipdnnGetRNNBackwardDataAlgorithmMaxCount", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFindRNNBackwardDataAlgorithmEx", {"hipdnnFindRNNBackwardDataAlgorithmEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetRNNBackwardWeightsAlgorithmMaxCount", {"hipdnnGetRNNBackwardWeightsAlgorithmMaxCount", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFindRNNBackwardWeightsAlgorithmEx", {"hipdnnFindRNNBackwardWeightsAlgorithmEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCreatePersistentRNNPlan", {"hipdnnCreatePersistentRNNPlan", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetPersistentRNNPlan", {"hipdnnSetPersistentRNNPlan", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnDestroyPersistentRNNPlan", {"hipdnnDestroyPersistentRNNPlan", "", CONV_LIB_FUNC, API_DNN}}, - // NOTE" hipdnnSetRNNDescriptor has additional argument hipdnnRNNBiasMode_t *biasMode without default value - {"cudnnSetRNNDescriptor", {"hipdnnSetRNNDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - // NOTE" hipdnnGetRNNDescriptor has additional argument hipdnnRNNBiasMode_t *biasMode without default value - {"cudnnGetRNNDescriptor", {"hipdnnGetRNNDescriptor", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetRNNProjectionLayers", {"hipdnnSetRNNProjectionLayers", "", CONV_LIB_FUNC, API_DNN, 
HIP_UNSUPPORTED}}, - {"cudnnGetRNNProjectionLayers", {"hipdnnGetRNNProjectionLayers", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetRNNAlgorithmDescriptor", {"hipdnnSetRNNAlgorithmDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetRNNMatrixMathType", {"hipdnnSetRNNMatrixMathType", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetRNNMatrixMathType", {"hipdnnGetRNNMatrixMathType", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetRNNWorkspaceSize", {"hipdnnGetRNNWorkspaceSize", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetRNNTrainingReserveSize", {"hipdnnGetRNNTrainingReserveSize", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetRNNParamsSize", {"hipdnnGetRNNParamsSize", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetRNNLinLayerMatrixParams", {"hipdnnGetRNNLinLayerMatrixParams", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnGetRNNLinLayerBiasParams", {"hipdnnGetRNNLinLayerBiasParams", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnRNNForwardInference", {"hipdnnRNNForwardInference", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnRNNForwardInferenceEx", {"hipdnnRNNForwardInferenceEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRNNForwardTraining", {"hipdnnRNNForwardTraining", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnRNNForwardTrainingEx", {"hipdnnRNNForwardTrainingEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRNNBackwardData", {"hipdnnRNNBackwardData", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnRNNBackwardDataEx", {"hipdnnRNNBackwardDataEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRNNBackwardWeights", {"hipdnnRNNBackwardWeights", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnRNNBackwardWeightsEx", {"hipdnnRNNBackwardWeightsEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetRNNDescriptor_v5", {"hipdnnSetRNNDescriptor_v5", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetRNNDescriptor_v6", {"hipdnnSetRNNDescriptor_v6", "", CONV_LIB_FUNC, API_DNN}}, - {"cudnnSetRNNPaddingMode", {"hipdnnSetRNNPaddingMode", "", 
CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetRNNPaddingMode", {"hipdnnGetRNNPaddingMode", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCreateRNNDataDescriptor", {"hipdnnCreateRNNDataDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyRNNDataDescriptor", {"hipdnnDestroyRNNDataDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetRNNDataDescriptor", {"hipdnnSetRNNDataDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetRNNDataDescriptor", {"hipdnnGetRNNDataDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetRNNBiasMode", {"hipdnnSetRNNBiasMode", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetRNNBiasMode", {"hipdnnGetRNNBiasMode", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Connectionist Temporal Classification loss functions - {"cudnnCreateCTCLossDescriptor", {"hipdnnCreateCTCLossDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetCTCLossDescriptor", {"hipdnnSetCTCLossDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetCTCLossDescriptorEx", {"hipdnnSetCTCLossDescriptorEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetCTCLossDescriptor", {"hipdnnGetCTCLossDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetCTCLossDescriptorEx", {"hipdnnGetCTCLossDescriptorEx", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyCTCLossDescriptor", {"hipdnnDestroyCTCLossDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCTCLoss", {"hipdnnCTCLoss", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetCTCLossWorkspaceSize", {"hipdnnGetCTCLossWorkspaceSize", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Algorithm functions - {"cudnnCreateAlgorithmDescriptor", {"hipdnnCreateAlgorithmDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetAlgorithmDescriptor", {"hipdnnSetAlgorithmDescriptor", "", CONV_LIB_FUNC, API_DNN, 
HIP_UNSUPPORTED}}, - {"cudnnGetAlgorithmDescriptor", {"hipdnnGetAlgorithmDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCopyAlgorithmDescriptor", {"hipdnnCopyAlgorithmDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyAlgorithmDescriptor", {"hipdnnDestroyAlgorithmDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCreateAlgorithmPerformance", {"hipdnnCreateAlgorithmPerformance", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetAlgorithmPerformance", {"hipdnnSetAlgorithmPerformance", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetAlgorithmPerformance", {"hipdnnGetAlgorithmPerformance", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyAlgorithmPerformance", {"hipdnnDestroyAlgorithmPerformance", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetAlgorithmSpaceSize", {"hipdnnGetAlgorithmSpaceSize", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSaveAlgorithm", {"hipdnnSaveAlgorithm", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRestoreAlgorithm", {"hipdnnRestoreAlgorithm", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Clipping functions - {"cudnnRNNSetClip", {"hipdnnRNNSetClip", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRNNGetClip", {"hipdnnRNNGetClip", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Sequence functions - {"cudnnCreateSeqDataDescriptor", {"hipdnnCreateSeqDataDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroySeqDataDescriptor", {"hipdnnDestroySeqDataDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetSeqDataDescriptor", {"hipdnnSetSeqDataDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetSeqDataDescriptor", {"hipdnnGetSeqDataDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Multihead Attention functions - {"cudnnCreateAttnDescriptor", {"hipdnnCreateAttnDescriptor", "", CONV_LIB_FUNC, API_DNN, 
HIP_UNSUPPORTED}}, - {"cudnnDestroyAttnDescriptor", {"hipdnnDestroyAttnDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetAttnDescriptor", {"hipdnnSetAttnDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetAttnDescriptor", {"hipdnnGetAttnDescriptor", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetMultiHeadAttnBuffers", {"hipdnnGetMultiHeadAttnBuffers", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetMultiHeadAttnWeights", {"hipdnnGetMultiHeadAttnWeights", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnMultiHeadAttnForward", {"hipdnnMultiHeadAttnForward", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnMultiHeadAttnBackwardData", {"hipdnnMultiHeadAttnBackwardData", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnMultiHeadAttnBackwardWeights", {"hipdnnMultiHeadAttnBackwardWeights", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - - // cuDNN Fuse functions - {"cudnnCreateFusedOpsConstParamPack", {"hipdnnCreateFusedOpsConstParamPack", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyFusedOpsConstParamPack", {"hipdnnDestroyFusedOpsConstParamPack", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetFusedOpsConstParamPackAttribute", {"hipdnnSetFusedOpsConstParamPackAttribute", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetFusedOpsConstParamPackAttribute", {"hipdnnGetFusedOpsConstParamPackAttribute", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCreateFusedOpsVariantParamPack", {"hipdnnCreateFusedOpsVariantParamPack", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyFusedOpsVariantParamPack", {"hipdnnDestroyFusedOpsVariantParamPack", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSetFusedOpsVariantParamPackAttribute", {"hipdnnSetFusedOpsVariantParamPackAttribute", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnGetFusedOpsVariantParamPackAttribute", {"hipdnnGetFusedOpsVariantParamPackAttribute", "", 
CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCreateFusedOpsPlan", {"hipdnnCreateFusedOpsPlan", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDestroyFusedOpsPlan", {"hipdnnDestroyFusedOpsPlan", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnMakeFusedOpsPlan", {"hipdnnMakeFusedOpsPlan", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFusedOpsExecute", {"hipdnnFusedOpsExecute", "", CONV_LIB_FUNC, API_DNN, HIP_UNSUPPORTED}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_DNN_API_types.cpp b/hipify-clang/src/CUDA2HIP_DNN_API_types.cpp deleted file mode 100644 index cc372067b1..0000000000 --- a/hipify-clang/src/CUDA2HIP_DNN_API_types.cpp +++ /dev/null @@ -1,391 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "CUDA2HIP.h" - -// Map of all functions -const std::map CUDA_DNN_TYPE_NAME_MAP{ - // cuDNN defines - {"CUDNN_VERSION", {"HIPDNN_VERSION", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 7000 - {"CUDNN_DIM_MAX", {"HIPDNN_DIM_MAX", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 8 - {"CUDNN_LRN_MIN_N", {"HIPDNN_LRN_MIN_N", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_LRN_MAX_N", {"HIPDNN_LRN_MAX_N", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 16 - {"CUDNN_LRN_MIN_K", {"HIPDNN_LRN_MIN_K", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1e-5 - {"CUDNN_LRN_MIN_BETA", {"HIPDNN_LRN_MIN_BETA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0.01 - {"CUDNN_BN_MIN_EPSILON", {"HIPDNN_BN_MIN_EPSILON", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1e-5 - {"CUDNN_SEV_ERROR_EN", {"HIPDNN_SEV_ERROR_EN", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_SEV_WARNING_EN", {"HIPDNN_SEV_WARNING_EN", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_SEV_INFO_EN", {"HIPDNN_SEV_INFO_EN", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_SEQDATA_DIM_COUNT", {"HIPDNN_SEQDATA_DIM_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 4 - - // cuDNN enums - {"cudnnStatus_t", {"hipdnnStatus_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_STATUS_SUCCESS", {"HIPDNN_STATUS_SUCCESS", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_STATUS_NOT_INITIALIZED", {"HIPDNN_STATUS_NOT_INITIALIZED", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_STATUS_ALLOC_FAILED", {"HIPDNN_STATUS_ALLOC_FAILED", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_STATUS_BAD_PARAM", {"HIPDNN_STATUS_BAD_PARAM", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"CUDNN_STATUS_INTERNAL_ERROR", {"HIPDNN_STATUS_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 - {"CUDNN_STATUS_INVALID_VALUE", {"HIPDNN_STATUS_INVALID_VALUE", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 - {"CUDNN_STATUS_ARCH_MISMATCH", 
{"HIPDNN_STATUS_ARCH_MISMATCH", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 - {"CUDNN_STATUS_MAPPING_ERROR", {"HIPDNN_STATUS_MAPPING_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 7 - {"CUDNN_STATUS_EXECUTION_FAILED", {"HIPDNN_STATUS_EXECUTION_FAILED", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 8 - {"CUDNN_STATUS_NOT_SUPPORTED", {"HIPDNN_STATUS_NOT_SUPPORTED", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 9 - {"CUDNN_STATUS_LICENSE_ERROR", {"HIPDNN_STATUS_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 10 - {"CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", {"HIPDNN_STATUS_RUNTIME_PREREQUISITE_MISSING", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 11 - {"CUDNN_STATUS_RUNTIME_IN_PROGRESS", {"HIPDNN_STATUS_RUNTIME_IN_PROGRESS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 12 - {"CUDNN_STATUS_RUNTIME_FP_OVERFLOW", {"HIPDNN_STATUS_RUNTIME_FP_OVERFLOW", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 13 - {"cudnnRuntimeTag_t", {"hipdnnRuntimeTag_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnConvolutionMode_t", {"hipdnnConvolutionMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_CONVOLUTION", {"HIPDNN_CONVOLUTION", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_CROSS_CORRELATION", {"HIPDNN_CROSS_CORRELATION", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnTensorFormat_t", {"hipdnnTensorFormat_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_TENSOR_NCHW", {"HIPDNN_TENSOR_NCHW", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_TENSOR_NHWC", {"HIPDNN_TENSOR_NHWC", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_TENSOR_NCHW_VECT_C", {"HIPDNN_TENSOR_NCHW_VECT_C", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"cudnnFoldingDirection_t", {"hipdnnFoldingDirection_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_TRANSFORM_FOLD", {"HIPDNN_TRANSFORM_FOLD", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0U - {"CUDNN_TRANSFORM_UNFOLD", {"HIPDNN_TRANSFORM_UNFOLD", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1U - {"cudnnDataType_t", 
{"hipdnnDataType_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_DATA_FLOAT", {"HIPDNN_DATA_FLOAT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_DATA_DOUBLE", {"HIPDNN_DATA_DOUBLE", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_DATA_HALF", {"HIPDNN_DATA_HALF", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_DATA_INT8", {"HIPDNN_DATA_INT8", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"CUDNN_DATA_INT32", {"HIPDNN_DATA_INT32", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 - {"CUDNN_DATA_INT8x4", {"HIPDNN_DATA_INT8x4", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 - {"CUDNN_DATA_UINT8", {"HIPDNN_DATA_UINT8", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 6 - {"CUDNN_DATA_UINT8x4", {"HIPDNN_DATA_UINT8x4", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 7 - {"CUDNN_DATA_INT8x32", {"HIPDNN_DATA_INT8x32", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 8 - {"cudnnErrQueryMode_t", {"hipdnnErrQueryMode_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_ERRQUERY_RAWCODE", {"HIPDNN_ERRQUERY_RAWCODE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_ERRQUERY_NONBLOCKING", {"HIPDNN_ERRQUERY_NONBLOCKING", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_ERRQUERY_BLOCKING", {"HIPDNN_ERRQUERY_BLOCKING", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"cudnnSeverity_t", {"hipdnnSeverity_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_SEV_FATAL", {"HIPDNN_SEV_FATAL", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_SEV_ERROR", {"HIPDNN_SEV_ERROR", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_SEV_WARNING", {"HIPDNN_SEV_WARNING", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"CUDNN_SEV_INFO", {"HIPDNN_SEV_INFO", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 - {"cudnnConvolutionFwdAlgo_t", {"hipdnnConvolutionFwdAlgo_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM", 
{"HIPDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM", {"HIPDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_CONVOLUTION_FWD_ALGO_GEMM", {"HIPDNN_CONVOLUTION_FWD_ALGO_GEMM", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_CONVOLUTION_FWD_ALGO_DIRECT", {"HIPDNN_CONVOLUTION_FWD_ALGO_DIRECT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"CUDNN_CONVOLUTION_FWD_ALGO_FFT", {"HIPDNN_CONVOLUTION_FWD_ALGO_FFT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 - {"CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING", {"HIPDNN_CONVOLUTION_FWD_ALGO_FFT_TILING", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 - {"CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD", {"HIPDNN_CONVOLUTION_FWD_ALGO_WINOGRAD", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 - {"CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED", {"HIPDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 7 - {"CUDNN_CONVOLUTION_FWD_ALGO_COUNT", {"HIPDNN_CONVOLUTION_FWD_ALGO_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 8 - {"cudnnConvolutionFwdPreference_t", {"hipdnnConvolutionFwdPreference_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_CONVOLUTION_FWD_NO_WORKSPACE", {"HIPDNN_CONVOLUTION_FWD_NO_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_CONVOLUTION_FWD_PREFER_FASTEST", {"HIPDNN_CONVOLUTION_FWD_PREFER_FASTEST", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT", {"HIPDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"cudnnDeterminism_t", {"hipdnnDeterminism_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_NON_DETERMINISTIC", {"HIPDNN_NON_DETERMINISTIC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_DETERMINISTIC", {"HIPDNN_DETERMINISTIC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"cudnnDivNormMode_t", {"hipdnnDivNormMode_t", "", CONV_TYPE, API_DNN, 
HIP_UNSUPPORTED}}, - {"CUDNN_DIVNORM_PRECOMPUTED_MEANS", {"HIPDNN_DIVNORM_PRECOMPUTED_MEANS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"cudnnCTCLossAlgo_t", {"hipdnnCTCLossAlgo_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_CTC_LOSS_ALGO_DETERMINISTIC", {"HIPDNN_CTC_LOSS_ALGO_DETERMINISTIC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC", {"HIPDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"cudnnLRNMode_t", {"hipdnnLRNMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_LRN_CROSS_CHANNEL_DIM1", {"HIPDNN_LRN_CROSS_CHANNEL", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 vs 1 - {"cudnnRNNInputMode_t", {"hipdnnRNNInputMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_LINEAR_INPUT", {"HIPDNN_LINEAR_INPUT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_SKIP_INPUT", {"HIPDNN_SKIP_INPUT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnDirectionMode_t", {"hipdnnDirectionMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_UNIDIRECTIONAL", {"HIPDNN_UNIDIRECTIONAL", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_BIDIRECTIONAL", {"HIPDNN_BIDIRECTIONAL", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnMathType_t", {"hipdnnMathType_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_DEFAULT_MATH", {"HIPDNN_DEFAULT_MATH", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_TENSOR_OP_MATH", {"HIPDNN_TENSOR_OP_MATH", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION", {"HIPDNN_TENSOR_OP_MATH_ALLOW_CONVERSION", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"cudnnNanPropagation_t", {"hipdnnNanPropagation_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_NOT_PROPAGATE_NAN", {"HIPDNN_NOT_PROPAGATE_NAN", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_PROPAGATE_NAN", {"HIPDNN_PROPAGATE_NAN", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnConvolutionBwdDataAlgo_t", {"hipdnnConvolutionBwdDataAlgo_t", "", CONV_TYPE, 
API_DNN}}, - {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_0", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_0", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_1", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_1", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_FFT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 - {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 - {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT", {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_TRANSPOSE_GEMM", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 - {"cudnnConvolutionBwdFilterAlgo_t", {"hipdnnConvolutionBwdFilterAlgo_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_0", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_1", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_3", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 - {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 - {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 - 
{"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 7 - {"cudnnConvolutionBwdFilterPreference_t", {"hipdnnConvolutionBwdFilterPreference_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE", {"HIPDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST", {"HIPDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT",{"HIPDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT","", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"cudnnRNNAlgo_t", {"hipdnnRNNAlgo_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_RNN_ALGO_STANDARD", {"HIPDNN_RNN_ALGO_STANDARD", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_RNN_ALGO_PERSIST_STATIC", {"HIPDNN_RNN_ALGO_PERSIST_STATIC", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_RNN_ALGO_PERSIST_DYNAMIC", {"HIPDNN_RNN_ALGO_PERSIST_DYNAMIC", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_RNN_ALGO_COUNT", {"HIPDNN_RNN_ALGO_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 - {"cudnnRNNMode_t", {"hipdnnRNNMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_RNN_RELU", {"HIPDNN_RNN_RELU", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_RNN_TANH", {"HIPDNN_RNN_TANH", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_LSTM", {"HIPDNN_LSTM", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_GRU", {"HIPDNN_GRU", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"cudnnRNNBiasMode_t", {"hipdnnRNNBiasMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_RNN_NO_BIAS", {"HIPDNN_RNN_NO_BIAS", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_RNN_SINGLE_INP_BIAS", {"HIPDNN_RNN_WITH_BIAS", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_RNN_DOUBLE_BIAS", {"HIPDNN_RNN_WITH_BIAS", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_RNN_SINGLE_REC_BIAS", {"HIPDNN_RNN_WITH_BIAS", "", 
CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnOpTensorOp_t", {"hipdnnOpTensorOp_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_OP_TENSOR_ADD", {"HIPDNN_OP_TENSOR_ADD", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_OP_TENSOR_MUL", {"HIPDNN_OP_TENSOR_MUL", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_OP_TENSOR_MIN", {"HIPDNN_OP_TENSOR_MIN", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_OP_TENSOR_MAX", {"HIPDNN_OP_TENSOR_MAX", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"CUDNN_OP_TENSOR_SQRT", {"HIPDNN_OP_TENSOR_SQRT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 - {"CUDNN_OP_TENSOR_NOT", {"HIPDNN_OP_TENSOR_NOT", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 5 - {"cudnnReduceTensorOp_t", {"hipdnnReduceTensorOp_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_REDUCE_TENSOR_ADD", {"HIPDNN_REDUCE_TENSOR_ADD", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_REDUCE_TENSOR_MUL", {"HIPDNN_REDUCE_TENSOR_MUL", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_REDUCE_TENSOR_MIN", {"HIPDNN_REDUCE_TENSOR_MIN", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_REDUCE_TENSOR_MAX", {"HIPDNN_REDUCE_TENSOR_MAX", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"CUDNN_REDUCE_TENSOR_AMAX", {"HIPDNN_REDUCE_TENSOR_AMAX", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 - {"CUDNN_REDUCE_TENSOR_AVG", {"HIPDNN_REDUCE_TENSOR_AVG", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 - {"CUDNN_REDUCE_TENSOR_NORM1", {"HIPDNN_REDUCE_TENSOR_NORM1", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 6 - {"CUDNN_REDUCE_TENSOR_NORM2", {"HIPDNN_REDUCE_TENSOR_NORM2", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 7 - {"CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS", {"HIPDNN_REDUCE_TENSOR_MUL_NO_ZEROS", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 8 - {"cudnnReduceTensorIndices_t", {"hipdnnReduceTensorIndices_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_REDUCE_TENSOR_NO_INDICES", {"HIPDNN_REDUCE_TENSOR_NO_INDICES", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_REDUCE_TENSOR_FLATTENED_INDICES", {"HIPDNN_REDUCE_TENSOR_FLATTENED_INDICES", 
"", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnConvolutionBwdDataPreference_t", {"hipdnnConvolutionBwdDataPreference_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE", {"HIPDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST", {"HIPDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT", {"HIPDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"cudnnIndicesType_t", {"hipdnnIndicesType_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_32BIT_INDICES", {"HIPDNN_32BIT_INDICES", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_64BIT_INDICES", {"HIPDNN_64BIT_INDICES", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_16BIT_INDICES", {"HIPDNN_16BIT_INDICES", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_8BIT_INDICES", {"HIPDNN_8BIT_INDICES", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"cudnnSoftmaxAlgorithm_t", {"hipdnnSoftmaxAlgorithm_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_SOFTMAX_FAST", {"HIPDNN_SOFTMAX_FAST", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_SOFTMAX_ACCURATE", {"HIPDNN_SOFTMAX_ACCURATE", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_SOFTMAX_LOG", {"HIPDNN_SOFTMAX_LOG", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"cudnnSoftmaxMode_t", {"hipdnnSoftmaxMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_SOFTMAX_MODE_INSTANCE", {"HIPDNN_SOFTMAX_MODE_INSTANCE", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_SOFTMAX_MODE_CHANNEL", {"HIPDNN_SOFTMAX_MODE_CHANNEL", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"cudnnPoolingMode_t", {"hipdnnPoolingMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_POOLING_MAX", {"HIPDNN_POOLING_MAX", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING", {"HIPDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - 
{"CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING", {"HIPDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_POOLING_MAX_DETERMINISTIC", {"HIPDNN_POOLING_MAX_DETERMINISTIC", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"cudnnActivationMode_t", {"hipdnnActivationMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_ACTIVATION_SIGMOID", {"HIPDNN_ACTIVATION_SIGMOID", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_ACTIVATION_RELU", {"HIPDNN_ACTIVATION_RELU", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_ACTIVATION_TANH", {"HIPDNN_ACTIVATION_TANH", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"CUDNN_ACTIVATION_CLIPPED_RELU", {"HIPDNN_ACTIVATION_CLIPPED_RELU", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 3 - {"CUDNN_ACTIVATION_ELU", {"HIPDNN_ACTIVATION_ELU", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 4 - {"CUDNN_ACTIVATION_IDENTITY", {"HIPDNN_ACTIVATION_PATHTRU", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 5 - {"cudnnBatchNormMode_t", {"hipdnnBatchNormMode_t", "", CONV_TYPE, API_DNN}}, - {"CUDNN_BATCHNORM_PER_ACTIVATION", {"HIPDNN_BATCHNORM_PER_ACTIVATION", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 0 - {"CUDNN_BATCHNORM_SPATIAL", {"HIPDNN_BATCHNORM_SPATIAL", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUDNN_BATCHNORM_SPATIAL_PERSISTENT", {"HIPDNN_BATCHNORM_SPATIAL_PERSISTENT", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 2 - {"cudnnSamplerType_t", {"hipdnnSamplerType_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_SAMPLER_BILINEAR", {"HIPDNN_SAMPLER_BILINEAR", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"cudnnBatchNormOps_t", {"hipdnnBatchNormOps_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_BATCHNORM_OPS_BN", {"HIPDNN_BATCHNORM_OPS_BN", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_BATCHNORM_OPS_BN_ACTIVATION", {"HIPDNN_BATCHNORM_OPS_BN_ACTIVATION", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION", 
{"HIPDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"cudnnRNNClipMode_t", {"hipdnnRNNClipMode_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_RNN_CLIP_NONE", {"HIPDNN_RNN_CLIP_NONE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_RNN_CLIP_MINMAX", {"HIPDNN_RNN_CLIP_MINMAX", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"cudnnRNNDataLayout_t", {"hipdnnRNNDataLayout_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED", {"HIPDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_UNPACKED", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED", {"HIPDNN_RNN_DATA_LAYOUT_SEQ_MAJOR_PACKED", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED", {"HIPDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"cudnnRNNPaddingMode_t", {"hipdnnRNNPaddingMode_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_RNN_PADDED_IO_DISABLED", {"HIPDNN_RNN_PADDED_IO_DISABLED", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_RNN_PADDED_IO_ENABLED", {"HIPDNN_RNN_PADDED_IO_ENABLED", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"cudnnSeqDataAxis_t", {"hipdnnSeqDataAxis_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_SEQDATA_TIME_DIM", {"HIPDNN_SEQDATA_TIME_DIM", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_SEQDATA_BATCH_DIM", {"HIPDNN_SEQDATA_BATCH_DIM", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_SEQDATA_BEAM_DIM", {"HIPDNN_SEQDATA_BEAM_DIM", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"CUDNN_SEQDATA_VECT_DIM", {"HIPDNN_SEQDATA_VECT_DIM", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 - {"cudnnAttnQueryMap_t", {"hipdnnAttnQueryMap_t", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, - 
{"CUDNN_ATTN_QUERYMAP_ALL_TO_ONE", {"HIPDNN_ATTN_QUERYMAP_ALL_TO_ONE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_ATTN_QUERYMAP_ONE_TO_ONE", {"HIPDNN_ATTN_QUERYMAP_ONE_TO_ONE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1U << 0 - {"CUDNN_ATTN_DISABLE_PROJ_BIASES", {"HIPDNN_ATTN_DISABLE_PROJ_BIASES", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_ATTN_ENABLE_PROJ_BIASES", {"HIPDNN_ATTN_ENABLE_PROJ_BIASES", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1U << 1 - {"cudnnMultiHeadAttnWeightKind_t", {"hipdnnMultiHeadAttnWeightKind_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_MH_ATTN_Q_WEIGHTS", {"HIPDNN_MH_ATTN_Q_WEIGHTS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_MH_ATTN_K_WEIGHTS", {"HIPDNN_MH_ATTN_K_WEIGHTS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_MH_ATTN_V_WEIGHTS", {"HIPDNN_MH_ATTN_V_WEIGHTS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"CUDNN_MH_ATTN_O_WEIGHTS", {"HIPDNN_MH_ATTN_O_WEIGHTS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 - {"CUDNN_MH_ATTN_Q_BIASES", {"HIPDNN_MH_ATTN_Q_BIASES", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 4 - {"CUDNN_MH_ATTN_K_BIASES", {"HIPDNN_MH_ATTN_K_BIASES", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 5 - {"CUDNN_MH_ATTN_V_BIASES", {"HIPDNN_MH_ATTN_V_BIASES", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 6 - {"CUDNN_MH_ATTN_O_BIASES", {"HIPDNN_MH_ATTN_O_BIASES", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 7 - {"CUDNN_ATTN_WKIND_COUNT", {"HIPDNN_ATTN_WKIND_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 8 - {"cudnnWgradMode_t", {"hipdnnWgradMode_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_WGRAD_MODE_ADD", {"HIPDNN_WGRAD_MODE_ADD", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_WGRAD_MODE_SET", {"HIPDNN_WGRAD_MODE_SET", "", CONV_NUMERIC_LITERAL, API_DNN, 
HIP_UNSUPPORTED}}, // 1 - {"cudnnReorderType_t", {"hipdnnReorderType_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_DEFAULT_REORDER", {"HIPDNN_DEFAULT_REORDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_NO_REORDER", {"HIPDNN_NO_REORDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"cudnnLossNormalizationMode_t", {"hipdnnLossNormalizationMode_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_LOSS_NORMALIZATION_NONE", {"HIPDNN_LOSS_NORMALIZATION_NONE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_LOSS_NORMALIZATION_SOFTMAX", {"HIPDNN_LOSS_NORMALIZATION_SOFTMAX", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"cudnnFusedOps_t", {"hipdnnFusedOps_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS", {"HIPDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD", {"HIPDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING", {"HIPDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE", {"HIPDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 - {"CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION", {"HIPDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 4 - {"CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK", {"HIPDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 5 - {"CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM", {"HIPDNN_FUSED_DACTIVATION_FORK_DBATCHNORM", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 6 - {"cudnnFusedOpsConstParamLabel_t", {"hipdnnFusedOpsConstParamLabel_t", "", CONV_TYPE, 
API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_PARAM_XDESC", {"HIPDNN_PARAM_XDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_PARAM_XDATA_PLACEHOLDER", {"HIPDNN_PARAM_XDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_PARAM_BN_MODE", {"HIPDNN_PARAM_BN_MODE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"CUDNN_PARAM_BN_EQSCALEBIAS_DESC", {"HIPDNN_PARAM_BN_EQSCALEBIAS_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 - {"CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER", {"HIPDNN_PARAM_BN_EQSCALE_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 4 - {"CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER", {"HIPDNN_PARAM_BN_EQBIAS_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 5 - {"CUDNN_PARAM_ACTIVATION_DESC", {"HIPDNN_PARAM_ACTIVATION_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 6 - {"CUDNN_PARAM_CONV_DESC", {"HIPDNN_PARAM_CONV_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 7 - {"CUDNN_PARAM_WDESC", {"HIPDNN_PARAM_WDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 8 - {"CUDNN_PARAM_WDATA_PLACEHOLDER", {"HIPDNN_PARAM_WDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 9 - {"CUDNN_PARAM_DWDESC", {"HIPDNN_PARAM_DWDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 10 - {"CUDNN_PARAM_DWDATA_PLACEHOLDER", {"HIPDNN_PARAM_DWDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 11 - {"CUDNN_PARAM_YDESC", {"HIPDNN_PARAM_YDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 12 - {"CUDNN_PARAM_YDATA_PLACEHOLDER", {"HIPDNN_PARAM_YDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 13 - {"CUDNN_PARAM_DYDESC", {"HIPDNN_PARAM_DYDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 14 - {"CUDNN_PARAM_DYDATA_PLACEHOLDER", {"HIPDNN_PARAM_DYDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 15 - 
{"CUDNN_PARAM_YSTATS_DESC", {"HIPDNN_PARAM_YSTATS_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 16 - {"CUDNN_PARAM_YSUM_PLACEHOLDER", {"HIPDNN_PARAM_YSUM_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 17 - {"CUDNN_PARAM_YSQSUM_PLACEHOLDER", {"HIPDNN_PARAM_YSQSUM_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 18 - {"CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC", {"HIPDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 19 - {"CUDNN_PARAM_BN_SCALE_PLACEHOLDER", {"HIPDNN_PARAM_BN_SCALE_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 20 - {"CUDNN_PARAM_BN_BIAS_PLACEHOLDER", {"HIPDNN_PARAM_BN_BIAS_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 21 - {"CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER", {"HIPDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 22 - {"CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER", {"HIPDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 23 - {"CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER", {"HIPDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 24 - {"CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER", {"HIPDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 25 - {"CUDNN_PARAM_ZDESC", {"HIPDNN_PARAM_ZDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 26 - {"CUDNN_PARAM_ZDATA_PLACEHOLDER", {"HIPDNN_PARAM_ZDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 27 - {"CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC", {"HIPDNN_PARAM_BN_Z_EQSCALEBIAS_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 28 - {"CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER", {"HIPDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 29 - {"CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER", 
{"HIPDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 30 - {"CUDNN_PARAM_ACTIVATION_BITMASK_DESC", {"HIPDNN_PARAM_ACTIVATION_BITMASK_DESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 31 - {"CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER", {"HIPDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 32 - {"CUDNN_PARAM_DXDESC", {"HIPDNN_PARAM_DXDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 33 - {"CUDNN_PARAM_DXDATA_PLACEHOLDER", {"HIPDNN_PARAM_DXDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 34 - {"CUDNN_PARAM_DZDESC", {"HIPDNN_PARAM_DZDESC", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 35 - {"CUDNN_PARAM_DZDATA_PLACEHOLDER", {"HIPDNN_PARAM_DZDATA_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 36 - {"CUDNN_PARAM_BN_DSCALE_PLACEHOLDER", {"HIPDNN_PARAM_BN_DSCALE_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 37 - {"CUDNN_PARAM_BN_DBIAS_PLACEHOLDER", {"HIPDNN_PARAM_BN_DBIAS_PLACEHOLDER", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 38 - {"cudnnFusedOpsPointerPlaceHolder_t", {"hipdnnActivationMode_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_PTR_NULL", {"HIPDNN_ACTIVATION_SIGMOID", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_PTR_ELEM_ALIGNED", {"HIPDNN_ACTIVATION_RELU", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_PTR_16B_ALIGNED", {"HIPDNN_ACTIVATION_TANH", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"cudnnFusedOpsVariantParamLabel_t", {"hipdnnFusedOpsVariantParamLabel_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"CUDNN_PTR_XDATA", {"HIPDNN_PTR_XDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 0 - {"CUDNN_PTR_BN_EQSCALE", {"HIPDNN_PTR_BN_EQSCALE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 1 - {"CUDNN_PTR_BN_EQBIAS", {"HIPDNN_PTR_BN_EQBIAS", "", 
CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 2 - {"CUDNN_PTR_WDATA", {"HIPDNN_PTR_WDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 3 - {"CUDNN_PTR_DWDATA", {"HIPDNN_PTR_DWDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 4 - {"CUDNN_PTR_YDATA", {"HIPDNN_PTR_YDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 5 - {"CUDNN_PTR_DYDATA", {"HIPDNN_PTR_DYDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 6 - {"CUDNN_PTR_YSUM", {"HIPDNN_PTR_YSUM", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 7 - {"CUDNN_PTR_YSQSUM", {"HIPDNN_PTR_YSQSUM", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 8 - {"CUDNN_PTR_WORKSPACE", {"HIPDNN_PTR_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 9 - {"CUDNN_PTR_BN_SCALE", {"HIPDNN_PTR_BN_SCALE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 10 - {"CUDNN_PTR_BN_BIAS", {"HIPDNN_PTR_BN_BIAS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 11 - {"CUDNN_PTR_BN_SAVED_MEAN", {"HIPDNN_PTR_BN_SAVED_MEAN", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 12 - {"CUDNN_PTR_BN_SAVED_INVSTD", {"HIPDNN_PTR_BN_SAVED_INVSTD", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 13 - {"CUDNN_PTR_BN_RUNNING_MEAN", {"HIPDNN_PTR_BN_RUNNING_MEAN", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 14 - {"CUDNN_PTR_BN_RUNNING_VAR", {"HIPDNN_PTR_BN_RUNNING_VAR", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 15 - {"CUDNN_PTR_ZDATA", {"HIPDNN_PTR_ZDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 16 - {"CUDNN_PTR_BN_Z_EQSCALE", {"HIPDNN_PTR_BN_Z_EQSCALE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 17 - {"CUDNN_PTR_BN_Z_EQBIAS", {"HIPDNN_PTR_BN_Z_EQBIAS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 18 - {"CUDNN_PTR_ACTIVATION_BITMASK", {"HIPDNN_PTR_ACTIVATION_BITMASK", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 19 - {"CUDNN_PTR_DXDATA", {"HIPDNN_PTR_DXDATA", "", 
CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 20 - {"CUDNN_PTR_DZDATA", {"HIPDNN_PTR_DZDATA", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 21 - {"CUDNN_PTR_BN_DSCALE", {"HIPDNN_PTR_BN_DSCALE", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 22 - {"CUDNN_PTR_BN_DBIAS", {"HIPDNN_PTR_BN_DBIAS", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 23 - {"CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES", {"HIPDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 100 - {"CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT", {"HIPDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 101 - {"CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR", {"HIPDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 102 - {"CUDNN_SCALAR_DOUBLE_BN_EPSILON", {"HIPDNN_SCALAR_DOUBLE_BN_EPSILON", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // 103 - - // cuDNN types - {"cudnnContext", {"hipdnnContext", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnHandle_t", {"hipdnnHandle_t", "", CONV_TYPE, API_DNN}}, - {"cudnnTensorStruct", {"hipdnnTensorStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnTensorDescriptor_t", {"hipdnnTensorDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnConvolutionStruct", {"hipdnnConvolutionStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnConvolutionDescriptor_t", {"hipdnnConvolutionDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnPoolingStruct", {"hipdnnPoolingStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnPoolingDescriptor_t", {"hipdnnPoolingDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnFilterStruct", {"hipdnnFilterStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFilterDescriptor_t", {"hipdnnFilterDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnLRNStruct", {"hipdnnLRNStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnLRNDescriptor_t", 
{"hipdnnLRNDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnActivationStruct", {"hipdnnActivationStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnActivationDescriptor_t", {"hipdnnActivationDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnSpatialTransformerStruct", {"hipdnnSpatialTransformerStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSpatialTransformerDescriptor_t", {"hipdnnSpatialTransformerDescriptor_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnOpTensorStruct", {"hipdnnOpTensorStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnOpTensorDescriptor_t", {"hipdnnOpTensorDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnReduceTensorStruct", {"hipdnnReduceTensorStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnReduceTensorDescriptor_t", {"hipdnnReduceTensorDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnCTCLossStruct", {"hipdnnCTCLossStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCTCLossDescriptor_t", {"hipdnnCTCLossDescriptor_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnTensorTransformStruct", {"hipdnnTensorTransformStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnTensorTransformDescriptor_t", {"hipdnnTensorTransformDescriptor_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnConvolutionFwdAlgoPerf_t", {"hipdnnConvolutionFwdAlgoPerf_t", "", CONV_TYPE, API_DNN}}, - {"cudnnConvolutionBwdFilterAlgoPerf_t", {"hipdnnConvolutionBwdFilterAlgoPerf_t", "", CONV_TYPE, API_DNN}}, - {"cudnnConvolutionBwdDataAlgoPerf_t", {"hipdnnConvolutionBwdDataAlgoPerf_t", "", CONV_TYPE, API_DNN}}, - {"cudnnDropoutStruct", {"hipdnnDropoutStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDropoutDescriptor_t", {"hipdnnDropoutDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnAlgorithmStruct", {"hipdnnAlgorithmStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnAlgorithmDescriptor_t", {"hipdnnAlgorithmDescriptor_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - 
{"cudnnAlgorithmPerformanceStruct", {"hipdnnAlgorithmPerformanceStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnAlgorithmPerformance_t", {"hipdnnAlgorithmPerformance_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRNNStruct", {"hipdnnRNNStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRNNDescriptor_t", {"hipdnnRNNDescriptor_t", "", CONV_TYPE, API_DNN}}, - {"cudnnPersistentRNNPlan", {"hipdnnPersistentRNNPlan", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnPersistentRNNPlan_t", {"hipdnnPersistentRNNPlan_t", "", CONV_TYPE, API_DNN}}, - {"cudnnAlgorithm_t", {"hipdnnAlgorithm_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnDebug_t", {"hipdnnDebug_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnCallback_t", {"hipdnnCallback_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRNNDataStruct", {"hipdnnRNNDataStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnRNNDataDescriptor_t", {"hipdnnRNNDataDescriptor_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSeqDataStruct", {"hipdnnSeqDataStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnSeqDataDescriptor_t", {"hipdnnSeqDataDescriptor_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnAttnStruct", {"hipdnnAttnStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnAttnDescriptor_t", {"hipdnnAttnDescriptor_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFusedOpsConstParamStruct", {"hipdnnFusedOpsConstParamStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFusedOpsConstParamPack_t", {"hipdnnFusedOpsConstParamPack_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFusedOpsVariantParamStruct", {"hipdnnFusedOpsVariantParamStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFusedOpsVariantParamPack_t", {"hipdnnFusedOpsVariantParamPack_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFusedOpsPlanStruct", {"hipdnnFusedOpsPlanStruct", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, - {"cudnnFusedOpsPlan_t", 
{"hipdnnFusedOpsPlan_t", "", CONV_TYPE, API_DNN, HIP_UNSUPPORTED}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_Device_functions.cpp b/hipify-clang/src/CUDA2HIP_Device_functions.cpp deleted file mode 100644 index 87fe2ac86c..0000000000 --- a/hipify-clang/src/CUDA2HIP_Device_functions.cpp +++ /dev/null @@ -1,616 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "CUDA2HIP.h" - -// Maps CUDA header names to HIP header names -const std::map CUDA_DEVICE_FUNC_MAP{ - // math functions - {"abs", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"labs", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"llabs", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fabs", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fabsf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"min", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fminf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"max", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fmaxf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fmax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cos", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sincos", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sincosf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"tan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sqrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rsqrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rsqrtf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"log2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"exp2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"exp2f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"exp10", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"exp10f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"expm1", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"expm1f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"log2f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"log10", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"log", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"log1p", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"log1pf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"floor", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"exp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cosh", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sinh", {"", "", CONV_DEVICE_FUNC, 
API_RUNTIME}}, - {"tanh", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"acosh", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"acoshf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"asinh", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"asinhf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atanh", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atanhf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"ldexp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"ldexpf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"logb", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"logbf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"ilogb", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"ilogbf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"scalbn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"scalbnf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"scalbln", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"scalblnf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"frexp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"frexpf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"round", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"roundf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"lround", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"lroundf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"llround", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"llroundf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rintf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"lrint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"lrintf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"llrint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"llrintf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"nearbyint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"nearbyintf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"ceil", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"trunc", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"truncf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - 
{"fdim", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fdimf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atan2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"acos", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"asin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hypot", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rhypot", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hypotf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rhypotf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"norm3d", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rnorm3d", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"norm4d", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rnorm4d", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"norm", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rnorm", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rnormf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"normf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"norm3df", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rnorm3df", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"norm4df", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rnorm4df", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cbrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cbrtf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rcbrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"rcbrtf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sinpi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sinpif", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cospi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cospif", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sincospi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sincospif", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"pow", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"modf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fmod", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"remainder", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"remainderf", {"", "", 
CONV_DEVICE_FUNC, API_RUNTIME}}, - {"remquo", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"remquof", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"j0", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"j0f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"j1", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"j1f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"jn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"jnf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"y0", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"y0f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"y1", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"y1f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"yn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"ynf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cyl_bessel_i0", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cyl_bessel_i0f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cyl_bessel_i1", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cyl_bessel_i1f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erff", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erfinv", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erfinvf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erfc", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erfcf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"lgamma", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erfcinv", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erfcinvf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"normcdfinv", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"normcdfinvf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"normcdf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"normcdff", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erfcx", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"erfcxf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"lgammaf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"tgamma", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"tgammaf", {"", "", CONV_DEVICE_FUNC, 
API_RUNTIME}}, - {"copysign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"copysignf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"nextafter", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"nextafterf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"nan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"nanf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fma", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fmaf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"acosf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"asinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atanf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atan2f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"cosf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"tanf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"coshf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sinhf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"tanhf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"expf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"logf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"log10f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"modff", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"powf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"sqrtf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"ceilf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"floorf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fmodf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"signbit", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"isfinite", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"isnan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"isinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"umin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"llmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"ullmin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"umax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"llmax", {"", "", 
CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"ullmax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__isinff", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__isnanf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__finite", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__finitef", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__signbit", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__isnan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__isinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__signbitf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__signbitl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__finitel", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__isinfl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__isnanl", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"_ldsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"_fdsign", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"_Pow_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - // static math functions declared in device-functions.h - {"mulhi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"mul64hi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"float_as_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"int_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"float_as_uint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"uint_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"saturate", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"mul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"umul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"float2int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"int2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 
UNSUPPORTED}}, - {"uint2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - // device functions - {"__mulhi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__umulhi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__mul64hi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__umul64hi", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__int_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float_as_int", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint_as_float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float_as_uint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__syncthreads", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__threadfence", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__threadfence_block", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__saturatef", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__sad", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__usad", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__mul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__umul24", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fdividef", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fdividef", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"fdivide", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__sinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__cosf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__tanf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__sincosf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__expf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__exp10f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__log2f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__log10f", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__logf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__powf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2int_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2int_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2int_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - 
{"__float2int_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2uint_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2uint_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2uint_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2uint_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__int2float_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__int2float_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__int2float_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__int2float_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint2float_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint2float_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint2float_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint2float_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2ll_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2ll_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2ll_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2ll_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2ull_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2ull_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2ull_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2ull_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ll2float_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ll2float_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ll2float_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ll2float_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ull2float_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ull2float_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ull2float_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ull2float_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fadd_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fadd_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fadd_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, 
- {"__fadd_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fsub_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fsub_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fsub_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fsub_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fmul_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fmul_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fmul_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fmul_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fmaf_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fmaf_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fmaf_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fmaf_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__frcp_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__frcp_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__frcp_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__frcp_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fsqrt_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fsqrt_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fsqrt_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fsqrt_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__frsqrt_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fdiv_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fdiv_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fdiv_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__fdiv_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__clz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ffs", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__popc", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__brev", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__clzll", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ffsll", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__popcll", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__brevll", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__byte_perm", {"", "", 
CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hadd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__rhadd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uhadd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__urhadd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__double2int_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__double2uint_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__double2ll_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__double2ull_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__prof_trigger", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__trap", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__brkpt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__pm0", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__pm1", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__pm2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__pm3", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - // SIMD functions - {"__vabs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vabsss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vadd2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vaddss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vaddus2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vavgs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vavgu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vhaddu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpeq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpges2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpgts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpgtu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmples2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 
UNSUPPORTED}}, - {"__vcmplts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vabsdiffu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vmaxs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vmaxu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vmins2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vminu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vseteq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetges2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetgts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetles2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetleu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetlts2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsadu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsub2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsubss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsubus2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vneg2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vnegss2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vabsdiffs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsads2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vabs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vabsss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vadd4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 
UNSUPPORTED}}, - {"__vaddss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vaddus4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vavgs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vavgu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vhaddu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpeq4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpges4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpgeu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpgts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpgtu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmples4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpleu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmplts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpltu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vcmpne4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vabsdiffu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vmaxs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vmaxu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vmins4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vminu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vseteq4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetles4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetleu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetlts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetltu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetges4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetgeu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetgts4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, 
UNSUPPORTED}}, - {"__vsetgtu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsetne4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsadu4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsub4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsubss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsubus4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vneg4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vnegss4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vabsdiffs4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__vsads4", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - // fp16 functions - {"__float2half", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float2half2_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__floats2half2_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__low2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__high2float", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__float22half2_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half22float2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2int_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2int_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2int_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2int_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__int2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__int2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__int2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2short_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, 
- {"__half2short_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2short_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2short_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__short2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__short2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__short2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__short2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2uint_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2uint_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2uint_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2uint_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__uint2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ushort_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ushort_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ushort_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ushort_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ushort2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ushort2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ushort2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ushort2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ull_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ull_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ull_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ull_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ull2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ull2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ull2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ull2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ll_rn", {"", "", 
CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ll_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ll_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2ll_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ll2half_rn", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ll2half_rz", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ll2half_rd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ll2half_ru", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"htrunc", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hceil", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hfloor", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hrint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2trunc", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2ceil", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2floor", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2rint", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__lowhigh2highlow", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__lows2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__highs2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__high2half", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__low2half", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hisinf", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__halves2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__low2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__high2half2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half_as_short", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__half_as_ushort", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__short_as_half", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ushort_as_half", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ldg", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ldcg", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ldca", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__ldcs", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__heq2", 
{"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hle2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hge2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hlt2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hgt2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hequ2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hneu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hleu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hgtu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hisnan2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hadd2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hsub2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hmul2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__h2div", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hadd2_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hsub2_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hmul2_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hfma2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hfma2_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hneg2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hsub", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hmul", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hdiv", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hadd_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hsub_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hmul_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hfma", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hfma_sat", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hneg", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__habs2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__habs", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__hbeq2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - 
{"__hbne2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hble2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hbge2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hblt2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hbgt2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hbequ2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hbneu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hbleu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hbgeu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hbltu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hbgtu2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__heq", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hne", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hle", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hge", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hlt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hgt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hequ", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hneu", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hleu", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hgeu", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hltu", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hgtu", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__hisnan", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hsqrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hrsqrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hrcp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hlog", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hlog2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hlog10", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hexp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hexp2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hexp10", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hcos", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"hsin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2sqrt", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2rsqrt", {"", 
"", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2rcp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2log", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2log2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2log10", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2exp", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2exp2", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2exp10", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2cos", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"h2sin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"__shfl_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__shfl_up_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__shfl_down_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - {"__shfl_xor_sync", {"", "", CONV_DEVICE_FUNC, API_RUNTIME, UNSUPPORTED}}, - // atomic functions - {"atomicAdd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicSub", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicExch", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicMin", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicMax", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicInc", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicDec", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicAnd", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicOr", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicXor", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, - {"atomicCAS", {"", "", CONV_DEVICE_FUNC, API_RUNTIME}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp b/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp deleted file mode 100644 index ab07a10e93..0000000000 --- a/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp +++ /dev/null @@ -1,815 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Map of all CUDA Driver API functions -const std::map CUDA_DRIVER_FUNCTION_MAP{ - // 5.2. Error Handling - // no analogue - // NOTE: cudaGetErrorName and cuGetErrorName have different signatures - {"cuGetErrorName", {"hipGetErrorName_", "", CONV_ERROR, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: cudaGetErrorString and cuGetErrorString have different signatures - {"cuGetErrorString", {"hipGetErrorString_", "", CONV_ERROR, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.3. Initialization - // no analogue - {"cuInit", {"hipInit", "", CONV_INIT, API_DRIVER}}, - - // 5.4 Version Management - // cudaDriverGetVersion - {"cuDriverGetVersion", {"hipDriverGetVersion", "", CONV_VERSION, API_DRIVER}}, - - // 5.5. 
Device Management - // cudaGetDevice - // NOTE: cudaGetDevice has additional attr: int ordinal - {"cuDeviceGet", {"hipGetDevice", "", CONV_DEVICE, API_DRIVER}}, - // cudaDeviceGetAttribute - {"cuDeviceGetAttribute", {"hipDeviceGetAttribute", "", CONV_DEVICE, API_DRIVER}}, - // cudaGetDeviceCount - {"cuDeviceGetCount", {"hipGetDeviceCount", "", CONV_DEVICE, API_DRIVER}}, - // no analogue - {"cuDeviceGetLuid", {"hipDeviceGetLuid", "", CONV_DEVICE, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuDeviceGetName", {"hipDeviceGetName", "", CONV_DEVICE, API_DRIVER}}, - // cudaDeviceGetNvSciSyncAttributes - {"cuDeviceGetNvSciSyncAttributes", {"hipDeviceGetNvSciSyncAttributes", "", CONV_DEVICE, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuDeviceGetUuid", {"hipDeviceGetUuid", "", CONV_DEVICE, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuDeviceTotalMem", {"hipDeviceTotalMem", "", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceTotalMem_v2", {"hipDeviceTotalMem", "", CONV_DEVICE, API_DRIVER}}, - - // 5.6. Device Management [DEPRECATED] - {"cuDeviceComputeCapability", {"hipDeviceComputeCapability", "", CONV_DEVICE, API_DRIVER}}, - // no analogue - // NOTE: Not equal to cudaGetDeviceProperties due to different attributes: cudaDeviceProp and CUdevprop - {"cuDeviceGetProperties", {"hipGetDeviceProperties_", "", CONV_DEVICE, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.7. Primary Context Management - // no analogues - {"cuDevicePrimaryCtxGetState", {"hipDevicePrimaryCtxGetState", "", CONV_CONTEXT, API_DRIVER}}, - {"cuDevicePrimaryCtxRelease", {"hipDevicePrimaryCtxRelease", "", CONV_CONTEXT, API_DRIVER}}, - {"cuDevicePrimaryCtxReset", {"hipDevicePrimaryCtxReset", "", CONV_CONTEXT, API_DRIVER}}, - {"cuDevicePrimaryCtxRetain", {"hipDevicePrimaryCtxRetain", "", CONV_CONTEXT, API_DRIVER}}, - {"cuDevicePrimaryCtxSetFlags", {"hipDevicePrimaryCtxSetFlags", "", CONV_CONTEXT, API_DRIVER}}, - - // 5.8. 
Context Management - // no analogues, except a few - {"cuCtxCreate", {"hipCtxCreate", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxCreate_v2", {"hipCtxCreate", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxDestroy", {"hipCtxDestroy", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxDestroy_v2", {"hipCtxDestroy", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetApiVersion", {"hipCtxGetApiVersion", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetCacheConfig", {"hipCtxGetCacheConfig", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetCurrent", {"hipCtxGetCurrent", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetDevice", {"hipCtxGetDevice", "", CONV_CONTEXT, API_DRIVER}}, - // cudaGetDeviceFlags - // TODO: rename to hipGetDeviceFlags - {"cuCtxGetFlags", {"hipCtxGetFlags", "", CONV_CONTEXT, API_DRIVER}}, - // cudaDeviceGetLimit - {"cuCtxGetLimit", {"hipDeviceGetLimit", "", CONV_CONTEXT, API_DRIVER}}, - // cudaDeviceGetSharedMemConfig - // TODO: rename to hipDeviceGetSharedMemConfig - {"cuCtxGetSharedMemConfig", {"hipCtxGetSharedMemConfig", "", CONV_CONTEXT, API_DRIVER}}, - // cudaDeviceGetStreamPriorityRange - {"cuCtxGetStreamPriorityRange", {"hipDeviceGetStreamPriorityRange", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxPopCurrent", {"hipCtxPopCurrent", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxPopCurrent_v2", {"hipCtxPopCurrent", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxPushCurrent", {"hipCtxPushCurrent", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxPushCurrent_v2", {"hipCtxPushCurrent", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxSetCacheConfig", {"hipCtxSetCacheConfig", "", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxSetCurrent", {"hipCtxSetCurrent", "", CONV_CONTEXT, API_DRIVER}}, - // cudaDeviceSetLimit - {"cuCtxSetLimit", {"hipDeviceSetLimit", "", CONV_CONTEXT, API_DRIVER}}, - // cudaDeviceSetSharedMemConfig - // TODO: rename to hipDeviceSetSharedMemConfig - {"cuCtxSetSharedMemConfig", {"hipCtxSetSharedMemConfig", "", CONV_CONTEXT, API_DRIVER}}, - // cudaDeviceSynchronize - // TODO: rename to hipDeviceSynchronize - 
{"cuCtxSynchronize", {"hipCtxSynchronize", "", CONV_CONTEXT, API_DRIVER}}, - - // 5.9. Context Management [DEPRECATED] - // no analogues - {"cuCtxAttach", {"hipCtxAttach", "", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuCtxDetach", {"hipCtxDetach", "", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.10. Module Management - // no analogues - {"cuLinkAddData", {"hipLinkAddData", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkAddData_v2", {"hipLinkAddData", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkAddFile", {"hipLinkAddFile", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkAddFile_v2", {"hipLinkAddFile", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkComplete", {"hipLinkComplete", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkCreate", {"hipLinkCreate", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkCreate_v2", {"hipLinkCreate", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkDestroy", {"hipLinkDestroy", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuModuleGetFunction", {"hipModuleGetFunction", "", CONV_MODULE, API_DRIVER}}, - {"cuModuleGetGlobal", {"hipModuleGetGlobal", "", CONV_MODULE, API_DRIVER}}, - {"cuModuleGetGlobal_v2", {"hipModuleGetGlobal", "", CONV_MODULE, API_DRIVER}}, - {"cuModuleGetSurfRef", {"hipModuleGetSurfRef", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuModuleGetTexRef", {"hipModuleGetTexRef", "", CONV_MODULE, API_DRIVER}}, - {"cuModuleLoad", {"hipModuleLoad", "", CONV_MODULE, API_DRIVER}}, - {"cuModuleLoadData", {"hipModuleLoadData", "", CONV_MODULE, API_DRIVER}}, - {"cuModuleLoadDataEx", {"hipModuleLoadDataEx", "", CONV_MODULE, API_DRIVER}}, - {"cuModuleLoadFatBinary", {"hipModuleLoadFatBinary", "", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuModuleUnload", {"hipModuleUnload", "", CONV_MODULE, API_DRIVER}}, - - // 5.11. 
Memory Management - // no analogue - {"cuArray3DCreate", {"hipArray3DCreate", "", CONV_MEMORY, API_DRIVER}}, - {"cuArray3DCreate_v2", {"hipArray3DCreate", "", CONV_MEMORY, API_DRIVER}}, - {"cuArray3DGetDescriptor", {"hipArray3DGetDescriptor", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuArray3DGetDescriptor_v2", {"hipArray3DGetDescriptor", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuArrayCreate", {"hipArrayCreate", "", CONV_MEMORY, API_DRIVER}}, - {"cuArrayCreate_v2", {"hipArrayCreate", "", CONV_MEMORY, API_DRIVER}}, - {"cuArrayDestroy", {"hipArrayDestroy", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuArrayGetDescriptor", {"hipArrayGetDescriptor", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuArrayGetDescriptor_v2", {"hipArrayGetDescriptor", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaDeviceGetByPCIBusId - {"cuDeviceGetByPCIBusId", {"hipDeviceGetByPCIBusId", "", CONV_MEMORY, API_DRIVER}}, - // cudaDeviceGetPCIBusId - {"cuDeviceGetPCIBusId", {"hipDeviceGetPCIBusId", "", CONV_MEMORY, API_DRIVER}}, - // cudaIpcCloseMemHandle - {"cuIpcCloseMemHandle", {"hipIpcCloseMemHandle", "", CONV_MEMORY, API_DRIVER}}, - // cudaIpcGetEventHandle - {"cuIpcGetEventHandle", {"hipIpcGetEventHandle", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaIpcGetMemHandle - {"cuIpcGetMemHandle", {"hipIpcGetMemHandle", "", CONV_MEMORY, API_DRIVER}}, - // cudaIpcOpenEventHandle - {"cuIpcOpenEventHandle", {"hipIpcOpenEventHandle", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaIpcOpenMemHandle - {"cuIpcOpenMemHandle", {"hipIpcOpenMemHandle", "", CONV_MEMORY, API_DRIVER}}, - // cudaMalloc - {"cuMemAlloc", {"hipMalloc", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemAlloc_v2", {"hipMalloc", "", CONV_MEMORY, API_DRIVER}}, - // cudaHostAlloc - {"cuMemAllocHost", {"hipHostMalloc", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemAllocHost_v2", {"hipHostMalloc", "", CONV_MEMORY, API_DRIVER}}, - // cudaMallocManaged - {"cuMemAllocManaged", 
{"hipMallocManaged", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - // NOTE: Not equal to cudaMallocPitch due to different signatures - {"cuMemAllocPitch", {"hipMemAllocPitch", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemAllocPitch_v2", {"hipMemAllocPitch", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - // NOTE: Not equal to cudaMemcpy due to different signatures - {"cuMemcpy", {"hipMemcpy_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaMemcpy2D due to different signatures - {"cuMemcpy2D", {"hipMemcpyParam2D", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpy2D_v2", {"hipMemcpyParam2D", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - // NOTE: Not equal to cudaMemcpy2DAsync/hipMemcpy2DAsync due to different signatures - {"cuMemcpy2DAsync", {"hipMemcpyParam2DAsync", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpy2DAsync_v2", {"hipMemcpyParam2DAsync", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemcpy2DUnaligned", {"hipMemcpy2DUnaligned", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpy2DUnaligned_v2", {"hipMemcpy2DUnaligned", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaMemcpy3D due to different signatures - {"cuMemcpy3D", {"hipMemcpy3D_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpy3D_v2", {"hipMemcpy3D_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaMemcpy3DAsync due to different signatures - {"cuMemcpy3DAsync", {"hipMemcpy3DAsync_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpy3DAsync_v2", {"hipMemcpy3DAsync_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaMemcpy3DPeer due to different signatures - {"cuMemcpy3DPeer", {"hipMemcpy3DPeer_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaMemcpy3DPeerAsync due to different signatures - {"cuMemcpy3DPeerAsync", {"hipMemcpy3DPeerAsync_", "", 
CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaMemcpyAsync due to different signatures - {"cuMemcpyAsync", {"hipMemcpyAsync_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaMemcpyArrayToArray due to different signatures - {"cuMemcpyAtoA", {"hipMemcpyAtoA", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyAtoA_v2", {"hipMemcpyAtoA", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemcpyAtoD", {"hipMemcpyAtoD", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyAtoD_v2", {"hipMemcpyAtoD", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemcpyAtoH", {"hipMemcpyAtoH", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyAtoH_v2", {"hipMemcpyAtoH", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemcpyAtoHAsync", {"hipMemcpyAtoHAsync", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyAtoHAsync_v2", {"hipMemcpyAtoHAsync", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemcpyDtoA", {"hipMemcpyDtoA", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyDtoA_v2", {"hipMemcpyDtoA", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemcpyDtoD", {"hipMemcpyDtoD", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyDtoD_v2", {"hipMemcpyDtoD", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemcpyDtoDAsync", {"hipMemcpyDtoDAsync", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyDtoDAsync_v2", {"hipMemcpyDtoDAsync", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemcpyDtoH", {"hipMemcpyDtoH", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyDtoH_v2", {"hipMemcpyDtoH", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemcpyDtoHAsync", {"hipMemcpyDtoHAsync", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyDtoHAsync_v2", {"hipMemcpyDtoHAsync", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemcpyHtoA", {"hipMemcpyHtoA", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyHtoA_v2", 
{"hipMemcpyHtoA", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemcpyHtoAAsync", {"hipMemcpyHtoAAsync", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyHtoAAsync_v2", {"hipMemcpyHtoAAsync", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemcpyHtoD", {"hipMemcpyHtoD", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyHtoD_v2", {"hipMemcpyHtoD", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemcpyHtoDAsync", {"hipMemcpyHtoDAsync", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyHtoDAsync_v2", {"hipMemcpyHtoDAsync", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - // NOTE: Not equal to cudaMemcpyPeer due to different signatures - {"cuMemcpyPeer", {"hipMemcpyPeer_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaMemcpyPeerAsync due to different signatures - {"cuMemcpyPeerAsync", {"hipMemcpyPeerAsync_", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaFree - {"cuMemFree", {"hipFree", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemFree_v2", {"hipFree", "", CONV_MEMORY, API_DRIVER}}, - // cudaFreeHost - {"cuMemFreeHost", {"hipHostFree", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemGetAddressRange", {"hipMemGetAddressRange", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemGetAddressRange_v2", {"hipMemGetAddressRange", "", CONV_MEMORY, API_DRIVER}}, - // cudaMemGetInfo - {"cuMemGetInfo", {"hipMemGetInfo", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemGetInfo_v2", {"hipMemGetInfo", "", CONV_MEMORY, API_DRIVER}}, - // cudaHostAlloc - {"cuMemHostAlloc", {"hipHostMalloc", "", CONV_MEMORY, API_DRIVER}}, - // cudaHostGetDevicePointer - {"cuMemHostGetDevicePointer", {"hipHostGetDevicePointer", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemHostGetDevicePointer_v2", {"hipHostGetDevicePointer", "", CONV_MEMORY, API_DRIVER}}, - // cudaHostGetFlags - {"cuMemHostGetFlags", {"hipHostGetFlags", "", CONV_MEMORY, API_DRIVER}}, - // cudaHostRegister - {"cuMemHostRegister", {"hipHostRegister", "", CONV_MEMORY, 
API_DRIVER}}, - {"cuMemHostRegister_v2", {"hipHostRegister", "", CONV_MEMORY, API_DRIVER}}, - // cudaHostUnregister - {"cuMemHostUnregister", {"hipHostUnregister", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemsetD16", {"hipMemsetD16", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemsetD16_v2", {"hipMemsetD16", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemsetD16Async", {"hipMemsetD16Async", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemsetD2D16", {"hipMemsetD2D16", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D16_v2", {"hipMemsetD2D16", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemsetD2D16Async", {"hipMemsetD2D16Async", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemsetD2D32", {"hipMemsetD2D32", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D32_v2", {"hipMemsetD2D32", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemsetD2D32Async", {"hipMemsetD2D32Async", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemsetD2D8", {"hipMemsetD2D8", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D8_v2", {"hipMemsetD2D8", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuMemsetD2D8Async", {"hipMemsetD2D8Async", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaMemset - {"cuMemsetD32", {"hipMemsetD32", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemsetD32_v2", {"hipMemsetD32", "", CONV_MEMORY, API_DRIVER}}, - // cudaMemsetAsync - {"cuMemsetD32Async", {"hipMemsetD32Async", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemsetD8", {"hipMemsetD8", "", CONV_MEMORY, API_DRIVER}}, - {"cuMemsetD8_v2", {"hipMemsetD8", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - {"cuMemsetD8Async", {"hipMemsetD8Async", "", CONV_MEMORY, API_DRIVER}}, - // no analogue - // NOTE: Not equal to cudaMallocMipmappedArray due to different signatures - {"cuMipmappedArrayCreate", {"hipMipmappedArrayCreate", "", 
CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaFreeMipmappedArray due to different signatures - {"cuMipmappedArrayDestroy", {"hipMipmappedArrayDestroy", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaGetMipmappedArrayLevel due to different signatures - {"cuMipmappedArrayGetLevel", {"hipMipmappedArrayGetLevel", "", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.12. Virtual Memory Management - // no analogue - {"cuMemAddressFree", {"hipMemAddressFree", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemAddressReserve", {"hipMemAddressReserve", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemCreate", {"hipMemCreate", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemExportToShareableHandle", {"hipMemExportToShareableHandle", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemGetAccess", {"hipMemGetAccess", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemGetAllocationGranularity", {"hipMemGetAllocationGranularity", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemGetAllocationPropertiesFromHandle", {"hipMemGetAllocationPropertiesFromHandle", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemImportFromShareableHandle", {"hipMemImportFromShareableHandle", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemMap", {"hipMemMap", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemRelease", {"hipMemRelease", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemSetAccess", {"hipMemSetAccess", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemUnmap", {"hipMemUnmap", "", CONV_VIRTUAL_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.13. 
Unified Addressing - // cudaMemAdvise - {"cuMemAdvise", {"hipMemAdvise", "", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, - // TODO: double check cudaMemPrefetchAsync - {"cuMemPrefetchAsync", {"hipMemPrefetchAsync_", "", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaMemRangeGetAttribute - {"cuMemRangeGetAttribute", {"hipMemRangeGetAttribute", "", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaMemRangeGetAttributes - {"cuMemRangeGetAttributes", {"hipMemRangeGetAttributes", "", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuPointerGetAttribute", {"hipPointerGetAttribute", "", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaPointerGetAttributes due to different signatures - {"cuPointerGetAttributes", {"hipPointerGetAttributes_", "", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuPointerSetAttribute", {"hipPointerSetAttribute", "", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.14. 
Stream Management - // cudaStreamAddCallback - {"cuStreamAddCallback", {"hipStreamAddCallback", "", CONV_STREAM, API_DRIVER}}, - // cudaStreamAttachMemAsync - {"cuStreamAttachMemAsync", {"hipStreamAttachMemAsync", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaStreamBeginCapture - {"cuStreamBeginCapture", {"hipStreamBeginCapture", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamBeginCapture_v2", {"hipStreamBeginCapture", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamBeginCapture_ptsz", {"hipStreamBeginCapture", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaStreamCreateWithFlags - {"cuStreamCreate", {"hipStreamCreateWithFlags", "", CONV_STREAM, API_DRIVER}}, - // cudaStreamCreateWithPriority - {"cuStreamCreateWithPriority", {"hipStreamCreateWithPriority", "", CONV_STREAM, API_DRIVER}}, - // cudaStreamDestroy - {"cuStreamDestroy", {"hipStreamDestroy", "", CONV_STREAM, API_DRIVER}}, - {"cuStreamDestroy_v2", {"hipStreamDestroy", "", CONV_STREAM, API_DRIVER}}, - // cudaStreamEndCapture - {"cuStreamEndCapture", {"hipStreamEndCapture", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaStreamGetCaptureInfo - {"cuStreamGetCaptureInfo", {"hipStreamGetCaptureInfo", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuStreamGetCtx", {"hipStreamGetContext", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaStreamGetFlags - {"cuStreamGetFlags", {"hipStreamGetFlags", "", CONV_STREAM, API_DRIVER}}, - // cudaStreamGetPriority - {"cuStreamGetPriority", {"hipStreamGetPriority", "", CONV_STREAM, API_DRIVER}}, - // cudaStreamIsCapturing - {"cuStreamIsCapturing", {"hipStreamIsCapturing", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaStreamQuery - {"cuStreamQuery", {"hipStreamQuery", "", CONV_STREAM, API_DRIVER}}, - // cudaStreamSynchronize - {"cuStreamSynchronize", {"hipStreamSynchronize", "", CONV_STREAM, API_DRIVER}}, - // cudaStreamWaitEvent - {"cuStreamWaitEvent", {"hipStreamWaitEvent", "", 
CONV_STREAM, API_DRIVER}}, - // cudaThreadExchangeStreamCaptureMode - {"cuThreadExchangeStreamCaptureMode", {"hipThreadExchangeStreamCaptureMode", "", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.15. Event Management - // cudaEventCreateWithFlags - {"cuEventCreate", {"hipEventCreateWithFlags", "", CONV_EVENT, API_DRIVER}}, - // cudaEventDestroy - {"cuEventDestroy", {"hipEventDestroy", "", CONV_EVENT, API_DRIVER}}, - {"cuEventDestroy_v2", {"hipEventDestroy", "", CONV_EVENT, API_DRIVER}}, - // cudaEventElapsedTime - {"cuEventElapsedTime", {"hipEventElapsedTime", "", CONV_EVENT, API_DRIVER}}, - // cudaEventQuery - {"cuEventQuery", {"hipEventQuery", "", CONV_EVENT, API_DRIVER}}, - // cudaEventRecord - {"cuEventRecord", {"hipEventRecord", "", CONV_EVENT, API_DRIVER}}, - // cudaEventSynchronize - {"cuEventSynchronize", {"hipEventSynchronize", "", CONV_EVENT, API_DRIVER}}, - - // 5.16. External Resource Interoperability - // cudaDestroyExternalMemory - {"cuDestroyExternalMemory", {"hipDestroyExternalMemory", "", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaDestroyExternalSemaphore - {"cuDestroyExternalSemaphore", {"hipDestroyExternalSemaphore", "", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaExternalMemoryGetMappedBuffer - {"cuExternalMemoryGetMappedBuffer", {"hipExternalMemoryGetMappedBuffer", "", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaExternalMemoryGetMappedMipmappedArray - {"cuExternalMemoryGetMappedMipmappedArray", {"hipExternalMemoryGetMappedMipmappedArray", "", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaImportExternalMemory - {"cuImportExternalMemory", {"hipImportExternalMemory", "", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaImportExternalSemaphore - {"cuImportExternalSemaphore", {"hipImportExternalSemaphore", "", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaSignalExternalSemaphoresAsync - {"cuSignalExternalSemaphoresAsync", {"hipSignalExternalSemaphoresAsync", "", CONV_EXT_RES, API_DRIVER, 
HIP_UNSUPPORTED}}, - // cudaWaitExternalSemaphoresAsync - {"cuWaitExternalSemaphoresAsync", {"hipWaitExternalSemaphoresAsync", "", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.17. Stream Memory Operations - // no analogues - {"cuStreamBatchMemOp", {"hipStreamBatchMemOp", "", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamWaitValue32", {"hipStreamWaitValue32", "", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamWaitValue64", {"hipStreamWaitValue64", "", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamWriteValue32", {"hipStreamWriteValue32", "", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamWriteValue64", {"hipStreamWriteValue64", "", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.18.Execution Control - // no analogue - {"cuFuncGetAttribute", {"hipFuncGetAttribute", "", CONV_EXECUTION, API_DRIVER}}, - // no analogue - // NOTE: Not equal to cudaFuncSetAttribute due to different signatures - {"cuFuncSetAttribute", {"hipFuncSetAttribute", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaFuncSetCacheConfig due to different signatures - {"cuFuncSetCacheConfig", {"hipFuncSetCacheConfig", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaFuncSetSharedMemConfig due to different signatures - {"cuFuncSetSharedMemConfig", {"hipFuncSetSharedMemConfig", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaLaunchCooperativeKernel due to different signatures - {"cuLaunchCooperativeKernel", {"hipLaunchCooperativeKernel_", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaLaunchCooperativeKernelMultiDevice due to different signatures - {"cuLaunchCooperativeKernelMultiDevice", {"hipLaunchCooperativeKernelMultiDevice_", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaLaunchHostFunc - {"cuLaunchHostFunc", 
{"hipLaunchHostFunc", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaLaunchKernel due to different signatures - {"cuLaunchKernel", {"hipModuleLaunchKernel", "", CONV_EXECUTION, API_DRIVER}}, - - // 5.19.Execution Control [DEPRECATED] - // no analogue - {"cuFuncSetBlockShape", {"hipFuncSetBlockShape", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuFuncSetSharedSize", {"hipFuncSetSharedSize", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaLaunch due to different signatures - {"cuLaunch", {"hipLaunch", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuLaunchGrid", {"hipLaunchGrid", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuLaunchGridAsync", {"hipLaunchGridAsync", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuParamSetf", {"hipParamSetf", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuParamSeti", {"hipParamSeti", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuParamSetSize", {"hipParamSetSize", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuParamSetTexRef", {"hipParamSetTexRef", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuParamSetv", {"hipParamSetv", "", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.20. 
Graph Management - // cudaGraphAddChildGraphNode - {"cuGraphAddChildGraphNode", {"hipGraphAddChildGraphNode", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphAddDependencies - {"cuGraphAddDependencies", {"hipGraphAddDependencies", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphAddEmptyNode - {"cuGraphAddEmptyNode", {"hipGraphAddEmptyNode", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphAddHostNode - {"cuGraphAddHostNode", {"hipGraphAddHostNode", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphAddKernelNode - {"cuGraphAddKernelNode", {"hipGraphAddKernelNode", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphAddMemcpyNode - {"cuGraphAddMemcpyNode", {"hipGraphAddMemcpyNode", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphAddMemsetNode - {"cuGraphAddMemsetNode", {"hipGraphAddMemsetNode", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphChildGraphNodeGetGraph - {"cuGraphChildGraphNodeGetGraph", {"hipGraphChildGraphNodeGetGraph", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphClone - {"cuGraphClone", {"hipGraphClone", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphCreate - {"cuGraphCreate", {"hipGraphCreate", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphDestroy - {"cuGraphDestroy", {"hipGraphDestroy", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphDestroyNode - {"cuGraphDestroyNode", {"hipGraphDestroyNode", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphExecDestroy - {"cuGraphExecDestroy", {"hipGraphExecDestroy", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphGetEdges - {"cuGraphGetEdges", {"hipGraphGetEdges", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphGetNodes - {"cuGraphGetNodes", {"hipGraphGetNodes", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphGetRootNodes - {"cuGraphGetRootNodes", {"hipGraphGetRootNodes", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // 
cudaGraphHostNodeGetParams - {"cuGraphHostNodeGetParams", {"hipGraphHostNodeGetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphHostNodeSetParams - {"cuGraphHostNodeSetParams", {"hipGraphHostNodeSetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphInstantiate - {"cuGraphInstantiate", {"hipGraphInstantiate", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphExecKernelNodeSetParams - {"cuGraphExecKernelNodeSetParams", {"hipGraphExecKernelNodeSetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphKernelNodeGetParams - {"cuGraphKernelNodeGetParams", {"hipGraphKernelNodeGetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphKernelNodeSetParams - {"cuGraphKernelNodeSetParams", {"hipGraphKernelNodeSetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphLaunch - {"cuGraphLaunch", {"hipGraphLaunch", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphMemcpyNodeGetParams - {"cuGraphMemcpyNodeGetParams", {"hipGraphMemcpyNodeGetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphMemcpyNodeSetParams - {"cuGraphMemcpyNodeSetParams", {"hipGraphMemcpyNodeSetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphMemsetNodeGetParams - {"cuGraphMemsetNodeGetParams", {"hipGraphMemsetNodeGetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphMemsetNodeSetParams - {"cuGraphMemsetNodeSetParams", {"hipGraphMemsetNodeSetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphNodeFindInClone - {"cuGraphNodeFindInClone", {"hipGraphNodeFindInClone", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphNodeGetDependencies - {"cuGraphNodeGetDependencies", {"hipGraphNodeGetDependencies", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphNodeGetDependentNodes - {"cuGraphNodeGetDependentNodes", {"hipGraphNodeGetDependentNodes", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphNodeGetType - 
{"cuGraphNodeGetType", {"hipGraphNodeGetType", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphRemoveDependencies - {"cuGraphRemoveDependencies", {"hipGraphRemoveDependencies", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphExecMemcpyNodeSetParams - {"cuGraphExecMemcpyNodeSetParams", {"hipGraphExecMemcpyNodeSetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphExecMemsetNodeSetParams - {"cuGraphExecMemsetNodeSetParams", {"hipGraphExecMemsetNodeSetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphExecHostNodeSetParams - {"cuGraphExecHostNodeSetParams", {"hipGraphExecHostNodeSetParams", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphExecUpdate - {"cuGraphExecUpdate", {"hipGraphExecUpdate", "", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.21. Occupancy - // cudaOccupancyMaxActiveBlocksPerMultiprocessor - {"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_DRIVER}}, - // cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags - {"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_DRIVER}}, - // cudaOccupancyMaxPotentialBlockSize - {"cuOccupancyMaxPotentialBlockSize", {"hipOccupancyMaxPotentialBlockSize", "", CONV_OCCUPANCY, API_DRIVER}}, - // cudaOccupancyMaxPotentialBlockSizeWithFlags - {"cuOccupancyMaxPotentialBlockSizeWithFlags", {"hipOccupancyMaxPotentialBlockSizeWithFlags", "", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.22. 
Texture Reference Management [DEPRECATED] - // no analogues - {"cuTexRefGetAddress", {"hipTexRefGetAddress", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefGetAddress_v2", {"hipTexRefGetAddress", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefGetAddressMode", {"hipTexRefGetAddressMode", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefGetArray", {"hipTexRefGetArray", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefGetBorderColor", {"hipTexRefGetBorderColor", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetFilterMode", {"hipTexRefGetFilterMode", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetFlags", {"hipTexRefGetFlags", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetFormat", {"hipTexRefGetFormat", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMaxAnisotropy", {"hipTexRefGetMaxAnisotropy", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMipmapFilterMode", {"hipTexRefGetMipmapFilterMode", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMipmapLevelBias", {"hipTexRefGetMipmapLevelBias", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMipmapLevelClamp", {"hipTexRefGetMipmapLevelClamp", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMipmappedArray", {"hipTexRefGetMipmappedArray", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetAddress", {"hipTexRefSetAddress", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetAddress_v2", {"hipTexRefSetAddress", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetAddress2D", {"hipTexRefSetAddress2D", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetAddress2D_v2", {"hipTexRefSetAddress2D", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetAddress2D_v3", {"hipTexRefSetAddress2D", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetAddressMode", {"hipTexRefSetAddressMode", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetArray", {"hipTexRefSetArray", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetBorderColor", 
{"hipTexRefSetBorderColor", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetFilterMode", {"hipTexRefSetFilterMode", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetFlags", {"hipTexRefSetFlags", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetFormat", {"hipTexRefSetFormat", "", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetMaxAnisotropy", {"hipTexRefSetMaxAnisotropy", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetMipmapFilterMode", {"hipTexRefSetMipmapFilterMode", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetMipmapLevelBias", {"hipTexRefSetMipmapLevelBias", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetMipmapLevelClamp", {"hipTexRefSetMipmapLevelClamp", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetMipmappedArray", {"hipTexRefSetMipmappedArray", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefCreate", {"hipTexRefCreate", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefDestroy", {"hipTexRefDestroy", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.23. Surface Reference Management [DEPRECATED] - // no analogues - {"cuSurfRefGetArray", {"hipSurfRefGetArray", "", CONV_SURFACE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuSurfRefSetArray", {"hipSurfRefSetArray", "", CONV_SURFACE, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.24. 
Texture Object Management - // no analogue - // NOTE: Not equal to cudaCreateTextureObject due to different signatures - {"cuTexObjectCreate", {"hipTexObjectCreate", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaDestroyTextureObject - {"cuTexObjectDestroy", {"hipTexObjectDestroy", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaGetTextureObjectResourceDesc due to different signatures - {"cuTexObjectGetResourceDesc", {"hipTexObjectGetResourceDesc", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGetTextureObjectResourceViewDesc - {"cuTexObjectGetResourceViewDesc", {"hipTexObjectGetResourceViewDesc", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaGetTextureObjectTextureDesc due to different signatures - {"cuTexObjectGetTextureDesc", {"hipTexObjectGetTextureDesc", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.25. Surface Object Management - // no analogue - // NOTE: Not equal to cudaCreateSurfaceObject due to different signatures - {"cuSurfObjectCreate", {"hipSurfObjectCreate", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaDestroySurfaceObject - {"cuSurfObjectDestroy", {"hipSurfObjectDestroy", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaGetSurfaceObjectResourceDesc due to different signatures - {"cuSurfObjectGetResourceDesc", {"hipSurfObjectGetResourceDesc", "", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.26. 
Peer Context Memory Access - // no analogue - // NOTE: Not equal to cudaDeviceEnablePeerAccess due to different signatures - {"cuCtxEnablePeerAccess", {"hipCtxEnablePeerAccess", "", CONV_PEER, API_DRIVER}}, - // no analogue - // NOTE: Not equal to cudaDeviceDisablePeerAccess due to different signatures - {"cuCtxDisablePeerAccess", {"hipCtxDisablePeerAccess", "", CONV_PEER, API_DRIVER}}, - // cudaDeviceCanAccessPeer - {"cuDeviceCanAccessPeer", {"hipDeviceCanAccessPeer", "", CONV_PEER, API_DRIVER}}, - // cudaDeviceGetP2PAttribute - {"cuDeviceGetP2PAttribute", {"hipDeviceGetP2PAttribute", "", CONV_PEER, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.27. Graphics Interoperability - // cudaGraphicsMapResources - {"cuGraphicsMapResources", {"hipGraphicsMapResources", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsResourceGetMappedMipmappedArray - {"cuGraphicsResourceGetMappedMipmappedArray", {"hipGraphicsResourceGetMappedMipmappedArray", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsResourceGetMappedPointer - {"cuGraphicsResourceGetMappedPointer", {"hipGraphicsResourceGetMappedPointer", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsResourceGetMappedPointer - {"cuGraphicsResourceGetMappedPointer_v2", {"hipGraphicsResourceGetMappedPointer", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsResourceSetMapFlags - {"cuGraphicsResourceSetMapFlags", {"hipGraphicsResourceSetMapFlags", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsResourceSetMapFlags - {"cuGraphicsResourceSetMapFlags_v2", {"hipGraphicsResourceSetMapFlags", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsSubResourceGetMappedArray - {"cuGraphicsSubResourceGetMappedArray", {"hipGraphicsSubResourceGetMappedArray", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsUnmapResources - {"cuGraphicsUnmapResources", {"hipGraphicsUnmapResources", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - 
// cudaGraphicsUnregisterResource - {"cuGraphicsUnregisterResource", {"hipGraphicsUnregisterResource", "", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.28. Profiler Control - // cudaProfilerInitialize - {"cuProfilerInitialize", {"hipProfilerInitialize", "", CONV_PROFILER, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaProfilerStart - {"cuProfilerStart", {"hipProfilerStart", "", CONV_PROFILER, API_DRIVER}}, - // cudaProfilerStop - {"cuProfilerStop", {"hipProfilerStop", "", CONV_PROFILER, API_DRIVER}}, - - // 5.29. OpenGL Interoperability - // cudaGLGetDevices - {"cuGLGetDevices", {"hipGLGetDevices", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsGLRegisterBuffer - {"cuGraphicsGLRegisterBuffer", {"hipGraphicsGLRegisterBuffer", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsGLRegisterImage - {"cuGraphicsGLRegisterImage", {"hipGraphicsGLRegisterImage", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaWGLGetDevice - {"cuWGLGetDevice", {"hipWGLGetDevice", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.29. 
OpenGL Interoperability [DEPRECATED] - // no analogue - {"cuGLCtxCreate", {"hipGLCtxCreate", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuGLInit", {"hipGLInit", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaGLMapBufferObject due to different signatures - {"cuGLMapBufferObject", {"hipGLMapBufferObject_", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cudaGLMapBufferObjectAsync due to different signatures - {"cuGLMapBufferObjectAsync", {"hipGLMapBufferObjectAsync_", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGLRegisterBufferObject - {"cuGLRegisterBufferObject", {"hipGLRegisterBufferObject", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGLSetBufferObjectMapFlags - {"cuGLSetBufferObjectMapFlags", {"hipGLSetBufferObjectMapFlags", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGLUnmapBufferObject - {"cuGLUnmapBufferObject", {"hipGLUnmapBufferObject", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGLUnmapBufferObjectAsync - {"cuGLUnmapBufferObjectAsync", {"hipGLUnmapBufferObjectAsync", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGLUnregisterBufferObject - {"cuGLUnregisterBufferObject", {"hipGLUnregisterBufferObject", "", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.30.Direct3D 9 Interoperability - // no analogue - {"cuD3D9CtxCreate", {"hipD3D9CtxCreate", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuD3D9CtxCreateOnDevice", {"hipD3D9CtxCreateOnDevice", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9GetDevice - {"cuD3D9GetDevice", {"hipD3D9GetDevice", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9GetDevices - {"cuD3D9GetDevices", {"hipD3D9GetDevices", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9GetDirect3DDevice - {"cuD3D9GetDirect3DDevice", {"hipD3D9GetDirect3DDevice", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // 
cudaGraphicsD3D9RegisterResource - {"cuGraphicsD3D9RegisterResource", {"hipGraphicsD3D9RegisterResource", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.30.Direct3D 9 Interoperability [DEPRECATED] - // cudaD3D9MapResources - {"cuD3D9MapResources", {"hipD3D9MapResources", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9RegisterResource - {"cuD3D9RegisterResource", {"hipD3D9RegisterResource", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9ResourceGetMappedArray - {"cuD3D9ResourceGetMappedArray", {"hipD3D9ResourceGetMappedArray", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9ResourceGetMappedPitch - {"cuD3D9ResourceGetMappedPitch", {"hipD3D9ResourceGetMappedPitch", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9ResourceGetMappedPointer - {"cuD3D9ResourceGetMappedPointer", {"hipD3D9ResourceGetMappedPointer", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9ResourceGetMappedSize - {"cuD3D9ResourceGetMappedSize", {"hipD3D9ResourceGetMappedSize", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9ResourceGetSurfaceDimensions - {"cuD3D9ResourceGetSurfaceDimensions", {"hipD3D9ResourceGetSurfaceDimensions", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9ResourceSetMapFlags - {"cuD3D9ResourceSetMapFlags", {"hipD3D9ResourceSetMapFlags", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9UnmapResources - {"cuD3D9UnmapResources", {"hipD3D9UnmapResources", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D9UnregisterResource - {"cuD3D9UnregisterResource", {"hipD3D9UnregisterResource", "", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.31. 
Direct3D 10 Interoperability - // cudaD3D10GetDevice - {"cuD3D10GetDevice", {"hipD3D10GetDevice", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10GetDevices - {"cuD3D10GetDevices", {"hipD3D10GetDevices", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsD3D10RegisterResource - {"cuGraphicsD3D10RegisterResource", {"hipGraphicsD3D10RegisterResource", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.31. Direct3D 10 Interoperability [DEPRECATED] - // no analogue - {"cuD3D10CtxCreate", {"hipD3D10CtxCreate", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuD3D10CtxCreateOnDevice", {"hipD3D10CtxCreateOnDevice", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10GetDirect3DDevice - {"cuD3D10GetDirect3DDevice", {"hipD3D10GetDirect3DDevice", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10MapResources - {"cuD3D10MapResources", {"hipD3D10MapResources", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10RegisterResource - {"cuD3D10RegisterResource", {"hipD3D10RegisterResource", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10ResourceGetMappedArray - {"cuD3D10ResourceGetMappedArray", {"hipD3D10ResourceGetMappedArray", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10ResourceGetMappedPitch - {"cuD3D10ResourceGetMappedPitch", {"hipD3D10ResourceGetMappedPitch", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10ResourceGetMappedPointer - {"cuD3D10ResourceGetMappedPointer", {"hipD3D10ResourceGetMappedPointer", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10ResourceGetMappedSize - {"cuD3D10ResourceGetMappedSize", {"hipD3D10ResourceGetMappedSize", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10ResourceGetSurfaceDimensions - {"cuD3D10ResourceGetSurfaceDimensions", {"hipD3D10ResourceGetSurfaceDimensions", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10ResourceSetMapFlags - {"cuD310ResourceSetMapFlags", 
{"hipD3D10ResourceSetMapFlags", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10UnmapResources - {"cuD3D10UnmapResources", {"hipD3D10UnmapResources", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D10UnregisterResource - {"cuD3D10UnregisterResource", {"hipD3D10UnregisterResource", "", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.32. Direct3D 11 Interoperability - // cudaD3D11GetDevice - {"cuD3D11GetDevice", {"hipD3D11GetDevice", "", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D11GetDevices - {"cuD3D11GetDevices", {"hipD3D11GetDevices", "", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsD3D11RegisterResource - {"cuGraphicsD3D11RegisterResource", {"hipGraphicsD3D11RegisterResource", "", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.32. Direct3D 11 Interoperability [DEPRECATED] - // no analogue - {"cuD3D11CtxCreate", {"hipD3D11CtxCreate", "", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuD3D11CtxCreateOnDevice", {"hipD3D11CtxCreateOnDevice", "", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaD3D11GetDirect3DDevice - {"cuD3D11GetDirect3DDevice", {"hipD3D11GetDirect3DDevice", "", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.33. VDPAU Interoperability - // cudaGraphicsVDPAURegisterOutputSurface - {"cuGraphicsVDPAURegisterOutputSurface", {"hipGraphicsVDPAURegisterOutputSurface", "", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsVDPAURegisterVideoSurface - {"cuGraphicsVDPAURegisterVideoSurface", {"hipGraphicsVDPAURegisterVideoSurface", "", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaVDPAUGetDevice - {"cuVDPAUGetDevice", {"hipVDPAUGetDevice", "", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"cuVDPAUCtxCreate", {"hipVDPAUCtxCreate", "", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, - - // 5.34. 
EGL Interoperability - // cudaEGLStreamConsumerAcquireFrame - {"cuEGLStreamConsumerAcquireFrame", {"hipEGLStreamConsumerAcquireFrame", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEGLStreamConsumerConnect - {"cuEGLStreamConsumerConnect", {"hipEGLStreamConsumerConnect", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEGLStreamConsumerConnectWithFlags - {"cuEGLStreamConsumerConnectWithFlags", {"hipEGLStreamConsumerConnectWithFlags", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEGLStreamConsumerDisconnect - {"cuEGLStreamConsumerDisconnect", {"hipEGLStreamConsumerDisconnect", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEGLStreamConsumerReleaseFrame - {"cuEGLStreamConsumerReleaseFrame", {"hipEGLStreamConsumerReleaseFrame", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEGLStreamProducerConnect - {"cuEGLStreamProducerConnect", {"hipEGLStreamProducerConnect", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEGLStreamProducerDisconnect - {"cuEGLStreamProducerDisconnect", {"hipEGLStreamProducerDisconnect", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEGLStreamProducerPresentFrame - {"cuEGLStreamProducerPresentFrame", {"hipEGLStreamProducerPresentFrame", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEGLStreamProducerReturnFrame - {"cuEGLStreamProducerReturnFrame", {"hipEGLStreamProducerReturnFrame", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsEGLRegisterImage - {"cuGraphicsEGLRegisterImage", {"hipGraphicsEGLRegisterImage", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsResourceGetMappedEglFrame - {"cuGraphicsResourceGetMappedEglFrame", {"hipGraphicsResourceGetMappedEglFrame", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEventCreateFromEGLSync - {"cuEventCreateFromEGLSync", {"hipEventCreateFromEGLSync", "", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp b/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp deleted file 
mode 100644 index 054de19800..0000000000 --- a/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp +++ /dev/null @@ -1,1617 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Maps the names of CUDA DRIVER API types to the corresponding HIP types -const std::map CUDA_DRIVER_TYPE_NAME_MAP{ - - // 1. 
Structs - - {"CUDA_ARRAY3D_DESCRIPTOR_st", {"HIP_ARRAY3D_DESCRIPTOR", "", CONV_TYPE, API_DRIVER}}, - {"CUDA_ARRAY3D_DESCRIPTOR", {"HIP_ARRAY3D_DESCRIPTOR", "", CONV_TYPE, API_DRIVER}}, - - {"CUDA_ARRAY_DESCRIPTOR_st", {"HIP_ARRAY_DESCRIPTOR", "", CONV_TYPE, API_DRIVER}}, - {"CUDA_ARRAY_DESCRIPTOR", {"HIP_ARRAY_DESCRIPTOR", "", CONV_TYPE, API_DRIVER}}, - - // cudaExternalMemoryBufferDesc - {"CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st", {"HIP_EXTERNAL_MEMORY_BUFFER_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_EXTERNAL_MEMORY_BUFFER_DESC", {"HIP_EXTERNAL_MEMORY_BUFFER_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaExternalMemoryHandleDesc - {"CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st", {"HIP_EXTERNAL_MEMORY_HANDLE_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_EXTERNAL_MEMORY_HANDLE_DESC", {"HIP_EXTERNAL_MEMORY_HANDLE_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaExternalMemoryMipmappedArrayDesc - {"CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st", {"HIP_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC", {"HIP_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaExternalSemaphoreHandleDesc - {"CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st", {"HIP_EXTERNAL_SEMAPHORE_HANDLE_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC", {"HIP_EXTERNAL_SEMAPHORE_HANDLE_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaExternalSemaphoreSignalParams - {"CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st", {"HIP_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS", {"HIP_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaExternalSemaphoreWaitParams - {"CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st", {"HIP_EXTERNAL_SEMAPHORE_WAIT_PARAMS", "", CONV_TYPE, API_DRIVER, 
HIP_UNSUPPORTED}}, - {"CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS", {"HIP_EXTERNAL_SEMAPHORE_WAIT_PARAMS", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaHostNodeParams - {"CUDA_HOST_NODE_PARAMS_st", {"hipHostNodeParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_HOST_NODE_PARAMS", {"hipHostNodeParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaKernelNodeParams - {"CUDA_KERNEL_NODE_PARAMS_st", {"hipKernelNodeParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_KERNEL_NODE_PARAMS", {"hipKernelNodeParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - // NOTE: cudaLaunchParams struct differs - {"CUDA_LAUNCH_PARAMS_st", {"hipLaunchParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_LAUNCH_PARAMS", {"hipLaunchParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - {"CUDA_MEMCPY2D_st", {"hip_Memcpy2D", "", CONV_TYPE, API_DRIVER}}, - {"CUDA_MEMCPY2D", {"hip_Memcpy2D", "", CONV_TYPE, API_DRIVER}}, - - // no analogue - {"CUDA_MEMCPY3D_st", {"hip_Memcpy3D", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_MEMCPY3D", {"hip_Memcpy3D", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - {"CUDA_MEMCPY3D_PEER_st", {"hip_Memcpy3D_Peer", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_MEMCPY3D_PEER", {"hip_Memcpy3D_Peer", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaMemsetParams - {"CUDA_MEMSET_NODE_PARAMS_st", {"hipMemsetParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_MEMSET_NODE_PARAMS", {"hipMemsetParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - {"CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st", {"HIP_POINTER_ATTRIBUTE_P2P_TOKENS", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_POINTER_ATTRIBUTE_P2P_TOKENS", {"HIP_POINTER_ATTRIBUTE_P2P_TOKENS", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - // NOTE: cudaResourceDesc struct differs - {"CUDA_RESOURCE_DESC_st", {"HIP_RESOURCE_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - 
{"CUDA_RESOURCE_DESC", {"HIP_RESOURCE_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaResourceViewDesc - // NOTE: cudaResourceViewDesc hasn't reserved bytes in the end - {"CUDA_RESOURCE_VIEW_DESC_st", {"HIP_RESOURCE_VIEW_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_RESOURCE_VIEW_DESC", {"HIP_RESOURCE_VIEW_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - // NOTE: cudaTextureDesc differs - {"CUDA_TEXTURE_DESC_st", {"HIP_TEXTURE_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_TEXTURE_DESC", {"HIP_TEXTURE_DESC", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - // NOTE: cudaDeviceProp differs - {"CUdevprop_st", {"hipDeviceProp_t", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUdevprop", {"hipDeviceProp_t", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaIpcEventHandle_st - {"CUipcEventHandle_st", {"ihipIpcEventHandle_t", "", CONV_TYPE, API_DRIVER}}, - // cudaIpcEventHandle_t - {"CUipcEventHandle", {"ihipIpcEventHandle_t", "", CONV_TYPE, API_DRIVER}}, - - // cudaIpcMemHandle_st - {"CUipcMemHandle_st", {"hipIpcMemHandle_st", "", CONV_TYPE, API_DRIVER}}, - // cudaIpcMemHandle_t - {"CUipcMemHandle", {"hipIpcMemHandle_t", "", CONV_TYPE, API_DRIVER}}, - - // CUDA: "The types CUarray and cudaArray * represent the same data type and may be used interchangeably by casting the two types between each other." 
- // cudaArray - {"CUarray_st", {"hipArray", "", CONV_TYPE, API_DRIVER}}, - // cudaArray_t - {"CUarray", {"hipArray *", "", CONV_TYPE, API_DRIVER}}, - - // no analogue - {"CUctx_st", {"ihipCtx_t", "", CONV_TYPE, API_DRIVER}}, - {"CUcontext", {"hipCtx_t", "", CONV_TYPE, API_DRIVER}}, - - // CUeglStreamConnection_st - {"CUeglStreamConnection_st", {"hipEglStreamConnection", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaEglStreamConnection - {"CUeglStreamConnection", {"hipEglStreamConnection *", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // the same - CUevent_st - {"CUevent_st", {"ihipEvent_t", "", CONV_TYPE, API_DRIVER}}, - // cudaEvent_t - {"CUevent", {"hipEvent_t", "", CONV_TYPE, API_DRIVER}}, - - // CUexternalMemory_st - {"CUextMemory_st", {"hipExtMemory_st", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaExternalMemory_t - {"CUexternalMemory", {"hipExternalMemory", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // CUexternalSemaphore_st - {"CUextSemaphore_st", {"hipExtSemaphore_st", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaExternalSemaphore_t - {"CUexternalSemaphore", {"hipExternalSemaphore", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - {"CUfunc_st", {"ihipModuleSymbol_t", "", CONV_TYPE, API_DRIVER}}, - {"CUfunction", {"hipFunction_t", "", CONV_TYPE, API_DRIVER}}, - - // the same - CUgraph_st - {"CUgraph_st", {"hipGraph_st", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraph_t - {"CUgraph", {"hipGraph", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // the same -CUgraphExec_st - {"CUgraphExec_st", {"hipGraphExec_st", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphExec_t - {"CUgraphExec", {"hipGraphExec", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaGraphicsResource - {"CUgraphicsResource_st", {"hipGraphicsResource_st", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsResource_t - {"CUgraphicsResource", {"hipGraphicsResource_t", "", CONV_TYPE, 
API_DRIVER, HIP_UNSUPPORTED}}, - - // the same - CUgraphNode_st - {"CUgraphNode_st", {"hipGraphNode_st", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphNode_t - {"CUgraphNode", {"hipGraphNode", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaMipmappedArray - {"CUmipmappedArray_st", {"hipMipmappedArray_st", "", CONV_TYPE, API_DRIVER}}, - // cudaMipmappedArray_t - {"CUmipmappedArray", {"hipMipmappedArray_t", "", CONV_TYPE, API_DRIVER}}, - - // no analogue - {"CUmod_st", {"ihipModule_t", "", CONV_TYPE, API_DRIVER}}, - {"CUmodule", {"hipModule_t", "", CONV_TYPE, API_DRIVER}}, - - // the same - CUstream_st - {"CUstream_st", {"ihipStream_t", "", CONV_TYPE, API_DRIVER}}, - // cudaStream_t - {"CUstream", {"hipStream_t", "", CONV_TYPE, API_DRIVER}}, - - // NOTE: possibly surfaceReference is analogue - {"CUsurfref_st", {"ihipSurfaceReference_t", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUsurfref", {"hipSurfaceReference_t", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // textureReference - {"CUtexref_st", {"textureReference", "", CONV_TYPE, API_DRIVER}}, - {"CUtexref", {"hipTextureReference_t", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // CUuuid_st - // NOTE: the same struct and its name - {"CUuuid_st", {"hipUUID", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUuuid", {"hipUUID", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - {"CUmemLocation_st", {"hipMemoryLocation", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUmemLocation", {"hipMemoryLocation", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - {"CUmemAllocationProp_st", {"hipMemoryAllocationProperties", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUmemAllocationProp", {"hipMemoryAllocationProperties", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - {"CUmemAccessDesc_st", {"hipMemoryAccessDescription", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUmemAccessDesc", {"hipMemoryAccessDescription", "", 
CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // 2. Unions - - {"CUstreamBatchMemOpParams", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUstreamBatchMemOpParams_union", {"hipStreamBatchMemOpParams", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // 3. Enums - {"CUaddress_mode", {"hipTextureAddressMode", "", CONV_TYPE, API_DRIVER}}, - {"CUaddress_mode_enum", {"hipTextureAddressMode", "", CONV_TYPE, API_DRIVER}}, - // CUaddress_mode enum values - {"CU_TR_ADDRESS_MODE_WRAP", {"hipAddressModeWrap", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0 - {"CU_TR_ADDRESS_MODE_CLAMP", {"hipAddressModeClamp", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 1 - {"CU_TR_ADDRESS_MODE_MIRROR", {"hipAddressModeMirror", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 2 - {"CU_TR_ADDRESS_MODE_BORDER", {"hipAddressModeBorder", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 3 - - {"CUarray_cubemap_face", {"hipGraphicsCubeFace", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUarray_cubemap_face_enum", {"hipGraphicsCubeFace", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUarray_cubemap_face enum values - // cudaGraphicsCubeFacePositiveX - {"CU_CUBEMAP_FACE_POSITIVE_X", {"hipGraphicsCubeFacePositiveX", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaGraphicsCubeFaceNegativeX - {"CU_CUBEMAP_FACE_NEGATIVE_X", {"hipGraphicsCubeFaceNegativeX", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaGraphicsCubeFacePositiveY - {"CU_CUBEMAP_FACE_POSITIVE_Y", {"hipGraphicsCubeFacePositiveY", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaGraphicsCubeFaceNegativeY - {"CU_CUBEMAP_FACE_NEGATIVE_Y", {"hipGraphicsCubeFaceNegativeY", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x03 - // cudaGraphicsCubeFacePositiveZ - {"CU_CUBEMAP_FACE_POSITIVE_Z", {"hipGraphicsCubeFacePositiveZ", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x04 - // cudaGraphicsCubeFaceNegativeZ - 
{"CU_CUBEMAP_FACE_NEGATIVE_Z", {"hipGraphicsCubeFaceNegativeZ", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x05 - - {"CUarray_format", {"hipArray_format", "", CONV_TYPE, API_DRIVER}}, - {"CUarray_format_enum", {"hipArray_format", "", CONV_TYPE, API_DRIVER}}, - // CUarray_format enum values - {"CU_AD_FORMAT_UNSIGNED_INT8", {"HIP_AD_FORMAT_UNSIGNED_INT8", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x01 - {"CU_AD_FORMAT_UNSIGNED_INT16", {"HIP_AD_FORMAT_UNSIGNED_INT16", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x02 - {"CU_AD_FORMAT_UNSIGNED_INT32", {"HIP_AD_FORMAT_UNSIGNED_INT32", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x03 - {"CU_AD_FORMAT_SIGNED_INT8", {"HIP_AD_FORMAT_SIGNED_INT8", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x08 - {"CU_AD_FORMAT_SIGNED_INT16", {"HIP_AD_FORMAT_SIGNED_INT16", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x09 - {"CU_AD_FORMAT_SIGNED_INT32", {"HIP_AD_FORMAT_SIGNED_INT32", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x0a - {"CU_AD_FORMAT_HALF", {"HIP_AD_FORMAT_HALF", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x10 - {"CU_AD_FORMAT_FLOAT", {"HIP_AD_FORMAT_FLOAT", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x20 - - // cudaComputeMode - {"CUcomputemode", {"hipComputeMode", "", CONV_TYPE, API_DRIVER}}, - {"CUcomputemode_enum", {"hipComputeMode", "", CONV_TYPE, API_DRIVER}}, - // CUcomputemode enum values - // cudaComputeModeDefault - {"CU_COMPUTEMODE_DEFAULT", {"hipComputeModeDefault", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0 - // cudaComputeModeExclusive - // NOTE: Deprecated since CUDA 10.0 - {"CU_COMPUTEMODE_EXCLUSIVE", {"hipComputeModeExclusive", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 1 - // cudaComputeModeProhibited - {"CU_COMPUTEMODE_PROHIBITED", {"hipComputeModeProhibited", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 2 - // cudaComputeModeExclusiveProcess - {"CU_COMPUTEMODE_EXCLUSIVE_PROCESS", {"hipComputeModeExclusiveProcess", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 3 - - {"CUctx_flags", {"hipCctx_flags", 
"", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUctx_flags_enum", {"hipCctx_flags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUctx_flags enum values - // cudaDeviceScheduleAuto - {"CU_CTX_SCHED_AUTO", {"hipDeviceScheduleAuto", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x00 - // cudaDeviceScheduleSpin - {"CU_CTX_SCHED_SPIN", {"hipDeviceScheduleSpin", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x01 - // cudaDeviceScheduleYield - {"CU_CTX_SCHED_YIELD", {"hipDeviceScheduleYield", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x02 - // cudaDeviceScheduleBlockingSync - {"CU_CTX_SCHED_BLOCKING_SYNC", {"hipDeviceScheduleBlockingSync", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x04 - // cudaDeviceBlockingSync - // NOTE: Deprecated since CUDA 4.0 and replaced with CU_CTX_SCHED_BLOCKING_SYNC - {"CU_CTX_BLOCKING_SYNC", {"hipDeviceScheduleBlockingSync", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x04 - // cudaDeviceScheduleMask - {"CU_CTX_SCHED_MASK", {"hipDeviceScheduleMask", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x07 - // cudaDeviceMapHost - {"CU_CTX_MAP_HOST", {"hipDeviceMapHost", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x08 - // cudaDeviceLmemResizeToMax - {"CU_CTX_LMEM_RESIZE_TO_MAX", {"hipDeviceLmemResizeToMax", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x10 - // cudaDeviceMask - {"CU_CTX_FLAGS_MASK", {"hipDeviceMask", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1f - - // cudaDeviceAttr - {"CUdevice_attribute", {"hipDeviceAttribute_t", "", CONV_TYPE, API_DRIVER}}, - {"CUdevice_attribute_enum", {"hipDeviceAttribute_t", "", CONV_TYPE, API_DRIVER}}, - // CUdevice_attribute enum values - // cudaDevAttrMaxThreadsPerBlock - {"CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK", {"hipDeviceAttributeMaxThreadsPerBlock", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 1 - // cudaDevAttrMaxBlockDimX - {"CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X", {"hipDeviceAttributeMaxBlockDimX", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 2 - // cudaDevAttrMaxBlockDimY - 
{"CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y", {"hipDeviceAttributeMaxBlockDimY", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 3 - // cudaDevAttrMaxBlockDimZ - {"CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z", {"hipDeviceAttributeMaxBlockDimZ", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 4 - // cudaDevAttrMaxGridDimX - {"CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X", {"hipDeviceAttributeMaxGridDimX", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 5 - // cudaDevAttrMaxGridDimY - {"CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y", {"hipDeviceAttributeMaxGridDimY", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 6 - // cudaDevAttrMaxGridDimZ - {"CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z", {"hipDeviceAttributeMaxGridDimZ", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 7 - // cudaDevAttrMaxSharedMemoryPerBlock - {"CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK", {"hipDeviceAttributeMaxSharedMemoryPerBlock", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 8 - // no analogue - // NOTE: Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK - {"CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK", {"hipDeviceAttributeMaxSharedMemoryPerBlock", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 8 - // cudaDevAttrTotalConstantMemory - {"CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY", {"hipDeviceAttributeTotalConstantMemory", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 9 - // cudaDevAttrWarpSize - {"CU_DEVICE_ATTRIBUTE_WARP_SIZE", {"hipDeviceAttributeWarpSize", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 10 - // cudaDevAttrMaxPitch - {"CU_DEVICE_ATTRIBUTE_MAX_PITCH", {"hipDeviceAttributeMaxPitch", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 11 - // cudaDevAttrMaxRegistersPerBlock - {"CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK", {"hipDeviceAttributeMaxRegistersPerBlock", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 12 - // no analogue - {"CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK", {"hipDeviceAttributeMaxRegistersPerBlock", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 12 - // cudaDevAttrClockRate - {"CU_DEVICE_ATTRIBUTE_CLOCK_RATE", 
{"hipDeviceAttributeClockRate", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 13 - // cudaDevAttrTextureAlignment - {"CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT", {"hipDeviceAttributeTextureAlignment", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 14 - // cudaDevAttrGpuOverlap - // NOTE: Deprecated, use instead CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT - {"CU_DEVICE_ATTRIBUTE_GPU_OVERLAP", {"hipDeviceAttributeAsyncEngineCount", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 15 - // cudaDevAttrMultiProcessorCount - {"CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT", {"hipDeviceAttributeMultiprocessorCount", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 16 - // cudaDevAttrKernelExecTimeout - {"CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT", {"hipDeviceAttributeKernelExecTimeout", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 17 - // cudaDevAttrIntegrated - {"CU_DEVICE_ATTRIBUTE_INTEGRATED", {"hipDeviceAttributeIntegrated", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 18 - // cudaDevAttrCanMapHostMemory - {"CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY", {"hipDeviceAttributeCanMapHostMemory", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 19 - // cudaDevAttrComputeMode - {"CU_DEVICE_ATTRIBUTE_COMPUTE_MODE", {"hipDeviceAttributeComputeMode", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 20 - // cudaDevAttrMaxTexture1DWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH", {"hipDeviceAttributeMaxTexture1DWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 21 - // cudaDevAttrMaxTexture2DWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH", {"hipDeviceAttributeMaxTexture2DWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 22 - // cudaDevAttrMaxTexture2DHeight - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT", {"hipDeviceAttributeMaxTexture2DHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 23 - // cudaDevAttrMaxTexture3DWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH", {"hipDeviceAttributeMaxTexture3DWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 24 - // cudaDevAttrMaxTexture3DHeight - 
{"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT", {"hipDeviceAttributeMaxTexture3DHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 25 - // cudaDevAttrMaxTexture3DDepth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH", {"hipDeviceAttributeMaxTexture3DDepth", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 26 - // cudaDevAttrMaxTexture2DLayeredWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH", {"hipDeviceAttributeMaxTexture2DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 27 - // cudaDevAttrMaxTexture2DLayeredHeight - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT", {"hipDeviceAttributeMaxTexture2DLayeredHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 28 - // cudaDevAttrMaxTexture2DLayeredLayers - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS", {"hipDeviceAttributeMaxTexture2DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 29 - // cudaDevAttrMaxTexture2DLayeredWidth - // NOTE: Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH", {"hipDeviceAttributeMaxTexture2DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 27 - // cudaDevAttrMaxTexture2DLayeredHeight - // NOTE: Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT", {"hipDeviceAttributeMaxTexture2DLayeredHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 28 - // cudaDevAttrMaxTexture2DLayeredLayers - // NOTE: Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES", {"hipDeviceAttributeMaxTexture2DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 29 - // cudaDevAttrSurfaceAlignment - {"CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT", {"hipDeviceAttributeSurfaceAlignment", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 30 - // 
cudaDevAttrConcurrentKernels - {"CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS", {"hipDeviceAttributeConcurrentKernels", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 31 - // cudaDevAttrEccEnabled - {"CU_DEVICE_ATTRIBUTE_ECC_ENABLED", {"hipDeviceAttributeEccEnabled", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 32 - // cudaDevAttrPciBusId - {"CU_DEVICE_ATTRIBUTE_PCI_BUS_ID", {"hipDeviceAttributePciBusId", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 33 - // cudaDevAttrPciDeviceId - {"CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID", {"hipDeviceAttributePciDeviceId", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 34 - // cudaDevAttrTccDriver - {"CU_DEVICE_ATTRIBUTE_TCC_DRIVER", {"hipDeviceAttributeTccDriver", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 35 - // cudaDevAttrMemoryClockRate - {"CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE", {"hipDeviceAttributeMemoryClockRate", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 36 - // cudaDevAttrGlobalMemoryBusWidth - {"CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH", {"hipDeviceAttributeMemoryBusWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 37 - // cudaDevAttrL2CacheSize - {"CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE", {"hipDeviceAttributeL2CacheSize", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 38 - // cudaDevAttrMaxThreadsPerMultiProcessor - {"CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR", {"hipDeviceAttributeMaxThreadsPerMultiProcessor", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 39 - // cudaDevAttrAsyncEngineCount - {"CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT", {"hipDeviceAttributeAsyncEngineCount", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 40 - // cudaDevAttrUnifiedAddressing - {"CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING", {"hipDeviceAttributeUnifiedAddressing", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 41 - // cudaDevAttrMaxTexture1DLayeredWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH", {"hipDeviceAttributeMaxTexture1DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 42 - // 
cudaDevAttrMaxTexture1DLayeredLayers - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS", {"hipDeviceAttributeMaxTexture1DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 43 - // no analogue - // NOTE: Deprecated, do not use - {"CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER", {"hipDeviceAttributeCanTex2DGather", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 44 - // cudaDevAttrMaxTexture2DGatherWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH", {"hipDeviceAttributeMaxTexture2DGatherWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 45 - // cudaDevAttrMaxTexture2DGatherHeight - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT", {"hipDeviceAttributeMaxTexture2DGatherHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 46 - // cudaDevAttrMaxTexture3DWidthAlt - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE", {"hipDeviceAttributeMaxTexture3DWidthAlternate", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 47 - // cudaDevAttrMaxTexture3DHeightAlt - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE", {"hipDeviceAttributeMaxTexture3DHeightAlternate", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 48 - // cudaDevAttrMaxTexture3DDepthAlt - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE", {"hipDeviceAttributeMaxTexture3DDepthAlternate", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 49 - // cudaDevAttrPciDomainId - {"CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID", {"hipDeviceAttributePciDomainId", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 50 - // cudaDevAttrTexturePitchAlignment - {"CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT", {"hipDeviceAttributeTexturePitchAlignment", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 51 - // cudaDevAttrMaxTextureCubemapWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH", {"hipDeviceAttributeMaxTextureCubemapWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, 
HIP_UNSUPPORTED}}, // 52 - // cudaDevAttrMaxTextureCubemapLayeredWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH", {"hipDeviceAttributeMaxTextureCubemapLayeredWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 53 - // cudaDevAttrMaxTextureCubemapLayeredLayers - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS", {"hipDeviceAttributeMaxTextureCubemapLayeredLayers", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 54 - // cudaDevAttrMaxSurface1DWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH", {"hipDeviceAttributeMaxSurface1DWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 55 - // cudaDevAttrMaxSurface2DWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH", {"hipDeviceAttributeMaxSurface2DWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 56 - // cudaDevAttrMaxSurface2DHeight - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT", {"hipDeviceAttributeMaxSurface2DHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 57 - // cudaDevAttrMaxSurface3DWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH", {"hipDeviceAttributeMaxSurface3DWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 58 - // cudaDevAttrMaxSurface3DHeight - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT", {"hipDeviceAttributeMaxSurface3DHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 59 - // cudaDevAttrMaxSurface3DDepth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH", {"hipDeviceAttributeMaxSurface3DDepth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 60 - // cudaDevAttrMaxSurface1DLayeredWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH", {"hipDeviceAttributeMaxSurface1DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 61 - // cudaDevAttrMaxSurface1DLayeredLayers - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS", {"hipDeviceAttributeMaxSurface1DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_DRIVER, 
HIP_UNSUPPORTED}}, // 62 - // cudaDevAttrMaxSurface2DLayeredWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH", {"hipDeviceAttributeMaxSurface2DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 63 - // cudaDevAttrMaxSurface2DLayeredHeight - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT", {"hipDeviceAttributeMaxSurface2DLayeredHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 64 - // cudaDevAttrMaxSurface2DLayeredLayers - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS", {"hipDeviceAttributeMaxSurface2DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 65 - // cudaDevAttrMaxSurfaceCubemapWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH", {"hipDeviceAttributeMaxSurfaceCubemapWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 66 - // cudaDevAttrMaxSurfaceCubemapLayeredWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH", {"hipDeviceAttributeMaxSurfaceCubemapLayeredWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 67 - // cudaDevAttrMaxSurfaceCubemapLayeredLayers - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS", {"hipDeviceAttributeMaxSurfaceCubemapLayeredLayers", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 68 - // cudaDevAttrMaxTexture1DLinearWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH", {"hipDeviceAttributeMaxTexture1DLinearWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 69 - // cudaDevAttrMaxTexture2DLinearWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH", {"hipDeviceAttributeMaxTexture2DLinearWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 70 - // cudaDevAttrMaxTexture2DLinearHeight - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT", {"hipDeviceAttributeMaxTexture2DLinearHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 71 - // cudaDevAttrMaxTexture2DLinearPitch - 
{"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH", {"hipDeviceAttributeMaxTexture2DLinearPitch", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 72 - // cudaDevAttrMaxTexture2DMipmappedWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH", {"hipDeviceAttributeMaxTexture2DMipmappedWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 73 - // cudaDevAttrMaxTexture2DMipmappedHeight - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT", {"hipDeviceAttributeMaxTexture2DMipmappedHeight", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 74 - // cudaDevAttrComputeCapabilityMajor - {"CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR", {"hipDeviceAttributeComputeCapabilityMajor", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 75 - // cudaDevAttrComputeCapabilityMinor - {"CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR", {"hipDeviceAttributeComputeCapabilityMinor", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 76 - // cudaDevAttrMaxTexture1DMipmappedWidth - {"CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH", {"hipDeviceAttributeMaxTexture1DMipmappedWidth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 77 - // cudaDevAttrStreamPrioritiesSupported - {"CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED", {"hipDeviceAttributeStreamPrioritiesSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 78 - // cudaDevAttrGlobalL1CacheSupported - {"CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED", {"hipDeviceAttributeGlobalL1CacheSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 79 - // cudaDevAttrLocalL1CacheSupported - {"CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED", {"hipDeviceAttributeLocalL1CacheSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 80 - // cudaDevAttrMaxSharedMemoryPerMultiprocessor - {"CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR", {"hipDeviceAttributeMaxSharedMemoryPerMultiprocessor", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 81 - // 
cudaDevAttrMaxRegistersPerMultiprocessor - {"CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR", {"hipDeviceAttributeMaxRegistersPerMultiprocessor", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 82 - // cudaDevAttrManagedMemory - {"CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY", {"hipDeviceAttributeManagedMemory", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 83 - // cudaDevAttrIsMultiGpuBoard - {"CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD", {"hipDeviceAttributeIsMultiGpuBoard", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 84 - // cudaDevAttrMultiGpuBoardGroupID - {"CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID", {"hipDeviceAttributeMultiGpuBoardGroupId", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 85 - // cudaDevAttrHostNativeAtomicSupported - {"CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED", {"hipDeviceAttributeHostNativeAtomicSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 86 - // cudaDevAttrSingleToDoublePrecisionPerfRatio - {"CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO", {"hipDeviceAttributeSingleToDoublePrecisionPerfRatio", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 87 - // cudaDevAttrPageableMemoryAccess - {"CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS", {"hipDeviceAttributePageableMemoryAccess", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 88 - // cudaDevAttrConcurrentManagedAccess - {"CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS", {"hipDeviceAttributeConcurrentManagedAccess", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 89 - // cudaDevAttrComputePreemptionSupported - {"CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED", {"hipDeviceAttributeComputePreemptionSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 90 - // cudaDevAttrCanUseHostPointerForRegisteredMem - {"CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM", {"hipDeviceAttributeCanUseHostPointerForRegisteredMem", "", CONV_NUMERIC_LITERAL, API_DRIVER, 
HIP_UNSUPPORTED}}, // 91 - // no analogue: cudaDevAttrReserved92 - {"CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS", {"hipDeviceAttributeCanUseStreamMemOps", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 92 - // no analogue: cudaDevAttrReserved93 - {"CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS", {"hipDeviceAttributeCanUse64BitStreamMemOps", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 93 - // no analogue: cudaDevAttrReserved94 - {"CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR", {"hipDeviceAttributeCanUseStreamWaitValueNor", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 94 - // cudaDevAttrCooperativeLaunch - {"CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH", {"hipDeviceAttributeCooperativeLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 95 - // cudaDevAttrCooperativeMultiDeviceLaunch - {"CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH", {"hipDeviceAttributeCooperativeMultiDeviceLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 96 - // cudaDevAttrMaxSharedMemoryPerBlockOptin - {"CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN", {"hipDeviceAttributeMaxSharedMemoryPerBlockOptin", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 97 - // cudaDevAttrCanFlushRemoteWrites - {"CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES", {"hipDeviceAttributeCanFlushRemoteWrites", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 98 - // cudaDevAttrHostRegisterSupported - {"CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED", {"hipDeviceAttributeHostRegisterSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 99 - // cudaDevAttrPageableMemoryAccessUsesHostPageTables - {"CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES", {"hipDeviceAttributePageableMemoryAccessUsesHostPageTables", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 100 - // cudaDevAttrDirectManagedMemAccessFromHost - {"CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST", 
{"hipDeviceAttributeDirectManagedMemAccessFromHost", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 101 - // no analogue - {"CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED", {"hipDeviceAttributeVirtualAddressManagementSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 102 - // no analogue - {"CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED", {"hipDeviceAttributeHandleTypePosixFileDescriptorSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 103 - // no analogue - {"CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED", {"hipDeviceAttributeHandleTypeWin32HandleSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 104 - // no analogue - {"CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED", {"hipDeviceAttributeHandleTypeWin32KmtHandleSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 105 - // no analogue - {"CU_DEVICE_ATTRIBUTE_MAX", {"hipDeviceAttributeMax", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 106 - - // cudaDeviceP2PAttr - {"CUdevice_P2PAttribute", {"hipDeviceP2PAttribute", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUdevice_P2PAttribute_enum", {"hipDeviceP2PAttribute", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUdevice_P2PAttribute enum values - // cudaDevP2PAttrPerformanceRank = 1 - {"CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK", {"hipDeviceP2PAttributePerformanceRank", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaDevP2PAttrAccessSupported = 2 - {"CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED", {"hipDeviceP2PAttributeAccessSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaDevP2PAttrNativeAtomicSupported = 3 - {"CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED", {"hipDeviceP2PAttributeNativeAtomicSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x03 - // cudaDevP2PAttrCudaArrayAccessSupported = 4 - // NOTE" deprecated, 
use CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED instead - {"CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED", {"hipDevP2PAttributeCudaArrayAccessSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x04 - // NOTE" deprecated, use CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED instead - {"CU_DEVICE_P2P_ATTRIBUTE_ARRAY_ACCESS_ACCESS_SUPPORTED", {"hipDevP2PAttributeCudaArrayAccessSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x04 - // cudaDevP2PAttrCudaArrayAccessSupported = 4 - {"CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED", {"hipDevP2PAttributeCudaArrayAccessSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x04 - - // cudaEGL.h - presented only on Linux in nvidia-cuda-dev package - // cudaEglColorFormat - {"CUeglColorFormat", {"hipEglColorFormat", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUeglColorFormate_enum", {"hipEglColorFormat", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUeglColorFormat enum values - // cudaEglColorFormatYUV420Planar = 0 - {"CU_EGL_COLOR_FORMAT_YUV420_PLANAR", {"hipEglColorFormatYUV420Planar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaEglColorFormatYUV420SemiPlanar = 1 - {"CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR", {"hipEglColorFormatYUV420SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaEglColorFormatYUV422Planar = 2 - {"CU_EGL_COLOR_FORMAT_YUV422_PLANAR", {"hipEglColorFormatYUV422Planar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaEglColorFormatYUV422SemiPlanar = 3 - {"CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR", {"hipEglColorFormatYUV422SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x03 - // cudaEglColorFormatRGB = 4 - {"CU_EGL_COLOR_FORMAT_RGB", {"hipEglColorFormatRGB", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x04 - // cudaEglColorFormatBGR = 5 - {"CU_EGL_COLOR_FORMAT_BGR", {"hipEglColorFormatBGR", 
"", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x05 - // cudaEglColorFormatARGB = 6 - {"CU_EGL_COLOR_FORMAT_ARGB", {"hipEglColorFormatARGB", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x06 - // cudaEglColorFormatRGBA = 7 - {"CU_EGL_COLOR_FORMAT_RGBA", {"hipEglColorFormatRGBA", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x07 - // cudaEglColorFormatL = 8 - {"CU_EGL_COLOR_FORMAT_L", {"hipEglColorFormatL", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x08 - // cudaEglColorFormatR = 9 - {"CU_EGL_COLOR_FORMAT_R", {"hipEglColorFormatR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x09 - // cudaEglColorFormatYUV444Planar = 10 - {"CU_EGL_COLOR_FORMAT_YUV444_PLANAR", {"hipEglColorFormatYUV444Planar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0A - // cudaEglColorFormatYUV444SemiPlanar = 11 - {"CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR", {"hipEglColorFormatYUV444SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0B - // cudaEglColorFormatYUYV422 = 12 - {"CU_EGL_COLOR_FORMAT_YUYV_422", {"hipEglColorFormatYUYV422", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0C - // cudaEglColorFormatUYVY422 = 13 - {"CU_EGL_COLOR_FORMAT_UYVY_422", {"hipEglColorFormatUYVY422", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0D - // cudaEglColorFormatABGR = 14 - {"CU_EGL_COLOR_FORMAT_ABGR", {"hipEglColorFormatABGR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0E - // cudaEglColorFormatBGRA = 15 - {"CU_EGL_COLOR_FORMAT_BGRA", {"hipEglColorFormatBGRA", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0F - // cudaEglColorFormatA = 16 - {"CU_EGL_COLOR_FORMAT_A", {"hipEglColorFormatA", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x10 - // cudaEglColorFormatRG = 17 - {"CU_EGL_COLOR_FORMAT_RG", {"hipEglColorFormatRG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x11 - // cudaEglColorFormatAYUV = 
18 - {"CU_EGL_COLOR_FORMAT_AYUV", {"hipEglColorFormatAYUV", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x12 - // cudaEglColorFormatYVU444SemiPlanar = 19 - {"CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR", {"hipEglColorFormatYVU444SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x13 - // cudaEglColorFormatYVU422SemiPlanar = 20 - {"CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR", {"hipEglColorFormatYVU422SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x14 - // cudaEglColorFormatYVU420SemiPlanar = 21 - {"CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR", {"hipEglColorFormatYVU420SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x15 - // cudaEglColorFormatYVU420SemiPlanar = 22 - {"CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR", {"hipEglColorFormatY10V10U10_444SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x16 - // cudaEglColorFormatY10V10U10_420SemiPlanar = 23 - {"CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR", {"hipEglColorFormatY10V10U10_420SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x17 - // cudaEglColorFormatY12V12U12_444SemiPlanar = 24 - {"CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR", {"hipEglColorFormatY12V12U12_444SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x18 - // cudaEglColorFormatY12V12U12_420SemiPlanar = 25 - {"CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR", {"hipEglColorFormatY12V12U12_420SemiPlanar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x19 - // cudaEglColorFormatVYUY_ER = 26 - {"CU_EGL_COLOR_FORMAT_VYUY_ER", {"hipEglColorFormatVYUY_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1A - // cudaEglColorFormatUYVY_ER = 27 - {"CU_EGL_COLOR_FORMAT_UYVY_ER", {"hipEglColorFormatUYVY_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1B - // cudaEglColorFormatYUYV_ER = 28 - {"CU_EGL_COLOR_FORMAT_YUYV_ER", {"hipEglColorFormatYUYV_ER", "", CONV_NUMERIC_LITERAL, 
API_DRIVER, HIP_UNSUPPORTED}}, // 0x1C - // cudaEglColorFormatYVYU_ER = 29 - {"CU_EGL_COLOR_FORMAT_YVYU_ER", {"hipEglColorFormatYVYU_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1D - // cudaEglColorFormatYUV_ER = 30 - {"CU_EGL_COLOR_FORMAT_YUV_ER", {"hipEglColorFormatYUV_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1E - // cudaEglColorFormatYUVA_ER = 31 - {"CU_EGL_COLOR_FORMAT_YUVA_ER", {"hipEglColorFormatYUVA_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1F - // cudaEglColorFormatAYUV_ER = 32 - {"CU_EGL_COLOR_FORMAT_AYUV_ER", {"hipEglColorFormatAYUV_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x20 - // cudaEglColorFormatYUV444Planar_ER = 33 - {"CU_EGL_COLOR_FORMAT_YUV444_PLANAR_ER", {"hipEglColorFormatYUV444Planar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x21 - // cudaEglColorFormatYUV422Planar_ER = 34 - {"CU_EGL_COLOR_FORMAT_YUV422_PLANAR_ER", {"hipEglColorFormatYUV422Planar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x22 - // cudaEglColorFormatYUV420Planar_ER = 35 - {"CU_EGL_COLOR_FORMAT_YUV420_PLANAR_ER", {"hipEglColorFormatYUV420Planar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x23 - // cudaEglColorFormatYUV444SemiPlanar_ER = 36 - {"CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR_ER", {"hipEglColorFormatYUV444SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x24 - // cudaEglColorFormatYUV422SemiPlanar_ER = 37 - {"CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR_ER", {"hipEglColorFormatYUV422SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x25 - // cudaEglColorFormatYUV420SemiPlanar_ER = 38 - {"CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_ER", {"hipEglColorFormatYUV420SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x26 - // cudaEglColorFormatYVU444Planar_ER = 39 - {"CU_EGL_COLOR_FORMAT_YVU444_PLANAR_ER", {"hipEglColorFormatYVU444Planar_ER", "", 
CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x27 - // cudaEglColorFormatYVU422Planar_ER = 40 - {"CU_EGL_COLOR_FORMAT_YVU422_PLANAR_ER", {"hipEglColorFormatYVU422Planar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x28 - // cudaEglColorFormatYVU420Planar_ER = 41 - {"CU_EGL_COLOR_FORMAT_YVU420_PLANAR_ER", {"hipEglColorFormatYVU420Planar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x29 - // cudaEglColorFormatYVU444SemiPlanar_ER = 42 - {"CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR_ER", {"hipEglColorFormatYVU444SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2A - // cudaEglColorFormatYVU422SemiPlanar_ER = 43 - {"CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR_ER", {"hipEglColorFormatYVU422SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2B - // cudaEglColorFormatYVU420SemiPlanar_ER = 44 - {"CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_ER", {"hipEglColorFormatYVU420SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2C - // cudaEglColorFormatBayerRGGB = 45 - {"CU_EGL_COLOR_FORMAT_BAYER_RGGB", {"hipEglColorFormatBayerRGGB", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2D - // cudaEglColorFormatBayerBGGR = 46 - {"CU_EGL_COLOR_FORMAT_BAYER_BGGR", {"hipEglColorFormatBayerBGGR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2E - // cudaEglColorFormatBayerGRBG = 47 - {"CU_EGL_COLOR_FORMAT_BAYER_GRBG", {"hipEglColorFormatBayerGRBG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2F - // cudaEglColorFormatBayerGBRG = 48 - {"CU_EGL_COLOR_FORMAT_BAYER_GBRG", {"hipEglColorFormatBayerGBRG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x30 - // cudaEglColorFormatBayer10RGGB = 49 - {"CU_EGL_COLOR_FORMAT_BAYER10_RGGB", {"hipEglColorFormatBayer10RGGB", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x31 - // cudaEglColorFormatBayer10BGGR = 50 - {"CU_EGL_COLOR_FORMAT_BAYER10_BGGR", 
{"hipEglColorFormatBayer10BGGR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x32 - // cudaEglColorFormatBayer10GRBG = 51 - {"CU_EGL_COLOR_FORMAT_BAYER10_GRBG", {"hipEglColorFormatBayer10GRBG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x33 - // cudaEglColorFormatBayer10GBRG = 52 - {"CU_EGL_COLOR_FORMAT_BAYER10_GBRG", {"hipEglColorFormatBayer10GBRG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x34 - // cudaEglColorFormatBayer12RGGB = 53 - {"CU_EGL_COLOR_FORMAT_BAYER12_RGGB", {"hipEglColorFormatBayer12RGGB", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x35 - // cudaEglColorFormatBayer12BGGR = 54 - {"CU_EGL_COLOR_FORMAT_BAYER12_BGGR", {"hipEglColorFormatBayer12BGGR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x36 - // cudaEglColorFormatBayer12GRBG = 55 - {"CU_EGL_COLOR_FORMAT_BAYER12_GRBG", {"hipEglColorFormatBayer12GRBG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x37 - // cudaEglColorFormatBayer12GBRG = 56 - {"CU_EGL_COLOR_FORMAT_BAYER12_GBRG", {"hipEglColorFormatBayer12GBRG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x38 - // cudaEglColorFormatBayer14RGGB = 57 - {"CU_EGL_COLOR_FORMAT_BAYER14_RGGB", {"hipEglColorFormatBayer14RGGB", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x39 - // cudaEglColorFormatBayer14BGGR = 58 - {"CU_EGL_COLOR_FORMAT_BAYER14_BGGR", {"hipEglColorFormatBayer14BGGR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x3A - // cudaEglColorFormatBayer14GRBG = 59 - {"CU_EGL_COLOR_FORMAT_BAYER14_GRBG", {"hipEglColorFormatBayer14GRBG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x3B - // cudaEglColorFormatBayer14GBRG = 60 - {"CU_EGL_COLOR_FORMAT_BAYER14_GBRG", {"hipEglColorFormatBayer14GBRG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x3C - // cudaEglColorFormatBayer20RGGB = 61 - {"CU_EGL_COLOR_FORMAT_BAYER20_RGGB", {"hipEglColorFormatBayer20RGGB", "", 
CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x3D - // cudaEglColorFormatBayer20BGGR = 62 - {"CU_EGL_COLOR_FORMAT_BAYER20_BGGR", {"hipEglColorFormatBayer20BGGR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x3E - // cudaEglColorFormatBayer20GRBG = 63 - {"CU_EGL_COLOR_FORMAT_BAYER20_GRBG", {"hipEglColorFormatBayer20GRBG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x3F - // cudaEglColorFormatBayer20GBRG = 64 - {"CU_EGL_COLOR_FORMAT_BAYER20_GBRG", {"hipEglColorFormatBayer20GBRG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x40 - // cudaEglColorFormatYVU444Planar = 65 - {"CU_EGL_COLOR_FORMAT_YVU444_PLANAR", {"hipEglColorFormatYVU444Planar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x41 - // cudaEglColorFormatYVU422Planar = 66 - {"CU_EGL_COLOR_FORMAT_YVU422_PLANAR", {"hipEglColorFormatYVU422Planar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x42 - // cudaEglColorFormatYVU420Planar = 67 - {"CU_EGL_COLOR_FORMAT_YVU420_PLANAR", {"hipEglColorFormatYVU420Planar", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x43 - // cudaEglColorFormatBayerIspRGGB = 68 - {"CU_EGL_COLOR_FORMAT_BAYER_ISP_RGGB", {"hipEglColorFormatBayerIspRGGB", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x44 - // cudaEglColorFormatBayerIspBGGR = 69 - {"CU_EGL_COLOR_FORMAT_BAYER_ISP_BGGR", {"hipEglColorFormatBayerIspBGGR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x45 - // cudaEglColorFormatBayerIspGRBG = 70 - {"CU_EGL_COLOR_FORMAT_BAYER_ISP_GRBG", {"hipEglColorFormatBayerIspGRBG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x46 - // cudaEglColorFormatBayerIspGBRG = 71 - {"CU_EGL_COLOR_FORMAT_BAYER_ISP_GBRG", {"hipEglColorFormatBayerIspGBRG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x47 - // no analogue - {"CU_EGL_COLOR_FORMAT_MAX", {"hipEglColorFormatMax", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x48 - - // 
cudaEglFrameType - {"CUeglFrameType", {"hipEglFrameType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUeglFrameType_enum", {"hipEglFrameType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUeglFrameType enum values - // cudaEglFrameTypeArray - {"CU_EGL_FRAME_TYPE_ARRAY", {"hipEglFrameTypeArray", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0 - // cudaEglFrameTypePitch - {"CU_EGL_FRAME_TYPE_PITCH", {"hipEglFrameTypePitch", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - - // cudaEglResourceLocationFlags - {"CUeglResourceLocationFlags", {"hipEglResourceLocationFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUeglResourceLocationFlags_enum", {"hipEglResourceLocationFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUeglResourceLocationFlags enum values - // cudaEglResourceLocationSysmem - {"CU_EGL_RESOURCE_LOCATION_SYSMEM", {"hipEglResourceLocationSysmem", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaEglResourceLocationVidmem - {"CU_EGL_RESOURCE_LOCATION_VIDMEM", {"hipEglResourceLocationVidmem", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - - // no analogue - {"CUevent_flags", {"hipEventFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUevent_flags_enum", {"hipEventFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUevent_flags enum values - // cudaEventDefault - {"CU_EVENT_DEFAULT", {"hipEventDefault", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x00 - // cudaEventBlockingSync - {"CU_EVENT_BLOCKING_SYNC", {"hipEventBlockingSync", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x01 - // cudaEventDisableTiming - {"CU_EVENT_DISABLE_TIMING", {"hipEventDisableTiming", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x02 - // cudaEventInterprocess - {"CU_EVENT_INTERPROCESS", {"hipEventInterprocess", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x04 - - // cudaExternalMemoryHandleType - {"CUexternalMemoryHandleType", {"hipExternalMemoryHandleType", "", 
CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUexternalMemoryHandleType_enum", {"hipExternalMemoryHandleType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUexternalMemoryHandleType enum values - // cudaExternalMemoryHandleTypeOpaqueFd - {"CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD", {"hipExternalMemoryHandleTypeOpaqueFD", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - // cudaExternalMemoryHandleTypeOpaqueWin32 - {"CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32", {"hipExternalMemoryHandleTypeOpaqueWin32", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - // cudaExternalMemoryHandleTypeOpaqueWin32Kmt - {"CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT", {"hipExternalMemoryHandleTypeOpaqueWin32KMT", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 3 - // cudaExternalMemoryHandleTypeD3D12Heap - {"CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP", {"hipExternalMemoryHandleTypeD3D12Heap", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 4 - // cudaExternalMemoryHandleTypeD3D12Resource - {"CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE", {"hipExternalMemoryHandleTypeD3D12Resource", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 5 - // cudaExternalMemoryHandleTypeD3D11Resource - {"CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE", {"hipExternalMemoryHandleTypeD3D11Resource", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 6 - // cudaExternalMemoryHandleTypeD3D11ResourceKmt - {"CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT", {"hipExternalMemoryHandleTypeD3D11ResourceKmt", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 7 - // cudaExternalMemoryHandleTypeNvSciBuf - {"CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF", {"hipExternalMemoryHandleTypeNvSciBuf", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 8 - - // cudaExternalSemaphoreHandleType - {"CUexternalSemaphoreHandleType", {"hipExternalSemaphoreHandleType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - 
{"CUexternalSemaphoreHandleType_enum", {"hipExternalSemaphoreHandleType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUexternalSemaphoreHandleType enum values - // cudaExternalSemaphoreHandleTypeOpaqueFd - {"CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD", {"hipExternalSemaphoreHandleTypeOpaqueFD", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - // cudaExternalSemaphoreHandleTypeOpaqueWin32 - {"CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32", {"hipExternalSemaphoreHandleTypeOpaqueWin32", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - // cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt - {"CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT", {"hipExternalSemaphoreHandleTypeOpaqueWin32KMT", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 3 - // cudaExternalSemaphoreHandleTypeD3D12Fence - {"CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE", {"hipExternalSemaphoreHandleTypeD3D12Fence", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 4 - - // cudaExternalSemaphoreHandleTypeD3D11Fence - {"CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE", {"hipExternalSemaphoreHandleTypeD3D11Fence", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 5 - // cudaExternalSemaphoreHandleTypeNvSciSync - {"CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC", {"hipExternalSemaphoreHandleTypeNvSciSync", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 6 - // cudaExternalSemaphoreHandleTypeKeyedMutex - {"CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX", {"hipExternalSemaphoreHandleTypeKeyedMutex", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 7 - // cudaExternalSemaphoreHandleTypeKeyedMutexKmt - {"CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT", {"hipExternalSemaphoreHandleTypeKeyedMutexKmt", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 8 - - // cudaTextureFilterMode - {"CUfilter_mode", {"hipTextureFilterMode", "", CONV_TYPE, API_DRIVER}}, - {"CUfilter_mode_enum", {"hipTextureFilterMode", 
"", CONV_TYPE, API_DRIVER}}, - // CUfilter_mode enum values - // cudaFilterModePoint - {"CU_TR_FILTER_MODE_POINT", {"hipFilterModePoint", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0 - // cudaFilterModeLinear - {"CU_TR_FILTER_MODE_LINEAR", {"hipFilterModeLinear", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 1 - - // cudaFuncCache - {"CUfunc_cache", {"hipFuncCache_t", "", CONV_TYPE, API_DRIVER}}, - {"CUfunc_cache_enum", {"hipFuncCache_t", "", CONV_TYPE, API_DRIVER}}, - // CUfunc_cache enum values - // cudaFilterModePoint = 0 - {"CU_FUNC_CACHE_PREFER_NONE", {"hipFuncCachePreferNone", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x00 - // cudaFuncCachePreferShared = 1 - {"CU_FUNC_CACHE_PREFER_SHARED", {"hipFuncCachePreferShared", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x01 - // cudaFuncCachePreferL1 = 2 - {"CU_FUNC_CACHE_PREFER_L1", {"hipFuncCachePreferL1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x02 - // cudaFuncCachePreferEqual = 3 - {"CU_FUNC_CACHE_PREFER_EQUAL", {"hipFuncCachePreferEqual", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x03 - - // cudaFuncAttribute - {"CUfunction_attribute", {"hipFunction_attribute", "", CONV_TYPE, API_DRIVER}}, - {"CUfunction_attribute_enum", {"hipFunction_attribute", "", CONV_TYPE, API_DRIVER}}, - // CUfunction_attribute enum values - // no analogue - {"CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK", {"HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0 - // no analogue - {"CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES", {"HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 1 - // no analogue - {"CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES", {"HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 2 - // no analogue - {"CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES", {"HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 3 - // no analogue - {"CU_FUNC_ATTRIBUTE_NUM_REGS", {"HIP_FUNC_ATTRIBUTE_NUM_REGS", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 
4 - // no analogue - {"CU_FUNC_ATTRIBUTE_PTX_VERSION", {"HIP_FUNC_ATTRIBUTE_PTX_VERSION", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 5 - // no analogue - {"CU_FUNC_ATTRIBUTE_BINARY_VERSION", {"HIP_FUNC_ATTRIBUTE_BINARY_VERSION", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 6 - // no analogue - {"CU_FUNC_ATTRIBUTE_CACHE_MODE_CA", {"HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 7 - // cudaFuncAttributeMaxDynamicSharedMemorySize - {"CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES", {"HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 8 - // cudaFuncAttributePreferredSharedMemoryCarveout - {"CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", {"HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 9 - // cudaFuncAttributeMax - {"CU_FUNC_ATTRIBUTE_MAX", {"HIP_FUNC_ATTRIBUTE_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 10 - - //cudaGraphicsMapFlags - {"CUgraphicsMapResourceFlags", {"hipGraphicsMapFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUgraphicsMapResourceFlags_enum", {"hipGraphicsMapFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUgraphicsMapResourceFlags enum values - // cudaGraphicsMapFlagsNone = 0 - {"CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE", {"hipGraphicsMapFlagsNone", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaGraphicsMapFlagsReadOnly = 1 - {"CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY", {"hipGraphicsMapFlagsReadOnly", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaGraphicsMapFlagsWriteDiscard = 2 - {"CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD", {"hipGraphicsMapFlagsWriteDiscard", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - - // cudaGraphicsRegisterFlags - {"CUgraphicsRegisterFlags", {"hipGraphicsRegisterFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUgraphicsRegisterFlags_enum", {"hipGraphicsRegisterFlags", "", CONV_TYPE, 
API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphicsRegisterFlags enum values - //cudaGraphicsRegisterFlagsNone = 0 - {"CU_GRAPHICS_REGISTER_FLAGS_NONE", {"hipGraphicsRegisterFlagsNone", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaGraphicsRegisterFlagsReadOnly = 1 - {"CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY", {"hipGraphicsRegisterFlagsReadOnly", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - //cudaGraphicsRegisterFlagsWriteDiscard = 2 - {"CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD", {"hipGraphicsRegisterFlagsWriteDiscard", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaGraphicsRegisterFlagsSurfaceLoadStore = 4 - {"CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST", {"hipGraphicsRegisterFlagsSurfaceLoadStore", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x04 - // cudaGraphicsRegisterFlagsTextureGather = 8 - {"CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER", {"hipGraphicsRegisterFlagsTextureGather", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x08 - - // cudaGraphNodeType - {"CUgraphNodeType", {"hipGraphNodeType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUgraphNodeType_enum", {"hipGraphNodeType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaGraphNodeType enum values - // cudaGraphNodeTypeKernel = 0x00 - {"CU_GRAPH_NODE_TYPE_KERNEL", {"hipGraphNodeTypeKernel", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0 - // cudaGraphNodeTypeMemcpy = 0x01 - {"CU_GRAPH_NODE_TYPE_MEMCPY", {"hipGraphNodeTypeMemcpy", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - // cudaGraphNodeTypeMemset = 0x02 - {"CU_GRAPH_NODE_TYPE_MEMSET", {"hipGraphNodeTypeMemset", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - // cudaGraphNodeTypeHost = 0x03 - {"CU_GRAPH_NODE_TYPE_HOST", {"hipGraphNodeTypeHost", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 3 - // cudaGraphNodeTypeGraph = 0x04 - {"CU_GRAPH_NODE_TYPE_GRAPH", 
{"hipGraphNodeTypeGraph", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 4 - // cudaGraphNodeTypeEmpty = 0x05 - {"CU_GRAPH_NODE_TYPE_EMPTY", {"hipGraphNodeTypeEmpty", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 5 - // cudaGraphNodeTypeCount - {"CU_GRAPH_NODE_TYPE_COUNT", {"hipGraphNodeTypeCount", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 6 - - // cudaGraphExecUpdateResult - {"CUgraphExecUpdateResult", {"hipGraphExecUpdateResult", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUgraphExecUpdateResult_enum", {"hipGraphExecUpdateResult", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUgraphExecUpdateResult enum values - // cudaGraphExecUpdateSuccess - {"CU_GRAPH_EXEC_UPDATE_SUCCESS", {"hipGraphExecUpdateSuccess", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0 - // cudaGraphExecUpdateError - {"CU_GRAPH_EXEC_UPDATE_ERROR", {"hipGraphExecUpdateError", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - // cudaGraphExecUpdateErrorTopologyChanged - {"CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED", {"hipGraphExecUpdateErrorTopologyChanged", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2 - // cudaGraphExecUpdateErrorNodeTypeChanged - {"CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED", {"hipGraphExecUpdateErrorNodeTypeChanged", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x3 - // cudaGraphExecUpdateErrorFunctionChanged - {"CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED", {"hipGraphExecUpdateErrorFunctionChanged", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x4 - // cudaGraphExecUpdateErrorParametersChanged - {"CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED", {"hipGraphExecUpdateErrorParametersChanged", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x5 - // cudaGraphExecUpdateErrorNotSupported - {"CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED", {"hipGraphExecUpdateErrorNotSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 
0x6 - - // no analogue - {"CUipcMem_flags", {"hipIpcMemFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUipcMem_flags_enum", {"hipIpcMemFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUipcMem_flags enum values - // cudaIpcMemLazyEnablePeerAccess - {"CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS", {"hipIpcMemLazyEnablePeerAccess", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1 - - // no analogue - {"CUjit_cacheMode", {"hipJitCacheMode", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUjit_cacheMode_enum", {"hipJitCacheMode", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUjit_cacheMode enum values - // no analogue - {"CU_JIT_CACHE_OPTION_NONE", {"hipJitCacheModeOptionNone", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0 - // no analogue - {"CU_JIT_CACHE_OPTION_CG", {"hipJitCacheModeOptionCG", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - // no analogue - {"CU_JIT_CACHE_OPTION_CA", {"hipJitCacheModeOptionCA", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - {"CUjit_fallback", {"hipJitFallback", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUjit_fallback_enum", {"hipJitFallback", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUjit_fallback enum values - {"CU_PREFER_PTX", {"hipJitFallbackPreferPtx", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0 - {"CU_PREFER_BINARY", {"hipJitFallbackPreferBinary", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - // NOTE: is not used by HIP, as it has no JIT, thus just a dummy enum - {"CUjit_option", {"hipJitOption", "", CONV_TYPE, API_DRIVER}}, - {"CUjit_option_enum", {"hipJitOption", "", CONV_TYPE, API_DRIVER}}, - // CUjit_option enum values - {"CU_JIT_MAX_REGISTERS", {"hipJitOptionMaxRegisters", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0 - {"CU_JIT_THREADS_PER_BLOCK", {"hipJitOptionThreadsPerBlock", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_WALL_TIME", {"hipJitOptionWallTime", "", 
CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_INFO_LOG_BUFFER", {"hipJitOptionInfoLogBuffer", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES", {"hipJitOptionInfoLogBufferSizeBytes", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_ERROR_LOG_BUFFER", {"hipJitOptionErrorLogBuffer", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES", {"hipJitOptionErrorLogBufferSizeBytes", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_OPTIMIZATION_LEVEL", {"hipJitOptionOptimizationLevel", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_TARGET_FROM_CUCONTEXT", {"hipJitOptionTargetFromContext", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_TARGET", {"hipJitOptionTarget", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_FALLBACK_STRATEGY", {"hipJitOptionFallbackStrategy", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_GENERATE_DEBUG_INFO", {"hipJitOptionGenerateDebugInfo", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_LOG_VERBOSE", {"hipJitOptionLogVerbose", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_GENERATE_LINE_INFO", {"hipJitOptionGenerateLineInfo", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_CACHE_MODE", {"hipJitOptionCacheMode", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_NEW_SM3X_OPT", {"hipJitOptionSm3xOpt", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_FAST_COMPILE", {"hipJitOptionFastCompile", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_GLOBAL_SYMBOL_NAMES", {"hipJitGlobalSymbolNames", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_GLOBAL_SYMBOL_ADDRESSES", {"hipJitGlobalSymbolAddresses", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_GLOBAL_SYMBOL_COUNT", {"hipJitGlobalSymbolCount", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - {"CU_JIT_NUM_OPTIONS", {"hipJitOptionNumOptions", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, - - // no analogue - {"CUjit_target", {"hipJitTarget", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUjit_target_enum", 
{"hipJitTarget", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUjit_target enum values - // NOTE: Deprecated - {"CU_TARGET_COMPUTE_10", {"hipJitTargetCompute10", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 10 - // NOTE: Deprecated - {"CU_TARGET_COMPUTE_11", {"hipJitTargetCompute11", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 11 - // NOTE: Deprecated - {"CU_TARGET_COMPUTE_12", {"hipJitTargetCompute12", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 12 - // NOTE: Deprecated - {"CU_TARGET_COMPUTE_13", {"hipJitTargetCompute13", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 13 - {"CU_TARGET_COMPUTE_20", {"hipJitTargetCompute20", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 20 - {"CU_TARGET_COMPUTE_21", {"hipJitTargetCompute21", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 21 - {"CU_TARGET_COMPUTE_30", {"hipJitTargetCompute30", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 20 - {"CU_TARGET_COMPUTE_32", {"hipJitTargetCompute32", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 32 - {"CU_TARGET_COMPUTE_35", {"hipJitTargetCompute35", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 35 - {"CU_TARGET_COMPUTE_37", {"hipJitTargetCompute37", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 37 - {"CU_TARGET_COMPUTE_50", {"hipJitTargetCompute50", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 50 - {"CU_TARGET_COMPUTE_52", {"hipJitTargetCompute52", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 52 - {"CU_TARGET_COMPUTE_53", {"hipJitTargetCompute53", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 53 - {"CU_TARGET_COMPUTE_60", {"hipJitTargetCompute60", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 60 - {"CU_TARGET_COMPUTE_61", {"hipJitTargetCompute61", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 61 - {"CU_TARGET_COMPUTE_62", {"hipJitTargetCompute62", "", CONV_NUMERIC_LITERAL, 
API_DRIVER, HIP_UNSUPPORTED}}, // 62 - {"CU_TARGET_COMPUTE_70", {"hipJitTargetCompute70", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 70 - {"CU_TARGET_COMPUTE_72", {"hipJitTargetCompute72", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 70 - // NOTE: Deprecated - {"CU_TARGET_COMPUTE_73", {"hipJitTargetCompute73", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 73 - {"CU_TARGET_COMPUTE_75", {"hipJitTargetCompute75", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 75 - - // no analogue - {"CUjitInputType", {"hipJitInputType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUjitInputType_enum", {"hipJitInputType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUjitInputType enum values - {"CU_JIT_INPUT_CUBIN", {"hipJitInputTypeBin", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0 - {"CU_JIT_INPUT_PTX", {"hipJitInputTypePtx", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - {"CU_JIT_INPUT_FATBINARY", {"hipJitInputTypeFatBinary", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - {"CU_JIT_INPUT_OBJECT", {"hipJitInputTypeObject", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - {"CU_JIT_INPUT_LIBRARY", {"hipJitInputTypeLibrary", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - {"CU_JIT_NUM_INPUT_TYPES", {"hipJitInputTypeNumInputTypes", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaLimit - {"CUlimit", {"hipLimit_t", "", CONV_TYPE, API_DRIVER}}, - {"CUlimit_enum", {"hipLimit_t", "", CONV_TYPE, API_DRIVER}}, - // CUlimit enum values - // cudaLimitStackSize - {"CU_LIMIT_STACK_SIZE", {"hipLimitStackSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - //cudaLimitPrintfFifoSize - {"CU_LIMIT_PRINTF_FIFO_SIZE", {"hipLimitPrintfFifoSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - //cudaLimitMallocHeapSize - {"CU_LIMIT_MALLOC_HEAP_SIZE", {"hipLimitMallocHeapSize", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, 
// 0x02 - // cudaLimitDevRuntimeSyncDepth - {"CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH", {"hipLimitDevRuntimeSyncDepth", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x03 - // cudaLimitDevRuntimePendingLaunchCount - {"CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT", {"hipLimitDevRuntimePendingLaunchCount", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x04 - // cudaLimitMaxL2FetchGranularity - {"CU_LIMIT_MAX_L2_FETCH_GRANULARITY", {"hipLimitMaxL2FetchGranularity", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x05 - // no analogue - {"CU_LIMIT_MAX", {"hipLimitMax", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaMemoryAdvise - {"CUmem_advise", {"hipMemAdvise", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUmem_advise_enum", {"hipMemAdvise", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUmem_advise enum values - // cudaMemAdviseSetReadMostly - {"CU_MEM_ADVISE_SET_READ_MOSTLY", {"hipMemAdviseSetReadMostly", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - // cudaMemAdviseUnsetReadMostly - {"CU_MEM_ADVISE_UNSET_READ_MOSTLY", {"hipMemAdviseUnsetReadMostly", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - // cudaMemAdviseSetPreferredLocation - {"CU_MEM_ADVISE_SET_PREFERRED_LOCATION", {"hipMemAdviseSetPreferredLocation", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 3 - // cudaMemAdviseUnsetPreferredLocation - {"CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION", {"hipMemAdviseUnsetPreferredLocation", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 4 - // cudaMemAdviseSetAccessedBy - {"CU_MEM_ADVISE_SET_ACCESSED_BY", {"hipMemAdviseSetAccessedBy", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 5 - // cudaMemAdviseUnsetAccessedBy - {"CU_MEM_ADVISE_UNSET_ACCESSED_BY", {"hipMemAdviseUnsetAccessedBy", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 6 - - // no analogue - {"CUmemAttach_flags", {"hipMemAttachFlags_t", "", CONV_TYPE, API_DRIVER, 
HIP_UNSUPPORTED}}, - {"CUmemAttach_flags_enum", {"hipMemAttachFlags_t", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUmemAttach_flags enum values - // cudaMemAttachGlobal - {"CU_MEM_ATTACH_GLOBAL", {"hipMemAttachGlobal", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1 - // cudaMemAttachHost - {"CU_MEM_ATTACH_HOST", {"hipMemAttachHost", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x2 - // cudaMemAttachSingle - {"CU_MEM_ATTACH_SINGLE", {"hipMemAttachSingle", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x4 - - // no analogue - // NOTE: cudaMemoryType is partial analogue - {"CUmemorytype", {"hipMemoryType", "", CONV_TYPE, API_DRIVER}}, - {"CUmemorytype_enum", {"hipMemoryType", "", CONV_TYPE, API_DRIVER}}, - // CUmemorytype enum values - {"CU_MEMORYTYPE_HOST", {"hipMemoryTypeHost", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x01 - {"CU_MEMORYTYPE_DEVICE", {"hipMemoryTypeDevice", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x02 - {"CU_MEMORYTYPE_ARRAY", {"hipMemoryTypeArray", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x03 - {"CU_MEMORYTYPE_UNIFIED", {"hipMemoryTypeUnified", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x04 - - // cudaMemRangeAttribute - {"CUmem_range_attribute", {"hipMemRangeAttribute", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUmem_range_attribute_enum", {"hipMemRangeAttribute", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUmem_range_attribute enum values - // cudaMemRangeAttributeReadMostly - {"CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY", {"hipMemRangeAttributeReadMostly", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - // cudaMemRangeAttributePreferredLocation - {"CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION", {"hipMemRangeAttributePreferredLocation", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - // cudaMemRangeAttributeAccessedBy - {"CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY", {"hipMemRangeAttributeAccessedBy", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 3 - // 
cudaMemRangeAttributeLastPrefetchLocation - {"CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION", {"hipMemRangeAttributeLastPrefetchLocation", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 4 - - //no analogue - {"CUoccupancy_flags", {"hipOccupancyFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUoccupancy_flags_enum", {"hipOccupancyFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUoccupancy_flags enum values - // cudaOccupancyDefault - {"CU_OCCUPANCY_DEFAULT", {"hipOccupancyDefault", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x00 - // cudaOccupancyDisableCachingOverride - {"CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE", {"hipOccupancyDisableCachingOverride", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - - //no analogue - // TODO: Analogous enum is needed in HIP. Couldn't map enum to struct hipPointerAttribute_t. - // TODO: Do the same for Pointer Attributes as for Device Attributes. - {"CUpointer_attribute", {"hipPointerAttribute", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUpointer_attribute_enum", {"hipPointerAttribute", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUpointer_attribute enum values - {"CU_POINTER_ATTRIBUTE_CONTEXT", {"hipPointerAttributeContext", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - {"CU_POINTER_ATTRIBUTE_MEMORY_TYPE", {"hipPointerAttributeMemoryType", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - {"CU_POINTER_ATTRIBUTE_DEVICE_POINTER", {"hipPointerAttributeDevicePointer", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 3 - {"CU_POINTER_ATTRIBUTE_HOST_POINTER", {"hipPointerAttributeHostPointer", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 4 - {"CU_POINTER_ATTRIBUTE_P2P_TOKENS", {"hipPointerAttributeP2pTokens", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 5 - {"CU_POINTER_ATTRIBUTE_SYNC_MEMOPS", {"hipPointerAttributeSyncMemops", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 6 - 
{"CU_POINTER_ATTRIBUTE_BUFFER_ID", {"hipPointerAttributeBufferId", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 7 - {"CU_POINTER_ATTRIBUTE_IS_MANAGED", {"hipPointerAttributeIsManaged", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 8 - {"CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL", {"hipPointerAttributeDeviceOrdinal", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 9 - {"CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE", {"hipPointerAttributeIsLegacyCudaIpcCapable", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 10 - {"CU_POINTER_ATTRIBUTE_RANGE_START_ADDR", {"hipPointerAttributeRangeStartAddress", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 11 - {"CU_POINTER_ATTRIBUTE_RANGE_SIZE", {"hipPointerAttributeRangeSize", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 12 - {"CU_POINTER_ATTRIBUTE_MAPPED", {"hipPointerAttributeMapped", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 13 - {"CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES", {"hipPointerAttributeAllowedHandleTypes", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 14 - - // cudaResourceType - {"CUresourcetype", {"hipResourceType", "", CONV_TYPE, API_DRIVER}}, - {"CUresourcetype_enum", {"hipResourceType", "", CONV_TYPE, API_DRIVER}}, - // CUresourcetype enum values - // cudaResourceTypeArray - {"CU_RESOURCE_TYPE_ARRAY", {"hipResourceTypeArray", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x00 - //cudaResourceTypeMipmappedArray - {"CU_RESOURCE_TYPE_MIPMAPPED_ARRAY", {"hipResourceTypeMipmappedArray", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x01 - //cudaResourceTypeLinear - {"CU_RESOURCE_TYPE_LINEAR", {"hipResourceTypeLinear", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x02 - //cudaResourceTypePitch2D - {"CU_RESOURCE_TYPE_PITCH2D", {"hipResourceTypePitch2D", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x03 - - // cudaResourceViewFormat - {"CUresourceViewFormat", {"hipResourceViewFormat", "", CONV_TYPE, API_DRIVER}}, - 
{"CUresourceViewFormat_enum", {"hipResourceViewFormat", "", CONV_TYPE, API_DRIVER}}, - // CUresourceViewFormat enum values - // cudaResViewFormatNone - {"CU_RES_VIEW_FORMAT_NONE", {"hipResViewFormatNone", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x00 - // cudaResViewFormatUnsignedChar1 - {"CU_RES_VIEW_FORMAT_UINT_1X8", {"hipResViewFormatUnsignedChar1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x01 - // cudaResViewFormatUnsignedChar2 - {"CU_RES_VIEW_FORMAT_UINT_2X8", {"hipResViewFormatUnsignedChar2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x02 - // cudaResViewFormatUnsignedChar4 - {"CU_RES_VIEW_FORMAT_UINT_4X8", {"hipResViewFormatUnsignedChar4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x03 - // cudaResViewFormatSignedChar1 - {"CU_RES_VIEW_FORMAT_SINT_1X8", {"hipResViewFormatSignedChar1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x04 - // cudaResViewFormatSignedChar2 - {"CU_RES_VIEW_FORMAT_SINT_2X8", {"hipResViewFormatSignedChar2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x05 - // cudaResViewFormatSignedChar4 - {"CU_RES_VIEW_FORMAT_SINT_4X8", {"hipResViewFormatSignedChar4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x06 - // cudaResViewFormatUnsignedShort1 - {"CU_RES_VIEW_FORMAT_UINT_1X16", {"hipResViewFormatUnsignedShort1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x07 - // cudaResViewFormatUnsignedShort2 - {"CU_RES_VIEW_FORMAT_UINT_2X16", {"hipResViewFormatUnsignedShort2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x08 - // cudaResViewFormatUnsignedShort4 - {"CU_RES_VIEW_FORMAT_UINT_4X16", {"hipResViewFormatUnsignedShort4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x09 - // cudaResViewFormatSignedShort1 - {"CU_RES_VIEW_FORMAT_SINT_1X16", {"hipResViewFormatSignedShort1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x0a - // cudaResViewFormatSignedShort2 - {"CU_RES_VIEW_FORMAT_SINT_2X16", {"hipResViewFormatSignedShort2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x0b - // cudaResViewFormatSignedShort4 - {"CU_RES_VIEW_FORMAT_SINT_4X16", 
{"hipResViewFormatSignedShort4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x0c - // cudaResViewFormatUnsignedInt1 - {"CU_RES_VIEW_FORMAT_UINT_1X32", {"hipResViewFormatUnsignedInt1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x0d - // cudaResViewFormatUnsignedInt2 - {"CU_RES_VIEW_FORMAT_UINT_2X32", {"hipResViewFormatUnsignedInt2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x0e - // cudaResViewFormatUnsignedInt4 - {"CU_RES_VIEW_FORMAT_UINT_4X32", {"hipResViewFormatUnsignedInt4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x0f - // cudaResViewFormatSignedInt1 - {"CU_RES_VIEW_FORMAT_SINT_1X32", {"hipResViewFormatSignedInt1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x10 - // cudaResViewFormatSignedInt2 - {"CU_RES_VIEW_FORMAT_SINT_2X32", {"hipResViewFormatSignedInt2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x11 - // cudaResViewFormatSignedInt4 - {"CU_RES_VIEW_FORMAT_SINT_4X32", {"hipResViewFormatSignedInt4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x12 - // cudaResViewFormatHalf1 - {"CU_RES_VIEW_FORMAT_FLOAT_1X16", {"hipResViewFormatHalf1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x13 - // cudaResViewFormatHalf2 - {"CU_RES_VIEW_FORMAT_FLOAT_2X16", {"hipResViewFormatHalf2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x14 - // cudaResViewFormatHalf4 - {"CU_RES_VIEW_FORMAT_FLOAT_4X16", {"hipResViewFormatHalf4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x15 - // cudaResViewFormatFloat1 - {"CU_RES_VIEW_FORMAT_FLOAT_1X32", {"hipResViewFormatFloat1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x16 - // cudaResViewFormatFloat2 - {"CU_RES_VIEW_FORMAT_FLOAT_2X32", {"hipResViewFormatFloat2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x17 - // cudaResViewFormatFloat4 - {"CU_RES_VIEW_FORMAT_FLOAT_4X32", {"hipResViewFormatFloat4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x18 - // cudaResViewFormatUnsignedBlockCompressed1 - {"CU_RES_VIEW_FORMAT_UNSIGNED_BC1", {"hipResViewFormatUnsignedBlockCompressed1", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x19 - // 
cudaResViewFormatUnsignedBlockCompressed2 - {"CU_RES_VIEW_FORMAT_UNSIGNED_BC2", {"hipResViewFormatUnsignedBlockCompressed2", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1a - // cudaResViewFormatUnsignedBlockCompressed3 - {"CU_RES_VIEW_FORMAT_UNSIGNED_BC3", {"hipResViewFormatUnsignedBlockCompressed3", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1b - // cudaResViewFormatUnsignedBlockCompressed4 - {"CU_RES_VIEW_FORMAT_UNSIGNED_BC4", {"hipResViewFormatUnsignedBlockCompressed4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1c - // cudaResViewFormatSignedBlockCompressed4 - {"CU_RES_VIEW_FORMAT_SIGNED_BC4", {"hipResViewFormatSignedBlockCompressed4", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1d - // cudaResViewFormatUnsignedBlockCompressed5 - {"CU_RES_VIEW_FORMAT_UNSIGNED_BC5", {"hipResViewFormatUnsignedBlockCompressed5", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1e - // cudaResViewFormatSignedBlockCompressed5 - {"CU_RES_VIEW_FORMAT_SIGNED_BC5", {"hipResViewFormatSignedBlockCompressed5", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1f - // cudaResViewFormatUnsignedBlockCompressed6H - {"CU_RES_VIEW_FORMAT_UNSIGNED_BC6H", {"hipResViewFormatUnsignedBlockCompressed6H", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x20 - // cudaResViewFormatSignedBlockCompressed6H - {"CU_RES_VIEW_FORMAT_SIGNED_BC6H", {"hipResViewFormatSignedBlockCompressed6H", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x21 - // cudaResViewFormatUnsignedBlockCompressed7 - {"CU_RES_VIEW_FORMAT_UNSIGNED_BC7", {"hipResViewFormatUnsignedBlockCompressed7", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x22 - - // cudaError - {"CUresult", {"hipError_t", "", CONV_TYPE, API_DRIVER}}, - {"cudaError_enum", {"hipError_t", "", CONV_TYPE, API_DRIVER}}, - // CUresult enum values - // cudaSuccess - {"CUDA_SUCCESS", {"hipSuccess", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0 - // cudaErrorInvalidValue - {"CUDA_ERROR_INVALID_VALUE", {"hipErrorInvalidValue", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 1 - // 
cudaErrorMemoryAllocation - {"CUDA_ERROR_OUT_OF_MEMORY", {"hipErrorOutOfMemory", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 2 - // cudaErrorInitializationError - {"CUDA_ERROR_NOT_INITIALIZED", {"hipErrorNotInitialized", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 3 - // cudaErrorCudartUnloading - {"CUDA_ERROR_DEINITIALIZED", {"hipErrorDeinitialized", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 4 - // cudaErrorProfilerDisabled - {"CUDA_ERROR_PROFILER_DISABLED", {"hipErrorProfilerDisabled", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 5 - // cudaErrorProfilerNotInitialized - // NOTE: Deprecated since CUDA 5.0 - {"CUDA_ERROR_PROFILER_NOT_INITIALIZED", {"hipErrorProfilerNotInitialized", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 6 - // NOTE: Deprecated since CUDA 5.0 - // cudaErrorProfilerAlreadyStarted - {"CUDA_ERROR_PROFILER_ALREADY_STARTED", {"hipErrorProfilerAlreadyStarted", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 7 - // cudaErrorProfilerAlreadyStopped - // NOTE: Deprecated since CUDA 5.0 - {"CUDA_ERROR_PROFILER_ALREADY_STOPPED", {"hipErrorProfilerAlreadyStopped", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 8 - // cudaErrorNoDevice - {"CUDA_ERROR_NO_DEVICE", {"hipErrorNoDevice", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 100 - // cudaErrorInvalidDevice - {"CUDA_ERROR_INVALID_DEVICE", {"hipErrorInvalidDevice", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 101 - // cudaErrorInvalidKernelImage - {"CUDA_ERROR_INVALID_IMAGE", {"hipErrorInvalidImage", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 200 - // cudaErrorDeviceUninitilialized - {"CUDA_ERROR_INVALID_CONTEXT", {"hipErrorInvalidContext", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 201 - // no analogue - // NOTE: Deprecated since CUDA 3.2 - {"CUDA_ERROR_CONTEXT_ALREADY_CURRENT", {"hipErrorContextAlreadyCurrent", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 202 - // cudaErrorMapBufferObjectFailed - {"CUDA_ERROR_MAP_FAILED", {"hipErrorMapFailed", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 205 - // 
cudaErrorUnmapBufferObjectFailed - {"CUDA_ERROR_UNMAP_FAILED", {"hipErrorUnmapFailed", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 206 - // cudaErrorArrayIsMapped - {"CUDA_ERROR_ARRAY_IS_MAPPED", {"hipErrorArrayIsMapped", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 207 - // cudaErrorAlreadyMapped - {"CUDA_ERROR_ALREADY_MAPPED", {"hipErrorAlreadyMapped", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 208 - // cudaErrorNoKernelImageForDevice - {"CUDA_ERROR_NO_BINARY_FOR_GPU", {"hipErrorNoBinaryForGpu", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 209 - // cudaErrorAlreadyAcquired - {"CUDA_ERROR_ALREADY_ACQUIRED", {"hipErrorAlreadyAcquired", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 210 - // cudaErrorNotMapped - {"CUDA_ERROR_NOT_MAPPED", {"hipErrorNotMapped", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 211 - // cudaErrorNotMappedAsArray - {"CUDA_ERROR_NOT_MAPPED_AS_ARRAY", {"hipErrorNotMappedAsArray", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 212 - // cudaErrorNotMappedAsPointer - {"CUDA_ERROR_NOT_MAPPED_AS_POINTER", {"hipErrorNotMappedAsPointer", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 213 - // cudaErrorECCUncorrectable - {"CUDA_ERROR_ECC_UNCORRECTABLE", {"hipErrorECCNotCorrectable", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 214 - // cudaErrorUnsupportedLimit - {"CUDA_ERROR_UNSUPPORTED_LIMIT", {"hipErrorUnsupportedLimit", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 215 - // cudaErrorDeviceAlreadyInUse - {"CUDA_ERROR_CONTEXT_ALREADY_IN_USE", {"hipErrorContextAlreadyInUse", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 216 - // cudaErrorPeerAccessUnsupported - {"CUDA_ERROR_PEER_ACCESS_UNSUPPORTED", {"hipErrorPeerAccessUnsupported", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 217 - // cudaErrorInvalidPtx - {"CUDA_ERROR_INVALID_PTX", {"hipErrorInvalidKernelFile", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 218 - // cudaErrorInvalidGraphicsContext - {"CUDA_ERROR_INVALID_GRAPHICS_CONTEXT", {"hipErrorInvalidGraphicsContext", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 219 - // 
cudaErrorNvlinkUncorrectable - {"CUDA_ERROR_NVLINK_UNCORRECTABLE", {"hipErrorNvlinkUncorrectable", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 220 - // cudaErrorJitCompilerNotFound - {"CUDA_ERROR_JIT_COMPILER_NOT_FOUND", {"hipErrorJitCompilerNotFound", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 221 - // cudaErrorInvalidSource - {"CUDA_ERROR_INVALID_SOURCE", {"hipErrorInvalidSource", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 300 - // cudaErrorFileNotFound - {"CUDA_ERROR_FILE_NOT_FOUND", {"hipErrorFileNotFound", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 301 - // cudaErrorSharedObjectSymbolNotFound - {"CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND", {"hipErrorSharedObjectSymbolNotFound", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 302 - // cudaErrorSharedObjectInitFailed - {"CUDA_ERROR_SHARED_OBJECT_INIT_FAILED", {"hipErrorSharedObjectInitFailed", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 303 - // cudaErrorOperatingSystem - {"CUDA_ERROR_OPERATING_SYSTEM", {"hipErrorOperatingSystem", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 304 - // cudaErrorInvalidResourceHandle - {"CUDA_ERROR_INVALID_HANDLE", {"hipErrorInvalidHandle", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 400 - // cudaErrorIllegalState - {"CUDA_ERROR_ILLEGAL_STATE", {"hipErrorIllegalState", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 401 - // cudaErrorSymbolNotFound - {"CUDA_ERROR_NOT_FOUND", {"hipErrorNotFound", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 500 - // cudaErrorNotReady - {"CUDA_ERROR_NOT_READY", {"hipErrorNotReady", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 600 - // cudaErrorIllegalAddress - {"CUDA_ERROR_ILLEGAL_ADDRESS", {"hipErrorIllegalAddress", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 700 - // cudaErrorLaunchOutOfResources - {"CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES", {"hipErrorLaunchOutOfResources", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 701 - // cudaErrorLaunchTimeout - {"CUDA_ERROR_LAUNCH_TIMEOUT", {"hipErrorLaunchTimeOut", "", 
CONV_NUMERIC_LITERAL, API_DRIVER}}, // 702 - // cudaErrorLaunchIncompatibleTexturing - {"CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING", {"hipErrorLaunchIncompatibleTexturing", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 703 - // cudaErrorPeerAccessAlreadyEnabled - {"CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED", {"hipErrorPeerAccessAlreadyEnabled", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 704 - // cudaErrorPeerAccessNotEnabled - {"CUDA_ERROR_PEER_ACCESS_NOT_ENABLED", {"hipErrorPeerAccessNotEnabled", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 705 - // cudaErrorSetOnActiveProcess - {"CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE", {"hipErrorSetOnActiveProcess", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 708 - // cudaErrorContextIsDestroyed - {"CUDA_ERROR_CONTEXT_IS_DESTROYED", {"hipErrorContextIsDestroyed", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 709 - // cudaErrorAssert - {"CUDA_ERROR_ASSERT", {"hipErrorAssert", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 710 - // cudaErrorTooManyPeers - {"CUDA_ERROR_TOO_MANY_PEERS", {"hipErrorTooManyPeers", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 711 - // cudaErrorHostMemoryAlreadyRegistered - {"CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED", {"hipErrorHostMemoryAlreadyRegistered", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 712 - // cudaErrorHostMemoryNotRegistered - {"CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED", {"hipErrorHostMemoryNotRegistered", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 713 - // cudaErrorHardwareStackError - {"CUDA_ERROR_HARDWARE_STACK_ERROR", {"hipErrorHardwareStackError", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 714 - // cudaErrorIllegalInstruction - {"CUDA_ERROR_ILLEGAL_INSTRUCTION", {"hipErrorIllegalInstruction", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 715 - // cudaErrorMisalignedAddress - {"CUDA_ERROR_MISALIGNED_ADDRESS", {"hipErrorMisalignedAddress", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 716 - // 
cudaErrorInvalidAddressSpace - {"CUDA_ERROR_INVALID_ADDRESS_SPACE", {"hipErrorInvalidAddressSpace", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 717 - // cudaErrorInvalidPc - {"CUDA_ERROR_INVALID_PC", {"hipErrorInvalidPc", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 718 - // cudaErrorLaunchFailure - {"CUDA_ERROR_LAUNCH_FAILED", {"hipErrorLaunchFailure", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 719 - // cudaErrorCooperativeLaunchTooLarge - {"CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE", {"hipErrorCooperativeLaunchTooLarge", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 720 - // cudaErrorNotPermitted - {"CUDA_ERROR_NOT_PERMITTED", {"hipErrorNotPermitted", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 800 - // cudaErrorNotSupported - {"CUDA_ERROR_NOT_SUPPORTED", {"hipErrorNotSupported", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 801 - // cudaErrorSystemNotReady - {"CUDA_ERROR_SYSTEM_NOT_READY", {"hipErrorSystemNotReady", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 802 - // cudaErrorSystemDriverMismatch - {"CUDA_ERROR_SYSTEM_DRIVER_MISMATCH", {"hipErrorSystemDriverMismatch", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 803 - // cudaErrorCompatNotSupportedOnDevice - {"CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE", {"hipErrorCompatNotSupportedOnDevice", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 804 - // cudaErrorStreamCaptureUnsupported - {"CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED", {"hipErrorStreamCaptureUnsupported", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 900 - // cudaErrorStreamCaptureInvalidated - {"CUDA_ERROR_STREAM_CAPTURE_INVALIDATED", {"hipErrorStreamCaptureInvalidated", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 901 - // cudaErrorStreamCaptureMerge - {"CUDA_ERROR_STREAM_CAPTURE_MERGE", {"hipErrorStreamCaptureMerge", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 902 - // cudaErrorStreamCaptureUnmatched - 
{"CUDA_ERROR_STREAM_CAPTURE_UNMATCHED", {"hipErrorStreamCaptureUnmatched", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 903 - // cudaErrorStreamCaptureUnjoined - {"CUDA_ERROR_STREAM_CAPTURE_UNJOINED", {"hipErrorStreamCaptureUnjoined", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 904 - // cudaErrorStreamCaptureIsolation - {"CUDA_ERROR_STREAM_CAPTURE_ISOLATION", {"hipErrorStreamCaptureIsolation", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 905 - // cudaErrorStreamCaptureImplicit - {"CUDA_ERROR_STREAM_CAPTURE_IMPLICIT", {"hipErrorStreamCaptureImplicit", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 906 - // cudaErrorCapturedEvent - {"CUDA_ERROR_CAPTURED_EVENT", {"hipErrorCapturedEvent", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 907 - // cudaErrorStreamCaptureWrongThread - {"CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD", {"hipErrorStreamCaptureWrongThread", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 908 - // cudaErrorTimeout - {"CUDA_ERROR_TIMEOUT", {"hipErrorTimeout", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 909 - // cudaErrorGraphExecUpdateFailure - {"CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE", {"hipErrorGraphExecUpdateFailure", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 910 - // cudaErrorUnknown - {"CUDA_ERROR_UNKNOWN", {"hipErrorUnknown", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 999 - - // cudaSharedMemConfig - {"CUsharedconfig", {"hipSharedMemConfig", "", CONV_TYPE, API_DRIVER}}, - {"CUsharedconfig_enum", {"hipSharedMemConfig", "", CONV_TYPE, API_DRIVER}}, - // CUsharedconfig enum values - // cudaSharedMemBankSizeDefault = 0 - {"CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE", {"hipSharedMemBankSizeDefault", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x00 - // cudaSharedMemBankSizeFourByte = 1 - {"CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE", {"hipSharedMemBankSizeFourByte", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x01 - // 
cudaSharedMemBankSizeEightByte = 2 - {"CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE", {"hipSharedMemBankSizeEightByte", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x02 - - // cudaSharedCarveout - {"CUshared_carveout", {"hipSharedCarveout", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUshared_carveout_enum", {"hipSharedCarveout", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUshared_carveout enum values - // cudaSharedmemCarveoutDefault - {"CU_SHAREDMEM_CARVEOUT_DEFAULT", {"hipSharedmemCarveoutDefault", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // -1 - // cudaSharedmemCarveoutMaxShared - {"CU_SHAREDMEM_CARVEOUT_MAX_SHARED", {"hipSharedmemCarveoutMaxShared", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 100 - // cudaSharedmemCarveoutMaxShared - {"CU_SHAREDMEM_CARVEOUT_MAX_L1", {"hipSharedmemCarveoutMaxL1", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0 - - // no analogue - {"CUstream_flags", {"hipStreamFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUstream_flags_enum", {"hipStreamFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUstream_flags enum values - // cudaStreamDefault = 0x00 - {"CU_STREAM_DEFAULT", {"hipStreamDefault", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x0 - // cudaStreamNonBlocking = 0x01 - {"CU_STREAM_NON_BLOCKING", {"hipStreamNonBlocking", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 0x1 - - // no analogue - {"CUstreamBatchMemOpType", {"hipStreamBatchMemOpType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUstreamBatchMemOpType_enum", {"hipStreamBatchMemOpType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUstreamBatchMemOpType enum values - {"CU_STREAM_MEM_OP_WAIT_VALUE_32", {"hipStreamBatchMemOpWaitValue32", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - {"CU_STREAM_MEM_OP_WRITE_VALUE_32", {"hipStreamBatchMemOpWriteValue32", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - {"CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES", 
{"hipStreamBatchMemOpFlushRemoteWrites", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 3 - {"CU_STREAM_MEM_OP_WAIT_VALUE_64", {"hipStreamBatchMemOpWaitValue64", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 4 - {"CU_STREAM_MEM_OP_WRITE_VALUE_64", {"hipStreamBatchMemOpWriteValue64", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 5 - - // cudaStreamCaptureStatus - {"CUstreamCaptureStatus", {"hipStreamCaptureStatus", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUstreamCaptureStatus_enum", {"hipStreamCaptureStatus", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUstreamCaptureStatus enum values - // cudaStreamCaptureStatusNone - {"CU_STREAM_CAPTURE_STATUS_NONE", {"hipStreamCaptureStatusNone", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0 - // cudaStreamCaptureStatusActive - {"CU_STREAM_CAPTURE_STATUS_ACTIVE", {"hipStreamCaptureStatusActive", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - // cudaStreamCaptureStatusInvalidated - {"CU_STREAM_CAPTURE_STATUS_INVALIDATED", {"hipStreamCaptureStatusInvalidated", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - - // cudaStreamCaptureMode - {"CUstreamCaptureMode", {"hipStreamCaptureMode", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUstreamCaptureMode_enum", {"hipStreamCaptureMode", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUstreamCaptureMode enum values - // cudaStreamCaptureModeGlobal - {"CU_STREAM_CAPTURE_MODE_GLOBAL", {"hipStreamCaptureModeGlobal", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0 - // cudaStreamCaptureModeThreadLocal - {"CU_STREAM_CAPTURE_MODE_THREAD_LOCAL", {"hipStreamCaptureModeThreadLocal", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1 - // cudaStreamCaptureModeRelaxed - {"CU_STREAM_CAPTURE_MODE_RELAXED", {"hipStreamCaptureModeRelaxed", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 2 - - // no analogue - {"CUstreamWaitValue_flags", 
{"hipStreamWaitValueFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUstreamWaitValue_flags_enum", {"hipStreamWaitValueFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUstreamWaitValue_flags enum values - {"CU_STREAM_WAIT_VALUE_GEQ", {"hipStreamWaitValueGeq", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0 - {"CU_STREAM_WAIT_VALUE_EQ", {"hipStreamWaitValueEq", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - {"CU_STREAM_WAIT_VALUE_AND", {"hipStreamWaitValueAnd", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2 - {"CU_STREAM_WAIT_VALUE_FLUSH", {"hipStreamWaitValueFlush", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 1<<30 - - // no analogue - {"CUstreamWriteValue_flags", {"hipStreamWriteValueFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUstreamWriteValue_flags_enum", {"hipStreamWriteValueFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUstreamWriteValue_flags enum values - {"CU_STREAM_WRITE_VALUE_DEFAULT", {"hipStreamWriteValueDefault", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0 - {"CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER", {"hipStreamWriteValueNoMemoryBarrier", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - - // cudaGLDeviceList - {"CUGLDeviceList", {"hipGLDeviceList", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUGLDeviceList_enum", {"hipGLDeviceList", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUGLDeviceList enum values - // cudaGLDeviceListAll = 1 - {"CU_GL_DEVICE_LIST_ALL", {"hipGLDeviceListAll", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaGLDeviceListCurrentFrame = 2 - {"CU_GL_DEVICE_LIST_CURRENT_FRAME", {"hipGLDeviceListCurrentFrame", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaGLDeviceListNextFrame = 3 - {"CU_GL_DEVICE_LIST_NEXT_FRAME", {"hipGLDeviceListNextFrame", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x03 - - // 
cudaGLMapFlags - {"CUGLmap_flags", {"hipGLMapFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUGLmap_flags_enum", {"hipGLMapFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUGLmap_flags enum values - // cudaGLMapFlagsNone = 0 - {"CU_GL_MAP_RESOURCE_FLAGS_NONE", {"hipGLMapFlagsNone", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaGLMapFlagsReadOnly = 1 - {"CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY", {"hipGLMapFlagsReadOnly", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaGLMapFlagsWriteDiscard = 2 - {"CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD", {"hipGLMapFlagsWriteDiscard", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - - // cudaD3D9DeviceList - {"CUd3d9DeviceList", {"hipD3D9DeviceList", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUd3d9DeviceList_enum", {"hipD3D9DeviceList", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUd3d9DeviceList enum values - // cudaD3D9DeviceListAll = 1 - {"CU_D3D9_DEVICE_LIST_ALL", {"HIP_D3D9_DEVICE_LIST_ALL", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaD3D9DeviceListCurrentFrame = 2 - {"CU_D3D9_DEVICE_LIST_CURRENT_FRAME", {"HIP_D3D9_DEVICE_LIST_CURRENT_FRAME", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaD3D9DeviceListNextFrame = 3 - {"CU_D3D9_DEVICE_LIST_NEXT_FRAME", {"HIP_D3D9_DEVICE_LIST_NEXT_FRAME", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x03 - - // cudaD3D9MapFlags - // NOTE: Deprecated - {"CUd3d9map_flags", {"hipD3D9MapFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUd3d9map_flags_enum", {"hipD3D9MapFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUd3d9map_flags enum values - // cudaD3D9MapFlagsNone = 0 - {"CU_D3D9_MAPRESOURCE_FLAGS_NONE", {"HIP_D3D9_MAPRESOURCE_FLAGS_NONE", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaD3D9MapFlagsReadOnly = 1 - {"CU_D3D9_MAPRESOURCE_FLAGS_READONLY", 
{"HIP_D3D9_MAPRESOURCE_FLAGS_READONLY", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaD3D9MapFlagsWriteDiscard = 2 - {"CU_D3D9_MAPRESOURCE_FLAGS_WRITEDISCARD", {"HIP_D3D9_MAPRESOURCE_FLAGS_WRITEDISCARD", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - - // cudaD3D9RegisterFlags - {"CUd3d9register_flags", {"hipD3D9RegisterFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUd3d9register_flags_enum", {"hipD3D9RegisterFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUd3d9register_flags enum values - // cudaD3D9RegisterFlagsNone = 0 - {"CU_D3D9_REGISTER_FLAGS_NONE", {"HIP_D3D9_REGISTER_FLAGS_NONE", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaD3D9RegisterFlagsArray = 1 - {"CU_D3D9_REGISTER_FLAGS_ARRAY", {"HIP_D3D9_REGISTER_FLAGS_ARRAY", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - - // cudaD3D10DeviceList - {"CUd3d10DeviceList", {"hipd3d10DeviceList", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUd3d10DeviceList_enum", {"hipD3D10DeviceList", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUd3d10DeviceList enum values - // cudaD3D10DeviceListAll = 1 - {"CU_D3D10_DEVICE_LIST_ALL", {"HIP_D3D10_DEVICE_LIST_ALL", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaD3D10DeviceListCurrentFrame = 2 - {"CU_D3D10_DEVICE_LIST_CURRENT_FRAME", {"HIP_D3D10_DEVICE_LIST_CURRENT_FRAME", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaD3D10DeviceListNextFrame = 3 - {"CU_D3D10_DEVICE_LIST_NEXT_FRAME", {"HIP_D3D10_DEVICE_LIST_NEXT_FRAME", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x03 - - // cudaD3D10MapFlags - {"CUd3d10map_flags", {"hipD3D10MapFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUd3d10map_flags_enum", {"hipD3D10MapFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUd3d10map_flags enum values - // cudaD3D10MapFlagsNone = 0 - {"CU_D3D10_MAPRESOURCE_FLAGS_NONE", 
{"HIP_D3D10_MAPRESOURCE_FLAGS_NONE", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaD3D10MapFlagsReadOnly = 1 - {"CU_D3D10_MAPRESOURCE_FLAGS_READONLY", {"HIP_D3D10_MAPRESOURCE_FLAGS_READONLY", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaD3D10MapFlagsWriteDiscard = 2 - {"CU_D3D10_MAPRESOURCE_FLAGS_WRITEDISCARD", {"HIP_D3D10_MAPRESOURCE_FLAGS_WRITEDISCARD", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - - // cudaD3D10RegisterFlags - {"CUd3d10register_flags", {"hipD3D10RegisterFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUd3d10register_flags_enum", {"hipD3D10RegisterFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUd3d10register_flags enum values - // cudaD3D10RegisterFlagsNone = 0 - {"CU_D3D10_REGISTER_FLAGS_NONE", {"HIP_D3D10_REGISTER_FLAGS_NONE", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x00 - // cudaD3D10RegisterFlagsArray = 1 - {"CU_D3D10_REGISTER_FLAGS_ARRAY", {"HIP_D3D10_REGISTER_FLAGS_ARRAY", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - - // cudaD3D11DeviceList - {"CUd3d11DeviceList", {"hipd3d11DeviceList", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUd3d11DeviceList_enum", {"hipD3D11DeviceList", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUd3d11DeviceList enum values - // cudaD3D11DeviceListAll = 1 - {"CU_D3D11_DEVICE_LIST_ALL", {"HIP_D3D11_DEVICE_LIST_ALL", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaD3D11DeviceListCurrentFrame = 2 - {"CU_D3D11_DEVICE_LIST_CURRENT_FRAME", {"HIP_D3D11_DEVICE_LIST_CURRENT_FRAME", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaD3D11DeviceListNextFrame = 3 - {"CU_D3D11_DEVICE_LIST_NEXT_FRAME", {"HIP_D3D11_DEVICE_LIST_NEXT_FRAME", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x03 - - // no analogue - {"CUmemAllocationHandleType", {"hipMemoryAllocationHandleType", "", CONV_TYPE, API_DRIVER, 
HIP_UNSUPPORTED}}, - {"CUmemAllocationHandleType_enum", {"hipMemoryAllocationHandleType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUmemAllocationHandleType enum values - {"CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR", {"HIP_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - {"CU_MEM_HANDLE_TYPE_WIN32", {"HIP_MEM_HANDLE_TYPE_WIN32", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2 - {"CU_MEM_HANDLE_TYPE_WIN32_KMT", {"HIP_MEM_HANDLE_TYPE_WIN32_KMT", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x4 - {"CU_MEM_HANDLE_TYPE_MAX", {"HIP_MEM_HANDLE_TYPE_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0xFFFFFFFF - - // no analogue - {"CUmemAccess_flags", {"hipMemoryAccessFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUmemAccess_flags_enum", {"hipMemoryAccessFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUmemAccess_flags enum values - {"CU_MEM_ACCESS_FLAGS_PROT_NONE", {"HIP_MEM_ACCESS_FLAGS_PROT_NONE", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - {"CU_MEM_ACCESS_FLAGS_PROT_READ", {"HIP_MEM_ACCESS_FLAGS_PROT_READ", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x2 - {"CU_MEM_ACCESS_FLAGS_PROT_READWRITE", {"HIP_MEM_ACCESS_FLAGS_PROT_READWRITE", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x3 - {"CU_MEM_ACCESS_FLAGS_PROT_MAX", {"HIP_MEM_ACCESS_FLAGS_PROT_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0xFFFFFFFF - - // no analogue - {"CUmemLocationType", {"hipMemoryLocationType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUmemLocationType_enum", {"hipMemoryLocationType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUmemLocationType enum values - {"CU_MEM_LOCATION_TYPE_INVALID", {"HIP_MEM_LOCATION_TYPE_INVALID", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0 - {"CU_MEM_LOCATION_TYPE_DEVICE", {"HIP_MEM_LOCATION_TYPE_DEVICE", "", 
CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - {"CU_MEM_LOCATION_TYPE_MAX", {"HIP_MEM_LOCATION_TYPE_MAX", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0xFFFFFFFF - - // no analogue - {"CUmemAllocationGranularity_flags", {"hipMemoryAllocationGranularityFlags", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUmemAllocationGranularity_flags_enum", {"hipMemoryLocationType", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - // CUmemAllocationGranularity_flags enum values - {"CU_MEM_ALLOC_GRANULARITY_MINIMUM", {"HIP_MEM_ALLOC_GRANULARITY_MINIMUM", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0 - {"CU_MEM_ALLOC_GRANULARITY_RECOMMENDED", {"HIP_MEM_ALLOC_GRANULARITY_RECOMMENDED", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - - // 4. Typedefs - - // no analogue - {"CUdevice", {"hipDevice_t", "", CONV_TYPE, API_DRIVER}}, - {"CUdeviceptr", {"hipDeviceptr_t", "", CONV_TYPE, API_DRIVER}}, - - // cudaHostFn_t - {"CUhostFn", {"hipHostFn", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // no analogue - {"CUoccupancyB2DSize", {"hipOccupancyB2DSize", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaStreamCallback_t - {"CUstreamCallback", {"hipStreamCallback_t", "", CONV_TYPE, API_DRIVER}}, - - // cudaSurfaceObject_t - {"CUsurfObject", {"hipSurfaceObject", "", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - - // cudaTextureObject_t - {"CUtexObject", {"hipTextureObject_t", "", CONV_TYPE, API_DRIVER}}, - - // 5. 
Defines - - {"__CUDACC__", {"__HIPCC__", "", CONV_DEFINE, API_DRIVER}}, - {"CUDA_CB", {"HIP_CB", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, - // cudaCpuDeviceId ((int)-1) - {"CU_DEVICE_CPU", {"hipCpuDeviceId", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // ((CUdevice)-1) - // cudaInvalidDeviceId ((int)-1) - {"CU_DEVICE_INVALID", {"hipInvalidDeviceId", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // ((CUdevice)-2) - // CUDA_IPC_HANDLE_SIZE - {"CU_IPC_HANDLE_SIZE", {"HIP_IPC_HANDLE_SIZE", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 64 - {"CU_LAUNCH_PARAM_BUFFER_POINTER", {"HIP_LAUNCH_PARAM_BUFFER_POINTER", "", CONV_DEFINE, API_DRIVER}}, // ((void*)0x01) - {"CU_LAUNCH_PARAM_BUFFER_SIZE", {"HIP_LAUNCH_PARAM_BUFFER_SIZE", "", CONV_DEFINE, API_DRIVER}}, // ((void*)0x02) - {"CU_LAUNCH_PARAM_END", {"HIP_LAUNCH_PARAM_END", "", CONV_DEFINE, API_DRIVER}}, // ((void*)0x00) - // cudaHostAllocPortable - {"CU_MEMHOSTALLOC_PORTABLE", {"hipHostMallocPortable", "", CONV_DEFINE, API_DRIVER}}, // 0x01 - // cudaHostAllocMapped - {"CU_MEMHOSTALLOC_DEVICEMAP", {"hipHostMallocMapped", "", CONV_DEFINE, API_DRIVER}}, // 0x02 - // cudaHostAllocWriteCombined - {"CU_MEMHOSTALLOC_WRITECOMBINED", {"hipHostMallocWriteCombined", "", CONV_DEFINE, API_DRIVER}}, // 0x04 - // cudaHostRegisterPortable - {"CU_MEMHOSTREGISTER_PORTABLE", {"hipHostRegisterPortable", "", CONV_DEFINE, API_DRIVER}}, // 0x01 - // cudaHostRegisterMapped - {"CU_MEMHOSTREGISTER_DEVICEMAP", {"hipHostRegisterMapped", "", CONV_DEFINE, API_DRIVER}}, // 0x02 - // cudaHostRegisterIoMemory - {"CU_MEMHOSTREGISTER_IOMEMORY", {"hipHostRegisterIoMemory", "", CONV_DEFINE, API_DRIVER}}, // 0x04 - {"CU_PARAM_TR_DEFAULT", {"HIP_PARAM_TR_DEFAULT", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // -1 - // cudaStreamLegacy ((cudaStream_t)0x1) - {"CU_STREAM_LEGACY", {"hipStreamLegacy", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // ((CUstream)0x1) - // cudaStreamPerThread ((cudaStream_t)0x2) - {"CU_STREAM_PER_THREAD", 
{"hipStreamPerThread", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // ((CUstream)0x2) - {"CU_TRSA_OVERRIDE_FORMAT", {"HIP_TRSA_OVERRIDE_FORMAT", "", CONV_DEFINE, API_DRIVER}}, // 0x01 - {"CU_TRSF_NORMALIZED_COORDINATES", {"HIP_TRSF_NORMALIZED_COORDINATES", "", CONV_DEFINE, API_DRIVER}}, // 0x02 - {"CU_TRSF_READ_AS_INTEGER", {"HIP_TRSF_READ_AS_INTEGER", "", CONV_DEFINE, API_DRIVER}}, // 0x01 - {"CU_TRSF_SRGB", {"HIP_TRSF_SRGB", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x10 - // no analogue - // NOTE: Deprecated, use CUDA_ARRAY3D_LAYERED - {"CUDA_ARRAY3D_2DARRAY", {"HIP_ARRAY3D_2DARRAY", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaArrayLayered - {"CUDA_ARRAY3D_LAYERED", {"hipArrayLayered", "", CONV_DEFINE, API_DRIVER}}, // 0x01 - // cudaArraySurfaceLoadStore - {"CUDA_ARRAY3D_SURFACE_LDST", {"hipArraySurfaceLoadStore", "", CONV_DEFINE, API_DRIVER}}, // 0x02 - // cudaArrayCubemap - {"CUDA_ARRAY3D_CUBEMAP", {"hipArrayCubemap", "", CONV_DEFINE, API_DRIVER}}, // 0x04 - // cudaArrayTextureGather - {"CUDA_ARRAY3D_TEXTURE_GATHER", {"hipArrayTextureGather", "", CONV_DEFINE, API_DRIVER}}, // 0x08 - // no analogue - {"CUDA_ARRAY3D_DEPTH_TEXTURE", {"hipArrayDepthTexture", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x10 - // cudaArrayColorAttachment - {"CUDA_ARRAY3D_COLOR_ATTACHMENT", {"hipArrayColorAttachment", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x20 - // cudaCooperativeLaunchMultiDeviceNoPreSync - {"CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC", {"hipCooperativeLaunchMultiDeviceNoPreSync", "", CONV_DEFINE, API_DRIVER}}, // 0x01 - // cudaCooperativeLaunchMultiDeviceNoPostSync - {"CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC", {"hipCooperativeLaunchMultiDeviceNoPostSync", "", CONV_DEFINE, API_DRIVER}}, // 0x02 - // cudaExternalMemoryDedicated - {"CUDA_EXTERNAL_MEMORY_DEDICATED", {"hipExternalMemoryDedicated", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - // 
cudaExternalSemaphoreSignalSkipNvSciBufMemSync - {"CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC", {"hipExternalSemaphoreSignalSkipNvSciBufMemSync", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - // cudaExternalSemaphoreWaitSkipNvSciBufMemSync - {"CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC", {"hipExternalSemaphoreWaitSkipNvSciBufMemSync", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - // cudaNvSciSyncAttrSignal - {"CUDA_NVSCISYNC_ATTR_SIGNAL", {"hipNvSciSyncAttrSignal", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - // cudaNvSciSyncAttrWait - {"CUDA_NVSCISYNC_ATTR_WAIT", {"hipNvSciSyncAttrWait", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x1 - {"CUDA_VERSION", {"HIP_VERSION", "", CONV_DEFINE, API_DRIVER, HIP_UNSUPPORTED}}, // 10000 -}; diff --git a/hipify-clang/src/CUDA2HIP_FFT_API_functions.cpp b/hipify-clang/src/CUDA2HIP_FFT_API_functions.cpp deleted file mode 100644 index 29e51f9b5c..0000000000 --- a/hipify-clang/src/CUDA2HIP_FFT_API_functions.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Map of all functions -const std::map CUDA_FFT_FUNCTION_MAP{ - {"cufftPlan1d", {"hipfftPlan1d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftPlan2d", {"hipfftPlan2d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftPlan3d", {"hipfftPlan3d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftPlanMany", {"hipfftPlanMany", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftMakePlan1d", {"hipfftMakePlan1d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftMakePlan2d", {"hipfftMakePlan2d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftMakePlan3d", {"hipfftMakePlan3d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftMakePlanMany", {"hipfftMakePlanMany", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftMakePlanMany64", {"hipfftMakePlanMany64", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftGetSizeMany64", {"hipfftGetSizeMany64", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftEstimate1d", {"hipfftEstimate1d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftEstimate2d", {"hipfftEstimate2d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftEstimate3d", {"hipfftEstimate3d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftEstimateMany", {"hipfftEstimateMany", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftCreate", {"hipfftCreate", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftGetSize1d", {"hipfftGetSize1d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftGetSize2d", {"hipfftGetSize2d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftGetSize3d", {"hipfftGetSize3d", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftGetSizeMany", {"hipfftGetSizeMany", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftGetSize", {"hipfftGetSize", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftSetWorkArea", {"hipfftSetWorkArea", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftSetAutoAllocation", {"hipfftSetAutoAllocation", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftExecC2C", {"hipfftExecC2C", "", 
CONV_LIB_FUNC, API_FFT}}, - {"cufftExecR2C", {"hipfftExecR2C", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftExecC2R", {"hipfftExecC2R", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftExecZ2Z", {"hipfftExecZ2Z", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftExecD2Z", {"hipfftExecD2Z", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftExecZ2D", {"hipfftExecZ2D", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftSetStream", {"hipfftSetStream", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftDestroy", {"hipfftDestroy", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftGetVersion", {"hipfftGetVersion", "", CONV_LIB_FUNC, API_FFT}}, - {"cufftGetProperty", {"hipfftGetProperty", "", CONV_LIB_FUNC, API_FFT, HIP_UNSUPPORTED}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_FFT_API_types.cpp b/hipify-clang/src/CUDA2HIP_FFT_API_types.cpp deleted file mode 100644 index 499afe7695..0000000000 --- a/hipify-clang/src/CUDA2HIP_FFT_API_types.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Map of all functions -const std::map CUDA_FFT_TYPE_NAME_MAP{ - - // cuFFT defines - {"CUFFT_FORWARD", {"HIPFFT_FORWARD", "", CONV_NUMERIC_LITERAL, API_DNN}}, // -1 - {"CUFFT_INVERSE", {"HIPFFT_BACKWARD", "", CONV_NUMERIC_LITERAL, API_DNN}}, // 1 - {"CUFFT_COMPATIBILITY_DEFAULT", {"HIPFFT_COMPATIBILITY_DEFAULT", "", CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}}, // CUFFT_COMPATIBILITY_FFTW_PADDING - - // cuFFT enums - {"cufftResult_t", {"hipfftResult_t", "", CONV_TYPE, API_FFT}}, - {"cufftResult", {"hipfftResult", "", CONV_TYPE, API_FFT}}, - {"CUFFT_SUCCESS", {"HIPFFT_SUCCESS", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x0 0 - {"CUFFT_INVALID_PLAN", {"HIPFFT_INVALID_PLAN", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x1 1 - {"CUFFT_ALLOC_FAILED", {"HIPFFT_ALLOC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x2 2 - {"CUFFT_INVALID_TYPE", {"HIPFFT_INVALID_TYPE", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x3 3 - {"CUFFT_INVALID_VALUE", {"HIPFFT_INVALID_VALUE", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x4 4 - {"CUFFT_INTERNAL_ERROR", {"HIPFFT_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x5 5 - {"CUFFT_EXEC_FAILED", {"HIPFFT_EXEC_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x6 6 - {"CUFFT_SETUP_FAILED", {"HIPFFT_SETUP_FAILED", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x7 7 - {"CUFFT_INVALID_SIZE", {"HIPFFT_INVALID_SIZE", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x8 8 - {"CUFFT_UNALIGNED_DATA", {"HIPFFT_UNALIGNED_DATA", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x9 9 - {"CUFFT_INCOMPLETE_PARAMETER_LIST", {"HIPFFT_INCOMPLETE_PARAMETER_LIST", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0xA 10 - {"CUFFT_INVALID_DEVICE", {"HIPFFT_INVALID_DEVICE", "", CONV_NUMERIC_LITERAL, 
API_FFT}}, // 0xB 11 - {"CUFFT_PARSE_ERROR", {"HIPFFT_PARSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0xC 12 - {"CUFFT_NO_WORKSPACE", {"HIPFFT_NO_WORKSPACE", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0xD 13 - {"CUFFT_NOT_IMPLEMENTED", {"HIPFFT_NOT_IMPLEMENTED", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0xE 14 - {"CUFFT_LICENSE_ERROR", {"HIPFFT_LICENSE_ERROR", "", CONV_NUMERIC_LITERAL, API_FFT, HIP_UNSUPPORTED}}, - {"CUFFT_NOT_SUPPORTED", {"HIPFFT_NOT_SUPPORTED", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x10 16 - {"cufftType_t", {"hipfftType_t", "", CONV_TYPE, API_FFT}}, - {"cufftType", {"hipfftType", "", CONV_TYPE, API_FFT}}, - {"CUFFT_R2C", {"HIPFFT_R2C", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x2a - {"CUFFT_C2R", {"HIPFFT_C2R", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x2c - {"CUFFT_C2C", {"HIPFFT_C2C", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x29 - {"CUFFT_D2Z", {"HIPFFT_D2Z", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x6a - {"CUFFT_Z2D", {"HIPFFT_Z2D", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x6c - {"CUFFT_Z2Z", {"HIPFFT_Z2Z", "", CONV_NUMERIC_LITERAL, API_FFT}}, // 0x69 - {"cufftCompatibility_t", {"hipfftCompatibility_t", "", CONV_TYPE, API_FFT, HIP_UNSUPPORTED}}, - {"cufftCompatibility", {"hipfftCompatibility", "", CONV_TYPE, API_FFT, HIP_UNSUPPORTED}}, - {"CUFFT_COMPATIBILITY_FFTW_PADDING", {"HIPFFT_COMPATIBILITY_FFTW_PADDING", "", CONV_NUMERIC_LITERAL, API_FFT, HIP_UNSUPPORTED}}, // 0x01 - - // cuFFT types - {"cufftReal", {"hipfftReal", "", CONV_TYPE, API_FFT}}, - {"cufftDoubleReal", {"hipfftDoubleReal", "", CONV_TYPE, API_FFT}}, - {"cufftComplex", {"hipfftComplex", "", CONV_TYPE, API_FFT}}, - {"cufftDoubleComplex", {"hipfftDoubleComplex", "", CONV_TYPE, API_FFT}}, - {"cufftHandle", {"hipfftHandle", "", CONV_TYPE, API_FFT}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_Perl.cpp b/hipify-clang/src/CUDA2HIP_Perl.cpp deleted file mode 100644 index d74ba70f21..0000000000 --- a/hipify-clang/src/CUDA2HIP_Perl.cpp +++ /dev/null @@ -1,488 +0,0 @@ -/* -Copyright (c) 
2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/Support/Path.h" -#include "CUDA2HIP.h" -#include "CUDA2HIP_Scripting.h" -#include "ArgParse.h" -#include "StringUtils.h" -#include "LLVMCompat.h" -#include "Statistics.h" - -namespace perl { - - using namespace std; - using namespace llvm; - - const string sCopyright = - "##\n" - "# Copyright (c) 2015-present Advanced Micro Devices, Inc. 
All rights reserved.\n" - "#\n" - "# Permission is hereby granted, free of charge, to any person obtaining a copy\n" - "# of this software and associated documentation files (the \"Software\"), to deal\n" - "# in the Software without restriction, including without limitation the rights\n" - "# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" - "# copies of the Software, and to permit persons to whom the Software is\n" - "# furnished to do so, subject to the following conditions:\n" - "#\n" - "# The above copyright notice and this permission notice shall be included in\n" - "# all copies or substantial portions of the Software.\n" - "#\n" - "# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" - "# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" - "# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" - "# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" - "# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" - "# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n" - "# THE SOFTWARE.\n" - "##\n"; - - const string sImportant = "# IMPORTANT: Do not change this file manually: it is generated by hipify-clang --perl"; - const string tab = " "; - const string tab_2 = tab + tab; - const string tab_3 = tab_2 + tab; - const string tab_4 = tab_3 + tab; - const string tab_5 = tab_4 + tab; - const string tab_6 = tab_5 + tab; - const string endl_2 = "\n\n"; - const string endl_tab = "\n" + tab; - const string endl_tab_2 = "\n" + tab_2; - const string endl_tab_3 = "\n" + tab_3; - const string endl_tab_4 = "\n" + tab_4; - const string endl_tab_5 = "\n" + tab_5; - const string sub = "sub "; - const string my = "my "; - const string my_k = my + "$k = 0;"; - const string return_0 = "return 0;\n"; - const string return_k = "return $k;\n"; - const string while_ = "while "; - const string unless_ = 
"unless "; - const string foreach = "foreach "; - const string foreach_func = foreach + "$func (\n"; - const string print = "print STDERR "; - const string printf = "printf STDERR "; - const string no_warns = "no warnings qw/uninitialized/;"; - const string hipify_perl = "hipify-perl"; - - const string sCudaDevice = "cudaDevice"; - const string sCudaDeviceId = "cudaDeviceId"; - const string sCudaDevices = "cudaDevices"; - const string sCudaDevice_t = "cudaDevice_t"; - const string sCudaIDs = "cudaIDs"; - const string sCudaGridDim = "cudaGridDim"; - const string sCudaDimGrid = "cudaDimGrid"; - const string sCudaDimBlock = "cudaDimBlock"; - const string sCudaGradInput = "cudaGradInput"; - const string sCudaGradOutput = "cudaGradOutput"; - const string sCudaInput = "cudaInput"; - const string sCudaOutput = "cudaOutput"; - const string sCudaIndices = "cudaIndices"; - const string sCudaGaugeField = "cudaGaugeField"; - const string sCudaMom = "cudaMom"; - const string sCudaGauge = "cudaGauge"; - const string sCudaInGauge = "cudaInGauge"; - const string sCudaColorSpinorField = "cudaColorSpinorField"; - const string sCudaSiteLink = "cudaSiteLink"; - const string sCudaFatLink = "cudaFatLink"; - const string sCudaStaple = "cudaStaple"; - const string sCudaCloverField = "cudaCloverField"; - const string sCudaParam = "cudaParam"; - - const set Whitelist{ - {sCudaDevice}, {sCudaDevice_t}, {sCudaIDs}, {sCudaGridDim}, {sCudaDimGrid}, {sCudaDimBlock}, {sCudaDeviceId}, {sCudaDevices}, - {sCudaGradInput}, {sCudaGradOutput}, {sCudaInput}, {sCudaOutput}, {sCudaIndices}, {sCudaGaugeField}, {sCudaMom}, {sCudaGauge}, - {sCudaInGauge}, {sCudaColorSpinorField}, {sCudaSiteLink}, {sCudaFatLink}, {sCudaStaple}, {sCudaCloverField}, {sCudaParam} - }; - - void generateHeader(unique_ptr &streamPtr) { - *streamPtr.get() << "#!/usr/bin/perl -w" << endl_2; - *streamPtr.get() << sCopyright << endl; - *streamPtr.get() << sImportant << endl_2; - *streamPtr.get() << "#usage " << hipify_perl << " 
[OPTIONS] INPUT_FILE" << endl_2; - *streamPtr.get() << "use Getopt::Long;" << endl; - *streamPtr.get() << my << "$whitelist = \"\";" << endl; - *streamPtr.get() << my << "$fileName = \"\";" << endl; - *streamPtr.get() << my << "%ft;" << endl; - *streamPtr.get() << my << "%Tkernels;" << endl_2; - *streamPtr.get() << "GetOptions(" << endl; - *streamPtr.get() << tab << " \"examine\" => \\$examine # Combines -no-output and -print-stats options." << endl; - *streamPtr.get() << tab << ", \"inplace\" => \\$inplace # Modify input file inplace, replacing input with hipified output, save backup in .prehip file." << endl; - *streamPtr.get() << tab << ", \"no-output\" => \\$no_output # Don't write any translated output to stdout." << endl; - *streamPtr.get() << tab << ", \"print-stats\" => \\$print_stats # Print translation statistics." << endl; - *streamPtr.get() << tab << ", \"quiet-warnings\" => \\$quiet_warnings # Don't print warnings on unknown CUDA functions." << endl; - *streamPtr.get() << tab << ", \"whitelist=s\" => \\$whitelist # TODO: test it beforehand" << endl; - *streamPtr.get() << ");" << endl_2; - *streamPtr.get() << "$print_stats = 1 if $examine;" << endl; - *streamPtr.get() << "$no_output = 1 if $examine;" << endl_2; - *streamPtr.get() << "# Whitelist of cuda[A-Z] identifiers, which are commonly used in CUDA sources but don't map to any CUDA API:" << endl; - *streamPtr.get() << "@whitelist = ("; - unsigned int num = 0; - for (const string &m : Whitelist) { - *streamPtr.get() << endl_tab << (num ? 
", " : " ") << "\"" << m << "\""; - ++num; - } - *streamPtr.get() << endl << ");" << endl_2; - *streamPtr.get() << "push(@whitelist, split(',', $whitelist));" << endl_2; - } - - void generateStatFunctions(unique_ptr &streamPtr) { - *streamPtr.get() << endl << sub << "totalStats" << " {" << endl; - *streamPtr.get() << tab << my << "%count = %{ shift() };" << endl; - *streamPtr.get() << tab << my << "$total = 0;" << endl; - *streamPtr.get() << tab << foreach << "$key (keys %count) {" << endl; - *streamPtr.get() << tab_2 << "$total += $count{$key};" << endl_tab << "}" << endl; - *streamPtr.get() << tab << "return $total;" << endl << "};" << endl; - *streamPtr.get() << endl << sub << "printStats" << " {" << endl; - *streamPtr.get() << tab << my << "$label = shift();" << endl; - *streamPtr.get() << tab << my << "@statNames = @{ shift() };" << endl; - *streamPtr.get() << tab << my << "%counts = %{ shift() };" << endl; - *streamPtr.get() << tab << my << "$warnings = shift();" << endl; - *streamPtr.get() << tab << my << "$loc = shift();" << endl; - *streamPtr.get() << tab << my << "$total = totalStats(\\%counts);" << endl; - *streamPtr.get() << tab << printf << "\"%s %d CUDA->HIP refs ( \", $label, $total;" << endl; - *streamPtr.get() << tab << foreach << "$stat (@statNames) {" << endl; - *streamPtr.get() << tab_2 << printf << "\"%s:%d \", $stat, $counts{$stat};" << endl_tab << "}" << endl; - *streamPtr.get() << tab << printf << "\")\\n warn:%d LOC:%d\", $warnings, $loc;" << endl << "}" << endl; - for (int i = 0; i < 2; ++i) { - *streamPtr.get() << endl << sub << (i ? "clearStats" : "addStats") << " {" << endl; - *streamPtr.get() << tab << my << "$dest_ref = shift();" << endl; - *streamPtr.get() << tab << my << (i ? "@statNames = @{ shift() };" : "%adder = %{ shift() };") << endl; - *streamPtr.get() << tab << foreach << (i ? "$stat(@statNames)" : "$key (keys %adder)") << " {" << endl; - *streamPtr.get() << tab_2 << "$dest_ref->" << (i ? 
"{$stat} = 0;" : "{$key} += $adder{$key};") << endl_tab << "}" << endl << "}" << endl; - } - } - - void generateSimpleSubstitutions(unique_ptr &streamPtr) { - *streamPtr.get() << endl << sub << "simpleSubstitutions" << " {" << endl; - for (int i = 0; i < NUM_CONV_TYPES; ++i) { - if (i == CONV_INCLUDE_CUDA_MAIN_H || i == CONV_INCLUDE) { - for (auto &ma : CUDA_INCLUDE_MAP) { - if (Statistics::isUnsupported(ma.second)) continue; - if (i == ma.second.type) { - string sCUDA = ma.first.str(); - string sHIP = ma.second.hipName.str(); - sCUDA = regex_replace(sCUDA, regex("/"), "\\/"); - sHIP = regex_replace(sHIP, regex("/"), "\\/"); - *streamPtr.get() << tab << "$ft{'" << counterNames[ma.second.type] << "'} += s/\\b" << sCUDA << "\\b/" << sHIP << "/g;" << endl; - } - } - } else { - for (auto &ma : CUDA_RENAMES_MAP()) { - if (Statistics::isUnsupported(ma.second)) continue; - if (i == ma.second.type) { - *streamPtr.get() << tab << "$ft{'" << counterNames[ma.second.type] << "'} += s/\\b" << ma.first.str() << "\\b/" << ma.second.hipName.str() << "/g;" << endl; - } - } - } - } - *streamPtr.get() << "}" << endl; - } - - void generateExternShared(unique_ptr &streamPtr) { - *streamPtr.get() << endl << "# CUDA extern __shared__ syntax replace with HIP_DYNAMIC_SHARED() macro" << endl; - *streamPtr.get() << sub << "transformExternShared" << " {" << endl; - *streamPtr.get() << tab << no_warns << endl; - *streamPtr.get() << tab << my_k << endl; - *streamPtr.get() << tab << "$k += s/extern\\s+([\\w\\(\\)]+)?\\s*__shared__\\s+([\\w:<>\\s]+)\\s+(\\w+)\\s*\\[\\s*\\]\\s*;/HIP_DYNAMIC_SHARED($1 $2, $3)/g;" << endl; - *streamPtr.get() << tab << "$ft{'extern_shared'} += $k;" << endl << "}" << endl; - } - - void generateKernelLaunch(unique_ptr &streamPtr) { - *streamPtr.get() << endl << "# CUDA Kernel Launch Syntax" << endl << sub << "transformKernelLaunch" << " {" << endl; - *streamPtr.get() << tab << no_warns << endl; - *streamPtr.get() << tab << my_k << endl_2; - - string s_k = "$k += 
s/([:|\\w]+)\\s*"; - *streamPtr.get() << tab << "# Handle the kern<...><<>>() syntax with empty args:" << endl; - *streamPtr.get() << tab << s_k << "<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, $6)/g;" << endl; - *streamPtr.get() << tab << "# Handle the kern<<>>() syntax with empty args:" << endl; - *streamPtr.get() << tab << s_k << "<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5)/g;" << endl_2; - - *streamPtr.get() << tab << "# Handle the kern<...><<>>(...) syntax with non-empty args:" << endl; - *streamPtr.get() << tab << s_k << "<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, $6, /g;" << endl; - *streamPtr.get() << tab << "# Handle the kern<<>>(...) syntax with non-empty args:" << endl; - *streamPtr.get() << tab << s_k << "<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, $5, /g;" << endl_2; - - *streamPtr.get() << tab << "# Handle the kern<...><<>>() syntax with empty args:" << endl; - *streamPtr.get() << tab << s_k << "<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, 0)/g;" << endl; - *streamPtr.get() << tab << "# Handle the kern<<>>() syntax with empty args:" << endl; - *streamPtr.get() << tab << s_k << "<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0)/g;" << endl_2; - - *streamPtr.get() << tab << "# Handle the kern<...><>>(...) 
syntax with non-empty args:" << endl; - *streamPtr.get() << tab << s_k << "<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), $5, 0, /g;" << endl; - *streamPtr.get() << tab << "# Handle the kern<<>>(...) syntax with non-empty args:" << endl; - *streamPtr.get() << tab << s_k << "<<<\\s*(.+)\\s*,\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), $4, 0, /g;" << endl_2; - - *streamPtr.get() << tab << "# Handle the kern<...><<>>() syntax with empty args:" << endl; - *streamPtr.get() << tab << s_k << "<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), 0, 0)/g;" << endl; - *streamPtr.get() << tab << "# Handle the kern<<>>() syntax with empty args:" << endl; - *streamPtr.get() << tab << s_k << "<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\((\\s*)\\)/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0)/g;" << endl_2; - - *streamPtr.get() << tab << "# Handle the kern<...><<>>(...) syntax with non-empty args:" << endl; - *streamPtr.get() << tab << s_k << "<(.+)>\\s*<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL(HIP_KERNEL_NAME($1<$2>), dim3($3), dim3($4), 0, 0, /g;" << endl; - *streamPtr.get() << tab << "# Handle the kern<<>>(...) 
syntax with non-empty args:" << endl; - *streamPtr.get() << tab << s_k << "<<<\\s*(.+)\\s*,\\s*(.+)\\s*>>>(\\s*)\\(/hipLaunchKernelGGL($1, dim3($2), dim3($3), 0, 0, /g;" << endl_2; - - *streamPtr.get() << tab << "if ($k) {" << endl; - *streamPtr.get() << tab_2 << "$ft{'kernel_launch'} += $k;" << endl; - *streamPtr.get() << tab_2 << "$Tkernels{$1}++;" << endl_tab << "}" << endl << "}" << endl; - } - - void generateCubNamespace(unique_ptr &streamPtr) { - *streamPtr.get() << endl << sub << "transformCubNamespace" << " {" << endl_tab << my_k << endl; - *streamPtr.get() << tab << "$k += s/using\\s*namespace\\s*cub/using namespace hipcub/g;" << endl; - *streamPtr.get() << tab << "$k += s/\\bcub::\\b/hipcub::/g;" << endl << tab << return_k << "}" << endl; - } - - void generateHostFunctions(unique_ptr &streamPtr) { - *streamPtr.get() << endl << sub << "transformHostFunctions" << " {" << endl_tab << my_k << endl; - set &funcSet = DeviceSymbolFunctions0; - const string s0 = "$k += s/(?second.hipName.str() << "\""; - count++; - } - } - *streamPtr.get() << endl_tab << ")" << endl_tab << "{" << endl_tab_2; - switch (i) { - case 0: - default: *streamPtr.get() << s0 << sHIP_SYMBOL << "\\($2\\),/g" << endl; break; - case 1: *streamPtr.get() << s1 << sHIP_SYMBOL << "\\($3\\)$4/g;" << endl; break; - case 2: *streamPtr.get() << s0 << s_reinterpret_cast << "\\($2\\),/g" << endl; break; - case 3: *streamPtr.get() << s1 << s_reinterpret_cast << "\\($3\\)$4/g;" << endl; break; - } - *streamPtr.get() << tab << "}" << endl; - } - *streamPtr.get() << tab << return_k << "}" << endl; - } - - void generateDeviceFunctions(unique_ptr &streamPtr) { - unsigned int countUnsupported = 0; - unsigned int countSupported = 0; - stringstream sSupported; - stringstream sUnsupported; - for (auto &ma : CUDA_DEVICE_FUNC_MAP) { - bool isUnsupported = Statistics::isUnsupported(ma.second); - (isUnsupported ? 
sUnsupported : sSupported) << ((isUnsupported && countUnsupported) || (!isUnsupported && countSupported) ? ",\n" : "") << tab_2 << "\"" << ma.first.str() << "\""; - if (isUnsupported) countUnsupported++; - else countSupported++; - } - stringstream subCountSupported; - stringstream subWarnUnsupported; - stringstream subCommon; - string sCommon = tab + my_k + "\n" + tab + foreach_func; - subCountSupported << endl << sub << "countSupportedDeviceFunctions" << " {" << endl << (countSupported ? sCommon : tab + return_0); - subWarnUnsupported << endl << sub << "warnUnsupportedDeviceFunctions" << " {" << endl << (countUnsupported ? tab + my + "$line_num = shift;\n" + sCommon : tab + return_0); - if (countSupported) subCountSupported << sSupported.str() << endl_tab << ")" << endl; - if (countUnsupported) subWarnUnsupported << sUnsupported.str() << endl_tab << ")" << endl; - if (countSupported || countUnsupported) { - subCommon << tab << "{" << endl; - subCommon << tab_2 << "# match device function from the list, except those, which have a namespace prefix (aka somenamespace::umin(...));" << endl; - subCommon << tab_2 << "# function with only global namespace qualifier '::' (aka ::umin(...)) should be treated as a device function (and warned as well as without such qualifier);" << endl; - subCommon << tab_2 << my << "$mt_namespace = m/(\\w+)::($func)\\s*\\(\\s*.*\\s*\\)/g;" << endl; - subCommon << tab_2 << my << "$mt = m/($func)\\s*\\(\\s*.*\\s*\\)/g;" << endl; - subCommon << tab_2 << "if ($mt && !$mt_namespace) {" << endl; - subCommon << tab_3 << "$k += $mt;" << endl; - } - if (countSupported) subCountSupported << subCommon.str(); - if (countUnsupported) { - subWarnUnsupported << subCommon.str(); - subWarnUnsupported << tab_3 << print << "\" warning: $fileName:$line_num: unsupported device function \\\"$func\\\": $_\\n\";" << endl; - } - if (countSupported || countUnsupported) sCommon = tab_2 + "}\n" + tab + "}\n" + tab + return_k; - if (countSupported) subCountSupported << 
sCommon; - if (countUnsupported) subWarnUnsupported << sCommon; - subCountSupported << "}" << endl; - subWarnUnsupported << "}" << endl; - *streamPtr.get() << subCountSupported.str(); - *streamPtr.get() << subWarnUnsupported.str(); - } - - bool generate(bool Generate) { - if (!Generate) return true; - string dstHipifyPerl = hipify_perl, dstHipifyPerlDir = OutputHipifyPerlDir; - error_code EC; - if (!dstHipifyPerlDir.empty()) { - string sOutputHipifyPerlDirAbsPath = getAbsoluteDirectoryPath(OutputHipifyPerlDir, EC, "output " + hipify_perl); - if (EC) return false; - dstHipifyPerl = sOutputHipifyPerlDirAbsPath + "/" + dstHipifyPerl; - } - SmallString<128> tmpFile; - EC = sys::fs::createTemporaryFile(dstHipifyPerl, hipify_perl, tmpFile); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": " << tmpFile << "\n"; - return false; - } - unique_ptr streamPtr = unique_ptr(new ofstream(tmpFile.c_str(), ios_base::trunc)); - generateHeader(streamPtr); - string sConv = my + "$apiCalls = "; - unsigned int exclude[3] = { CONV_DEVICE_FUNC, CONV_EXTERN_SHARED, CONV_KERNEL_LAUNCH }; - *streamPtr.get() << "@statNames = ("; - for (unsigned int i = 0; i < NUM_CONV_TYPES - 1; ++i) { - *streamPtr.get() << "\"" << counterNames[i] << "\", "; - if (any_of(exclude, exclude + 3, [&i](unsigned int x) { return x == i; })) continue; - sConv += "$ft{'" + string(counterNames[i]) + "'}" + (i < NUM_CONV_TYPES - 2 ? 
" + " : ";"); - } - if (sConv.back() == ' ') sConv = sConv.substr(0, sConv.size() - 3) + ";"; - *streamPtr.get() << "\"" << counterNames[NUM_CONV_TYPES - 1] << "\");" << endl; - generateStatFunctions(streamPtr); - generateSimpleSubstitutions(streamPtr); - generateExternShared(streamPtr); - generateKernelLaunch(streamPtr); - generateCubNamespace(streamPtr); - generateHostFunctions(streamPtr); - generateDeviceFunctions(streamPtr); - *streamPtr.get() << endl << "# Count of transforms in all files" << endl; - *streamPtr.get() << my << "%tt;" << endl; - *streamPtr.get() << "clearStats(\\%tt, \\@statNames);" << endl; - *streamPtr.get() << "$Twarnings = 0;" << endl; - *streamPtr.get() << "$TlineCount = 0;" << endl; - *streamPtr.get() << my << "%TwarningTags;" << endl; - *streamPtr.get() << my << "$fileCount = @ARGV;" << endl_2; - *streamPtr.get() << while_ << "(@ARGV) {" << endl; - *streamPtr.get() << tab << "$fileName=shift (@ARGV);" << endl; - *streamPtr.get() << tab << "if ($inplace) {" << endl; - *streamPtr.get() << tab_2 << my << "$file_prehip = \"$fileName\" . 
\".prehip\";" << endl; - *streamPtr.get() << tab_2 << my << "$infile;" << endl; - *streamPtr.get() << tab_2 << my << "$outfile;" << endl; - *streamPtr.get() << tab_2 << "if (-e $file_prehip) {" << endl; - *streamPtr.get() << tab_3 << "$infile = $file_prehip;" << endl; - *streamPtr.get() << tab_3 << "$outfile = $fileName;" << endl; - *streamPtr.get() << tab_2 << "} else {" << endl; - *streamPtr.get() << tab_3 << "system (\"cp $fileName $file_prehip\");" << endl; - *streamPtr.get() << tab_3 << "$infile = $file_prehip;" << endl; - *streamPtr.get() << tab_3 << "$outfile = $fileName;" << endl_tab_2 << "}" << endl; - *streamPtr.get() << tab_2 << "open(INFILE,\"<\", $infile) or die \"error: could not open $infile\";" << endl; - *streamPtr.get() << tab_2 << "open(OUTFILE,\">\", $outfile) or die \"error: could not open $outfile\";" << endl; - *streamPtr.get() << tab_2 << "$OUTFILE = OUTFILE;" << endl; - *streamPtr.get() << tab << "} else {" << endl; - *streamPtr.get() << tab_2 << "open(INFILE,\"<\", $fileName) or die \"error: could not open $fileName\";" << endl; - *streamPtr.get() << tab_2 << "$OUTFILE = STDOUT;" << endl_tab << "}" << endl; - *streamPtr.get() << tab << "# Count of transforms in this file" << endl; - *streamPtr.get() << tab << "clearStats(\\%ft, \\@statNames);" << endl; - *streamPtr.get() << tab << my << "$countIncludes = 0;" << endl; - *streamPtr.get() << tab << my << "$countKeywords = 0;" << endl; - *streamPtr.get() << tab << my << "$warnings = 0;" << endl; - *streamPtr.get() << tab << my << "%warningTags;" << endl; - *streamPtr.get() << tab << my << "$lineCount = 0;" << endl; - *streamPtr.get() << tab << "undef $/;" << endl; - *streamPtr.get() << tab << "# Read whole file at once, so we can match newlines" << endl; - *streamPtr.get() << tab << while_ << "() {" << endl; - *streamPtr.get() << tab_2 << "$countKeywords += m/__global__/;" << endl; - *streamPtr.get() << tab_2 << "$countKeywords += m/__shared__/;" << endl; - *streamPtr.get() << tab_2 << 
"simpleSubstitutions();" << endl; - *streamPtr.get() << tab_2 << "transformExternShared();" << endl; - *streamPtr.get() << tab_2 << "transformKernelLaunch();" << endl; - *streamPtr.get() << tab_2 << "transformCubNamespace();" << endl; - *streamPtr.get() << tab_2 << "if ($print_stats) {" << endl; - *streamPtr.get() << tab_3 << while_ << "(/(\\b(hip|HIP)([A-Z]|_)\\w+\\b)/g) {" << endl; - *streamPtr.get() << tab_4 << "$convertedTags{$1}++;" << endl_tab_3 << "}" << endl_tab_2 << "}" << endl; - *streamPtr.get() << tab_2 << my << "$hasDeviceCode = $countKeywords + $ft{'device_function'};" << endl; - *streamPtr.get() << tab_2 << unless_ << "($quiet_warnings) {" << endl; - *streamPtr.get() << tab_3 << "# Copy into array of lines, process line-by-line to show warnings" << endl; - *streamPtr.get() << tab_3 << "if ($hasDeviceCode or (/\\bcu|CU/) or (/<<<.*>>>/)) {" << endl; - *streamPtr.get() << tab_4 << my << "@lines = split /\\n/, $_;" << endl; - *streamPtr.get() << tab_4 << "# Copy the whole file" << endl; - *streamPtr.get() << tab_4 << my << "$tmp = $_;" << endl; - *streamPtr.get() << tab_4 << my << "$line_num = 0;" << endl; - *streamPtr.get() << tab_4 << foreach << "(@lines) {" << endl; - *streamPtr.get() << tab_5 << "$line_num++;" << endl; - *streamPtr.get() << tab_5 << "# Remove any whitelisted words" << endl; - *streamPtr.get() << tab_5 << foreach << "$w (@whitelist) {" << endl; - *streamPtr.get() << tab_6 << "s/\\b$w\\b/ZAP/" << endl_tab_5 << "}" << endl; - *streamPtr.get() << tab_5 << my << "$tag;" << endl; - *streamPtr.get() << tab_5 << "if ((/(\\bcuda[A-Z]\\w+)/) or (/<<<.*>>>/)) {" << endl; - *streamPtr.get() << tab_6 << "# Flag any remaining code that look like cuda API calls: may want to add these to hipify" << endl; - *streamPtr.get() << tab_6 << "$tag = (defined $1) ? 
$1 : \"Launch\";" << endl_tab_5 << "}" << endl; - *streamPtr.get() << tab_5 << "if (defined $tag) {" << endl; - *streamPtr.get() << tab_6 << "$warnings++;" << endl; - *streamPtr.get() << tab_6 << "$warningTags{$tag}++;" << endl; - *streamPtr.get() << tab_6 << print << "\" warning: $fileName:#$line_num : $_\\n\";" << endl_tab_5 << "}" << endl; - *streamPtr.get() << tab_5 << "$s = warnUnsupportedDeviceFunctions($line_num);" << endl; - *streamPtr.get() << tab_5 << "$warnings += $s;" << endl_tab_4 << "}" << endl; - *streamPtr.get() << tab_4 << "$_ = $tmp;" << endl_tab_3 << "}" << endl_tab_2 << "}" << endl; - *streamPtr.get() << tab_2 << "if ($hasDeviceCode > 0) {" << endl; - *streamPtr.get() << tab_3 << "$ft{'device_function'} += countSupportedDeviceFunctions();" << endl_tab_2 << "}" << endl; - *streamPtr.get() << tab_2 << "transformHostFunctions();" << endl; - *streamPtr.get() << tab_2 << "# TODO: would like to move this code outside loop but it uses $_ which contains the whole file" << endl; - *streamPtr.get() << tab_2 << unless_ << "($no_output) {" << endl; - *streamPtr.get() << tab_3 << sConv << endl; - *streamPtr.get() << tab_3 << my << "$kernStuff = $hasDeviceCode + $ft{'" << counterNames[CONV_KERNEL_LAUNCH] << "'} + $ft{'" << counterNames[CONV_DEVICE_FUNC] << "'};" << endl; - *streamPtr.get() << tab_3 << my << "$totalCalls = $apiCalls + $kernStuff;" << endl; - *streamPtr.get() << tab_3 << "$is_dos = m/\\r\\n$/;" << endl; - *streamPtr.get() << tab_3 << "if ($totalCalls and ($countIncludes == 0) and ($kernStuff != 0)) {" << endl; - *streamPtr.get() << tab_4 << "# TODO: implement hipify-clang's logic with header files AMAP" << endl; - *streamPtr.get() << tab_4 << "print $OUTFILE '#include \"hip/hip_runtime.h\"' . ($is_dos ? 
\"\\r\\n\" : \"\\n\");" << endl_tab_3 << "}" << endl; - *streamPtr.get() << tab_3 << "print $OUTFILE \"$_\";" << endl_tab_2 << "}" << endl; - *streamPtr.get() << tab_2 << "$lineCount = $_ =~ tr/\\n//;" << endl_tab << "}" << endl; - *streamPtr.get() << tab << my << "$totalConverted = totalStats(\\%ft);" << endl; - *streamPtr.get() << tab << "if (($totalConverted+$warnings) and $print_stats) {" << endl; - *streamPtr.get() << tab_2 << "printStats(\" info: converted\", \\@statNames, \\%ft, $warnings, $lineCount);" << endl; - *streamPtr.get() << tab_2 << print << "\" in '$fileName'\\n\";" << endl_tab << "}" << endl; - *streamPtr.get() << tab << "# Update totals for all files" << endl; - *streamPtr.get() << tab << "addStats(\\%tt, \\%ft);" << endl; - *streamPtr.get() << tab << "$Twarnings += $warnings;" << endl; - *streamPtr.get() << tab << "$TlineCount += $lineCount;" << endl; - *streamPtr.get() << tab << foreach << "$key (keys %warningTags) {" << endl; - *streamPtr.get() << tab_2 << "$TwarningTags{$key} += $warningTags{$key};" << endl_tab << "}" << endl << "}" << endl; - *streamPtr.get() << "# Print total stats for all files processed:" << endl; - *streamPtr.get() << "if ($print_stats and ($fileCount > 1)) {" << endl; - *streamPtr.get() << tab << print << "\"\\n\";" << endl; - *streamPtr.get() << tab << "printStats(\" info: TOTAL-converted\", \\@statNames, \\%tt, $Twarnings, $TlineCount);" << endl; - *streamPtr.get() << tab << print << "\"\\n\";" << endl; - *streamPtr.get() << tab << foreach << "my $key (sort { $TwarningTags{$b} <=> $TwarningTags{$a} } keys %TwarningTags) {" << endl; - *streamPtr.get() << tab_2 << printf << "\" warning: unconverted %s : %d\\n\", $key, $TwarningTags{$key};" << endl_tab << "}" << endl; - *streamPtr.get() << tab << my << "$kernelCnt = keys %Tkernels;" << endl; - *streamPtr.get() << tab << printf << "\" kernels (%d total) : \", $kernelCnt;" << endl; - *streamPtr.get() << tab << foreach << "my $key (sort { $Tkernels{$b} <=> $Tkernels{$a} } 
keys %Tkernels) {" << endl; - *streamPtr.get() << tab_2 << printf << "\" %s(%d)\", $key, $Tkernels{$key};" << endl_tab << "}" << endl; - *streamPtr.get() << tab << print << "\"\\n\\n\";" << endl << "}" << endl; - *streamPtr.get() << "if ($print_stats) {" << endl; - *streamPtr.get() << tab << foreach << "my $key (sort { $convertedTags{$b} <=> $convertedTags{$a} } keys %convertedTags) {" << endl; - *streamPtr.get() << tab_2 << printf << "\" %s %d\\n\", $key, $convertedTags{$key};" << endl_tab << "}" << endl << "}" << endl; - streamPtr.get()->flush(); - bool ret = true; - EC = sys::fs::copy_file(tmpFile, dstHipifyPerl); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": while copying " << tmpFile << " to " << dstHipifyPerl << "\n"; - ret = false; - } - if (!SaveTemps) sys::fs::remove(tmpFile); - return ret; - } -} diff --git a/hipify-clang/src/CUDA2HIP_Python.cpp b/hipify-clang/src/CUDA2HIP_Python.cpp deleted file mode 100644 index fec138915d..0000000000 --- a/hipify-clang/src/CUDA2HIP_Python.cpp +++ /dev/null @@ -1,103 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/Support/Path.h" -#include "CUDA2HIP.h" -#include "CUDA2HIP_Scripting.h" -#include "ArgParse.h" -#include "StringUtils.h" -#include "LLVMCompat.h" -#include "Statistics.h" - -using namespace llvm; - -namespace python { - - bool generate(bool Generate) { - if (!Generate) { - return true; - } - std::string dstPythonMap = "cuda_to_hip_mappings.py", dstPythonMapDir = OutputPythonMapDir; - std::error_code EC; - if (!dstPythonMapDir.empty()) { - std::string sOutputPythonMapDirAbsPath = getAbsoluteDirectoryPath(OutputPythonMapDir, EC, "output hipify-python map"); - if (EC) { - return false; - } - dstPythonMap = sOutputPythonMapDirAbsPath + "/" + dstPythonMap; - } - SmallString<128> tmpFile; - StringRef ext = "hipify-tmp"; - EC = sys::fs::createTemporaryFile(dstPythonMap, ext, tmpFile); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": " << tmpFile << "\n"; - return false; - } - std::unique_ptr pythonStreamPtr = std::unique_ptr(new std::ofstream(tmpFile.c_str(), std::ios_base::trunc)); - *pythonStreamPtr.get() << "import collections\n\n"; - *pythonStreamPtr.get() << "from pyHIPIFY.constants import *\n\n"; - *pythonStreamPtr.get() << "CUDA_RENAMES_MAP = collections.OrderedDict([\n"; - const std::string sHIP_UNS = ", HIP_UNSUPPORTED"; - for (int i = 0; i < NUM_CONV_TYPES; ++i) { - if (i == CONV_INCLUDE_CUDA_MAIN_H || i == CONV_INCLUDE) { - for (auto &ma : CUDA_INCLUDE_MAP) { - if (i == ma.second.type) { - std::string sUnsupported; - if (Statistics::isUnsupported(ma.second)) { - sUnsupported = sHIP_UNS; - } - StringRef repName = Statistics::isToRoc(ma.second) ? 
ma.second.rocName : ma.second.hipName; - *pythonStreamPtr.get() << " (\"" << ma.first.str() << "\", (\"" << repName.str() << "\", " << counterTypes[i] << ", " << apiTypes[ma.second.apiType] << sUnsupported << ")),\n"; - } - } - } - else { - for (auto &ma : CUDA_RENAMES_MAP()) { - if (i == ma.second.type) { - std::string sUnsupported; - if (Statistics::isUnsupported(ma.second)) { - sUnsupported = sHIP_UNS; - } - StringRef repName = Statistics::isToRoc(ma.second) ? ma.second.rocName : ma.second.hipName; - *pythonStreamPtr.get() << " (\"" << ma.first.str() << "\", (\"" << repName.str() << "\", " << counterTypes[i] << ", " << apiTypes[ma.second.apiType] << sUnsupported << ")),\n"; - } - } - } - } - *pythonStreamPtr.get() << "])\n\n"; - *pythonStreamPtr.get() << "CUDA_TO_HIP_MAPPINGS = [CUDA_RENAMES_MAP, C10_MAPPINGS, PYTORCH_SPECIFIC_MAPPINGS]\n"; - pythonStreamPtr.get()->flush(); - bool ret = true; - EC = sys::fs::copy_file(tmpFile, dstPythonMap); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": while copying " << tmpFile << " to " << dstPythonMap << "\n"; - ret = false; - } - if (!SaveTemps) { - sys::fs::remove(tmpFile); - } - return true; - } -} diff --git a/hipify-clang/src/CUDA2HIP_RAND_API_functions.cpp b/hipify-clang/src/CUDA2HIP_RAND_API_functions.cpp deleted file mode 100644 index 525aa1e5f3..0000000000 --- a/hipify-clang/src/CUDA2HIP_RAND_API_functions.cpp +++ /dev/null @@ -1,86 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "CUDA2HIP.h" - -// Map of all functions -const std::map CUDA_RAND_FUNCTION_MAP{ - // RAND Host functions - {"curandCreateGenerator", {"hiprandCreateGenerator", "", CONV_LIB_FUNC, API_RAND}}, - {"curandCreateGeneratorHost", {"hiprandCreateGeneratorHost", "", CONV_LIB_FUNC, API_RAND}}, - {"curandCreatePoissonDistribution", {"hiprandCreatePoissonDistribution", "", CONV_LIB_FUNC, API_RAND}}, - {"curandDestroyDistribution", {"hiprandDestroyDistribution", "", CONV_LIB_FUNC, API_RAND}}, - {"curandDestroyGenerator", {"hiprandDestroyGenerator", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGenerate", {"hiprandGenerate", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGenerateLogNormal", {"hiprandGenerateLogNormal", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGenerateLogNormalDouble", {"hiprandGenerateLogNormalDouble", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGenerateLongLong", {"hiprandGenerateLongLong", "", CONV_LIB_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curandGenerateNormal", {"hiprandGenerateNormal", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGenerateNormalDouble", {"hiprandGenerateNormalDouble", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGeneratePoisson", {"hiprandGeneratePoisson", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGenerateSeeds", {"hiprandGenerateSeeds", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGenerateUniform", {"hiprandGenerateUniform", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGenerateUniformDouble", {"hiprandGenerateUniformDouble", "", CONV_LIB_FUNC, API_RAND}}, - {"curandGetDirectionVectors32", {"hiprandGetDirectionVectors32", "", CONV_LIB_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curandGetDirectionVectors64", {"hiprandGetDirectionVectors64", "", CONV_LIB_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curandGetProperty", {"hiprandGetProperty", "", CONV_LIB_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curandGetScrambleConstants32", {"hiprandGetScrambleConstants32", "", CONV_LIB_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curandGetScrambleConstants64", 
{"hiprandGetScrambleConstants64", "", CONV_LIB_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curandGetVersion", {"hiprandGetVersion", "", CONV_LIB_FUNC, API_RAND}}, - {"curandSetGeneratorOffset", {"hiprandSetGeneratorOffset", "", CONV_LIB_FUNC, API_RAND}}, - {"curandSetGeneratorOrdering", {"hiprandSetGeneratorOrdering", "", CONV_LIB_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curandSetPseudoRandomGeneratorSeed", {"hiprandSetPseudoRandomGeneratorSeed", "", CONV_LIB_FUNC, API_RAND}}, - {"curandSetQuasiRandomGeneratorDimensions", {"hiprandSetQuasiRandomGeneratorDimensions", "", CONV_LIB_FUNC, API_RAND}}, - {"curandSetStream", {"hiprandSetStream", "", CONV_LIB_FUNC, API_RAND}}, - {"curandMakeMTGP32Constants", {"hiprandMakeMTGP32Constants", "", CONV_LIB_FUNC, API_RAND}}, - {"curandMakeMTGP32KernelState", {"hiprandMakeMTGP32KernelState", "", CONV_LIB_FUNC, API_RAND}}, - - // RAND Device functions - {"curand", {"hiprand", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_init", {"hiprand_init", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_log_normal", {"hiprand_log_normal", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_log_normal_double", {"hiprand_log_normal_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_log_normal2", {"hiprand_log_normal2", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_log_normal2_double", {"hiprand_log_normal2_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_log_normal4", {"hiprand_log_normal4", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_log_normal4_double", {"hiprand_log_normal4_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_mtgp32_single", {"hiprand_mtgp32_single", "", CONV_LIB_DEVICE_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curand_mtgp32_single_specific", {"hiprand_mtgp32_single_specific", "", CONV_LIB_DEVICE_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curand_mtgp32_specific", {"hiprand_mtgp32_specific", "", CONV_LIB_DEVICE_FUNC, API_RAND, HIP_UNSUPPORTED}}, - {"curand_normal", {"hiprand_normal", "", CONV_LIB_DEVICE_FUNC, 
API_RAND}}, - {"curand_normal_double", {"hiprand_normal_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_normal2", {"hiprand_normal2", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_normal2_double", {"hiprand_normal2_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_normal4", {"hiprand_normal4", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_normal4_double", {"hiprand_normal4_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_uniform", {"hiprand_uniform", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_uniform_double", {"hiprand_uniform_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_uniform2_double", {"hiprand_uniform2_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_uniform4", {"hiprand_uniform4", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_uniform4_double", {"hiprand_uniform4_double", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_discrete", {"hiprand_discrete", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_discrete4", {"hiprand_discrete4", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_poisson", {"hiprand_poisson", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_poisson4", {"hiprand_poisson4", "", CONV_LIB_DEVICE_FUNC, API_RAND}}, - {"curand_Philox4x32_10", {"hiprand_Philox4x32_10", "", CONV_LIB_DEVICE_FUNC, API_RAND, HIP_UNSUPPORTED}}, - // unchanged function names: skipahead, skipahead_sequence, skipahead_subsequence -}; diff --git a/hipify-clang/src/CUDA2HIP_RAND_API_types.cpp b/hipify-clang/src/CUDA2HIP_RAND_API_types.cpp deleted file mode 100644 index 67ecc93504..0000000000 --- a/hipify-clang/src/CUDA2HIP_RAND_API_types.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "CUDA2HIP.h" - -// Map of all functions -const std::map CUDA_RAND_TYPE_NAME_MAP{ - // RAND defines - {"CURAND_VER_MAJOR", {"HIPRAND_VER_MAJOR", "", CONV_DEFINE, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_VER_MINOR", {"HIPRAND_VER_MINOR", "", CONV_DEFINE, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_VER_PATCH", {"HIPRAND_VER_PATCH", "", CONV_DEFINE, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_VER_BUILD", {"HIPRAND_VER_BUILD", "", CONV_DEFINE, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_VERSION", {"HIPRAND_VERSION", "", CONV_DEFINE, API_RAND, HIP_UNSUPPORTED}}, - - // RAND Host types - {"curandStatus", {"hiprandStatus_t", "", CONV_TYPE, API_RAND}}, - {"curandStatus_t", {"hiprandStatus_t", "", CONV_TYPE, API_RAND}}, - {"curandRngType", {"hiprandRngType_t", "", CONV_TYPE, API_RAND}}, - {"curandRngType_t", {"hiprandRngType_t", "", CONV_TYPE, API_RAND}}, - {"curandGenerator_st", {"hiprandGenerator_st", "", CONV_TYPE, API_RAND}}, - {"curandGenerator_t", {"hiprandGenerator_t", "", CONV_TYPE, API_RAND}}, - {"curandDirectionVectorSet", {"hiprandDirectionVectorSet_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandDirectionVectorSet_t", {"hiprandDirectionVectorSet_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandOrdering", {"hiprandOrdering_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandOrdering_t", {"hiprandOrdering_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandDistribution_st", {"hiprandDistribution_st", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandHistogramM2V_st", {"hiprandDistribution_st", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandDistribution_t", {"hiprandDistribution_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandHistogramM2V_t", {"hiprandDistribution_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandDistributionShift_st", {"hiprandDistributionShift_st", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandDistributionShift_t", {"hiprandDistributionShift_t", "", CONV_TYPE, API_RAND, 
HIP_UNSUPPORTED}}, - {"curandDistributionM2Shift_st", {"hiprandDistributionM2Shift_st", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandDistributionM2Shift_t", {"hiprandDistributionM2Shift_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandHistogramM2_st", {"hiprandHistogramM2_st", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandHistogramM2_t", {"hiprandHistogramM2_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandHistogramM2K_st", {"hiprandHistogramM2K_st", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandHistogramM2K_t", {"hiprandHistogramM2K_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandDiscreteDistribution_st", {"hiprandDiscreteDistribution_st", "", CONV_TYPE, API_RAND}}, - {"curandDiscreteDistribution_t", {"hiprandDiscreteDistribution_t", "", CONV_TYPE, API_RAND}}, - {"curandMethod", {"hiprandMethod_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandMethod_t", {"hiprandMethod_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandDirectionVectors32_t", {"hiprandDirectionVectors32_t", "", CONV_TYPE, API_RAND}}, - {"curandDirectionVectors64_t", {"hiprandDirectionVectors64_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - - // RAND types for Device functions - {"curandStateMtgp32", {"hiprandStateMtgp32", "", CONV_TYPE, API_RAND}}, - {"curandStateMtgp32_t", {"hiprandStateMtgp32_t", "", CONV_TYPE, API_RAND}}, - {"curandStateScrambledSobol64", {"hiprandStateScrambledSobol64", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandStateScrambledSobol64_t", {"hiprandStateScrambledSobol64_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandStateSobol64", {"hiprandStateSobol64", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandStateSobol64_t", {"hiprandStateSobol64_t", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandStateScrambledSobol32", {"hiprandStateScrambledSobol32", "", CONV_TYPE, API_RAND, HIP_UNSUPPORTED}}, - {"curandStateScrambledSobol32_t", {"hiprandStateScrambledSobol32_t", "", CONV_TYPE, 
API_RAND, HIP_UNSUPPORTED}}, - {"curandStateSobol32", {"hiprandStateSobol32", "", CONV_TYPE, API_RAND}}, - {"curandStateSobol32_t", {"hiprandStateSobol32_t", "", CONV_TYPE, API_RAND}}, - {"curandStateMRG32k3a", {"hiprandStateMRG32k3a", "", CONV_TYPE, API_RAND}}, - {"curandStateMRG32k3a_t", {"hiprandStateMRG32k3a_t", "", CONV_TYPE, API_RAND}}, - {"curandStatePhilox4_32_10", {"hiprandStatePhilox4_32_10", "", CONV_TYPE, API_RAND}}, - {"curandStatePhilox4_32_10_t", {"hiprandStatePhilox4_32_10_t", "", CONV_TYPE, API_RAND}}, - {"curandStateXORWOW", {"hiprandStateXORWOW", "", CONV_TYPE, API_RAND}}, - {"curandStateXORWOW_t", {"hiprandStateXORWOW_t", "", CONV_TYPE, API_RAND}}, - {"curandState", {"hiprandState", "", CONV_TYPE, API_RAND}}, - {"curandState_t", {"hiprandState_t", "", CONV_TYPE, API_RAND}}, - - // RAND function call status types (enum curandStatus) - {"CURAND_STATUS_SUCCESS", {"HIPRAND_STATUS_SUCCESS", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_VERSION_MISMATCH", {"HIPRAND_STATUS_VERSION_MISMATCH", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_NOT_INITIALIZED", {"HIPRAND_STATUS_NOT_INITIALIZED", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_ALLOCATION_FAILED", {"HIPRAND_STATUS_ALLOCATION_FAILED", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_TYPE_ERROR", {"HIPRAND_STATUS_TYPE_ERROR", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_OUT_OF_RANGE", {"HIPRAND_STATUS_OUT_OF_RANGE", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_LENGTH_NOT_MULTIPLE", {"HIPRAND_STATUS_LENGTH_NOT_MULTIPLE", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_DOUBLE_PRECISION_REQUIRED", {"HIPRAND_STATUS_DOUBLE_PRECISION_REQUIRED", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_LAUNCH_FAILURE", {"HIPRAND_STATUS_LAUNCH_FAILURE", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_PREEXISTING_FAILURE", {"HIPRAND_STATUS_PREEXISTING_FAILURE", "", CONV_NUMERIC_LITERAL, API_RAND}}, - 
{"CURAND_STATUS_INITIALIZATION_FAILED", {"HIPRAND_STATUS_INITIALIZATION_FAILED", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_ARCH_MISMATCH", {"HIPRAND_STATUS_ARCH_MISMATCH", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_STATUS_INTERNAL_ERROR", {"HIPRAND_STATUS_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_RAND}}, - - // RAND generator types (enum curandRngType) - {"CURAND_RNG_TEST", {"HIPRAND_RNG_TEST", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_PSEUDO_DEFAULT", {"HIPRAND_RNG_PSEUDO_DEFAULT", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_PSEUDO_XORWOW", {"HIPRAND_RNG_PSEUDO_XORWOW", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_PSEUDO_MRG32K3A", {"HIPRAND_RNG_PSEUDO_MRG32K3A", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_PSEUDO_MTGP32", {"HIPRAND_RNG_PSEUDO_MTGP32", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_PSEUDO_MT19937", {"HIPRAND_RNG_PSEUDO_MT19937", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_PSEUDO_PHILOX4_32_10", {"HIPRAND_RNG_PSEUDO_PHILOX4_32_10", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_QUASI_DEFAULT", {"HIPRAND_RNG_QUASI_DEFAULT", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_QUASI_SOBOL32", {"HIPRAND_RNG_QUASI_SOBOL32", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_QUASI_SCRAMBLED_SOBOL32", {"HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL32", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_QUASI_SOBOL64", {"HIPRAND_RNG_QUASI_SOBOL64", "", CONV_NUMERIC_LITERAL, API_RAND}}, - {"CURAND_RNG_QUASI_SCRAMBLED_SOBOL64", {"HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL64", "", CONV_NUMERIC_LITERAL, API_RAND}}, - - // RAND ordering of results in memory (enum curandOrdering) - {"CURAND_ORDERING_PSEUDO_BEST", {"HIPRAND_ORDERING_PSEUDO_BEST", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_ORDERING_PSEUDO_DEFAULT", {"HIPRAND_ORDERING_PSEUDO_DEFAULT", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_ORDERING_PSEUDO_SEEDED", {"HIPRAND_ORDERING_PSEUDO_SEEDED", 
"", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_ORDERING_QUASI_DEFAULT", {"HIPRAND_ORDERING_QUASI_DEFAULT", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - - // RAND choice of direction vector set (enum curandDirectionVectorSet) - {"CURAND_DIRECTION_VECTORS_32_JOEKUO6", {"HIPRAND_DIRECTION_VECTORS_32_JOEKUO6", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6", {"HIPRAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_DIRECTION_VECTORS_64_JOEKUO6", {"HIPRAND_DIRECTION_VECTORS_64_JOEKUO6", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6", {"HIPRAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - - // RAND method (enum curandMethod) - {"CURAND_CHOOSE_BEST", {"HIPRAND_CHOOSE_BEST", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_ITR", {"HIPRAND_ITR", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_KNUTH", {"HIPRAND_KNUTH", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_HITR", {"HIPRAND_HITR", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_M1", {"HIPRAND_M1", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_M2", {"HIPRAND_M2", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_BINARY_SEARCH", {"HIPRAND_BINARY_SEARCH", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_DISCRETE_GAUSS", {"HIPRAND_DISCRETE_GAUSS", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_REJECTION", {"HIPRAND_REJECTION", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_DEVICE_API", {"HIPRAND_DEVICE_API", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_FAST_REJECTION", {"HIPRAND_FAST_REJECTION", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_3RD", {"HIPRAND_3RD", "", 
CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_DEFINITION", {"HIPRAND_DEFINITION", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, - {"CURAND_POISSON", {"HIPRAND_POISSON", "", CONV_NUMERIC_LITERAL, API_RAND, HIP_UNSUPPORTED}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp b/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp deleted file mode 100644 index 25fb930905..0000000000 --- a/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp +++ /dev/null @@ -1,693 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Map of all CUDA Runtime API functions -const std::map CUDA_RUNTIME_FUNCTION_MAP{ - // 5.1. 
Device Management - // no analogue - {"cudaChooseDevice", {"hipChooseDevice", "", CONV_DEVICE, API_RUNTIME}}, - // cuDeviceGetAttribute - {"cudaDeviceGetAttribute", {"hipDeviceGetAttribute", "", CONV_DEVICE, API_RUNTIME}}, - // cuDeviceGetByPCIBusId - {"cudaDeviceGetByPCIBusId", {"hipDeviceGetByPCIBusId", "", CONV_DEVICE, API_RUNTIME}}, - // no analogue - {"cudaDeviceGetCacheConfig", {"hipDeviceGetCacheConfig", "", CONV_DEVICE, API_RUNTIME}}, - // cuCtxGetLimit - {"cudaDeviceGetLimit", {"hipDeviceGetLimit", "", CONV_DEVICE, API_RUNTIME}}, - // cuDeviceGetNvSciSyncAttributes - {"cudaDeviceGetNvSciSyncAttributes", {"hipDeviceGetNvSciSyncAttributes", "", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuDeviceGetP2PAttribute - {"cudaDeviceGetP2PAttribute", {"hipDeviceGetP2PAttribute", "", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuDeviceGetPCIBusId - {"cudaDeviceGetPCIBusId", {"hipDeviceGetPCIBusId", "", CONV_DEVICE, API_RUNTIME}}, - // cuCtxGetSharedMemConfig - {"cudaDeviceGetSharedMemConfig", {"hipDeviceGetSharedMemConfig", "", CONV_DEVICE, API_RUNTIME}}, - // cuCtxGetStreamPriorityRange - {"cudaDeviceGetStreamPriorityRange", {"hipDeviceGetStreamPriorityRange", "", CONV_DEVICE, API_RUNTIME}}, - // no analogue - {"cudaDeviceReset", {"hipDeviceReset", "", CONV_DEVICE, API_RUNTIME}}, - // no analogue - {"cudaDeviceSetCacheConfig", {"hipDeviceSetCacheConfig", "", CONV_DEVICE, API_RUNTIME}}, - // cuCtxSetLimit - {"cudaDeviceSetLimit", {"hipDeviceSetLimit", "", CONV_DEVICE, API_RUNTIME}}, - // cuCtxSetSharedMemConfig - {"cudaDeviceSetSharedMemConfig", {"hipDeviceSetSharedMemConfig", "", CONV_DEVICE, API_RUNTIME}}, - // cuCtxSynchronize - {"cudaDeviceSynchronize", {"hipDeviceSynchronize", "", CONV_DEVICE, API_RUNTIME}}, - // cuDeviceGet - // NOTE: cuDeviceGet has no attr: int ordinal - {"cudaGetDevice", {"hipGetDevice", "", CONV_DEVICE, API_RUNTIME}}, - // cuDeviceGetCount - {"cudaGetDeviceCount", {"hipGetDeviceCount", "", CONV_DEVICE, API_RUNTIME}}, - // 
cuCtxGetFlags - // TODO: rename to hipGetDeviceFlags - {"cudaGetDeviceFlags", {"hipCtxGetFlags", "", CONV_DEVICE, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuDeviceGetProperties due to different attributes: CUdevprop and cudaDeviceProp - {"cudaGetDeviceProperties", {"hipGetDeviceProperties", "", CONV_DEVICE, API_RUNTIME}}, - // cuIpcCloseMemHandle - {"cudaIpcCloseMemHandle", {"hipIpcCloseMemHandle", "", CONV_DEVICE, API_RUNTIME}}, - // cuIpcGetEventHandle - {"cudaIpcGetEventHandle", {"hipIpcGetEventHandle", "", CONV_DEVICE, API_RUNTIME}}, - // cuIpcGetMemHandle - {"cudaIpcGetMemHandle", {"hipIpcGetMemHandle", "", CONV_DEVICE, API_RUNTIME}}, - // cuIpcOpenEventHandle - {"cudaIpcOpenEventHandle", {"hipIpcOpenEventHandle", "", CONV_DEVICE, API_RUNTIME}}, - // cuIpcOpenMemHandle - {"cudaIpcOpenMemHandle", {"hipIpcOpenMemHandle", "", CONV_DEVICE, API_RUNTIME}}, - // no analogue - {"cudaSetDevice", {"hipSetDevice", "", CONV_DEVICE, API_RUNTIME}}, - // cuCtxGetFlags - {"cudaSetDeviceFlags", {"hipSetDeviceFlags", "", CONV_DEVICE, API_RUNTIME}}, - // no analogue - {"cudaSetValidDevices", {"hipSetValidDevices", "", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.2. Thread Management [DEPRECATED] - // no analogue - {"cudaThreadExit", {"hipDeviceReset", "", CONV_THREAD, API_RUNTIME}}, - // no analogue - {"cudaThreadGetCacheConfig", {"hipDeviceGetCacheConfig", "", CONV_THREAD, API_RUNTIME}}, - // no analogue - {"cudaThreadGetLimit", {"hipThreadGetLimit", "", CONV_THREAD, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaThreadSetCacheConfig", {"hipDeviceSetCacheConfig", "", CONV_THREAD, API_RUNTIME}}, - // no analogue - {"cudaThreadSetLimit", {"hipThreadSetLimit", "", CONV_THREAD, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuCtxSynchronize - {"cudaThreadSynchronize", {"hipDeviceSynchronize", "", CONV_THREAD, API_RUNTIME}}, - - // 5.3. 
Error Handling - // no analogue - // NOTE: cudaGetErrorName and cuGetErrorName have different signatures - {"cudaGetErrorName", {"hipGetErrorName", "", CONV_ERROR, API_RUNTIME}}, - // no analogue - // NOTE: cudaGetErrorString and cuGetErrorString have different signatures - {"cudaGetErrorString", {"hipGetErrorString", "", CONV_ERROR, API_RUNTIME}}, - // no analogue - {"cudaGetLastError", {"hipGetLastError", "", CONV_ERROR, API_RUNTIME}}, - // no analogue - {"cudaPeekAtLastError", {"hipPeekAtLastError", "", CONV_ERROR, API_RUNTIME}}, - - // 5.4. Stream Management - // cuStreamAddCallback - {"cudaStreamAddCallback", {"hipStreamAddCallback", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamAttachMemAsync - {"cudaStreamAttachMemAsync", {"hipStreamAttachMemAsync", "", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuStreamBeginCapture - {"cudaStreamBeginCapture", {"hipStreamBeginCapture", "", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuStreamCreate due to different signatures - {"cudaStreamCreate", {"hipStreamCreate", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamCreate - {"cudaStreamCreateWithFlags", {"hipStreamCreateWithFlags", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamCreateWithPriority - {"cudaStreamCreateWithPriority", {"hipStreamCreateWithPriority", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamDestroy - {"cudaStreamDestroy", {"hipStreamDestroy", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamEndCapture - {"cudaStreamEndCapture", {"hipStreamEndCapture", "", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuStreamGetFlags - {"cudaStreamGetFlags", {"hipStreamGetFlags", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamGetPriority - {"cudaStreamGetPriority", {"hipStreamGetPriority", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamIsCapturing - {"cudaStreamIsCapturing", {"hipStreamIsCapturing", "", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuStreamGetCaptureInfo - {"cudaStreamGetCaptureInfo", {"hipStreamGetCaptureInfo", "", 
CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuStreamQuery - {"cudaStreamQuery", {"hipStreamQuery", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamSynchronize - {"cudaStreamSynchronize", {"hipStreamSynchronize", "", CONV_STREAM, API_RUNTIME}}, - // cuStreamWaitEvent - {"cudaStreamWaitEvent", {"hipStreamWaitEvent", "", CONV_STREAM, API_RUNTIME}}, - // cuThreadExchangeStreamCaptureMode - {"cudaThreadExchangeStreamCaptureMode", {"hipThreadExchangeStreamCaptureMode", "", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.5.Event Management - // no analogue - // NOTE: Not equal to cuEventCreate due to different signatures - {"cudaEventCreate", {"hipEventCreate", "", CONV_EVENT, API_RUNTIME}}, - // cuEventCreate - {"cudaEventCreateWithFlags", {"hipEventCreateWithFlags", "", CONV_EVENT, API_RUNTIME}}, - // cuEventDestroy - {"cudaEventDestroy", {"hipEventDestroy", "", CONV_EVENT, API_RUNTIME}}, - // cuEventElapsedTime - {"cudaEventElapsedTime", {"hipEventElapsedTime", "", CONV_EVENT, API_RUNTIME}}, - // cuEventQuery - {"cudaEventQuery", {"hipEventQuery", "", CONV_EVENT, API_RUNTIME}}, - // cuEventRecord - {"cudaEventRecord", {"hipEventRecord", "", CONV_EVENT, API_RUNTIME}}, - // cuEventSynchronize - {"cudaEventSynchronize", {"hipEventSynchronize", "", CONV_EVENT, API_RUNTIME}}, - - // 5.6. 
External Resource Interoperability - // cuDestroyExternalMemory - {"cudaDestroyExternalMemory", {"hipDestroyExternalMemory", "", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuDestroyExternalSemaphore - {"cudaDestroyExternalSemaphore", {"hipDestroyExternalSemaphore", "", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuExternalMemoryGetMappedBuffer - {"cudaExternalMemoryGetMappedBuffer", {"hipExternalMemoryGetMappedBuffer", "", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuExternalMemoryGetMappedMipmappedArray - {"cudaExternalMemoryGetMappedMipmappedArray", {"hipExternalMemoryGetMappedMipmappedArray", "", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuImportExternalMemory - {"cudaImportExternalMemory", {"hipImportExternalMemory", "", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuImportExternalSemaphore - {"cudaImportExternalSemaphore", {"hipImportExternalSemaphore", "", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuSignalExternalSemaphoresAsync - {"cudaSignalExternalSemaphoresAsync", {"hipSignalExternalSemaphoresAsync", "", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuWaitExternalSemaphoresAsync - {"cudaWaitExternalSemaphoresAsync", {"hipWaitExternalSemaphoresAsync", "", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.7. 
Execution Control - // no analogue - {"cudaFuncGetAttributes", {"hipFuncGetAttributes", "", CONV_EXECUTION, API_RUNTIME}}, - // no analogue - {"cudaFuncSetAttribute", {"hipFuncSetAttribute", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuFuncSetCacheConfig due to different signatures - {"cudaFuncSetCacheConfig", {"hipFuncSetCacheConfig", "", CONV_DEVICE, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuFuncSetSharedMemConfig due to different signatures - {"cudaFuncSetSharedMemConfig", {"hipFuncSetSharedMemConfig", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaGetParameterBuffer", {"hipGetParameterBuffer", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaGetParameterBufferV2", {"hipGetParameterBufferV2", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuLaunchCooperativeKernel due to different signatures - {"cudaLaunchCooperativeKernel", {"hipLaunchCooperativeKernel", "", CONV_EXECUTION, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuLaunchCooperativeKernelMultiDevice due to different signatures - {"cudaLaunchCooperativeKernelMultiDevice", {"hipLaunchCooperativeKernelMultiDevice", "", CONV_EXECUTION, API_RUNTIME}}, - // cuLaunchHostFunc - {"cudaLaunchHostFunc", {"hipLaunchHostFunc", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuLaunchKernel due to different signatures - {"cudaLaunchKernel", {"hipLaunchKernel", "", CONV_EXECUTION, API_RUNTIME}}, - // no analogue - {"cudaSetDoubleForDevice", {"hipSetDoubleForDevice", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaSetDoubleForHost", {"hipSetDoubleForHost", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.8. 
Occupancy - // cuOccupancyMaxActiveBlocksPerMultiprocessor - {"cudaOccupancyMaxActiveBlocksPerMultiprocessor", {"hipOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_RUNTIME}}, - // cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags - {"cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_RUNTIME}}, - // cuOccupancyMaxPotentialBlockSize - {"cudaOccupancyMaxPotentialBlockSize", {"hipOccupancyMaxPotentialBlockSize", "", CONV_OCCUPANCY, API_RUNTIME}}, - // cuOccupancyMaxPotentialBlockSizeWithFlags - {"cudaOccupancyMaxPotentialBlockSizeWithFlags", {"hipOccupancyMaxPotentialBlockSizeWithFlags", "", CONV_OCCUPANCY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaOccupancyMaxPotentialBlockSizeVariableSMem", {"hipOccupancyMaxPotentialBlockSizeVariableSMem", "", CONV_OCCUPANCY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags", {"hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags", "", CONV_OCCUPANCY, API_RUNTIME, HIP_UNSUPPORTED}}, - - // Former 5.9. Execution Control [DEPRECATED] - // NOTE: Removed in CUDA 10.1 - // no analogue - {"cudaConfigureCall", {"hipConfigureCall", "", CONV_EXECUTION, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cudaLaunch due to different signatures - {"cudaLaunch", {"hipLaunchByPtr", "", CONV_EXECUTION, API_RUNTIME}}, - // no analogue - {"cudaSetupArgument", {"hipSetupArgument", "", CONV_EXECUTION, API_RUNTIME}}, - - // 5.9. 
Memory Management - // no analogue - {"cudaArrayGetInfo", {"hipArrayGetInfo", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuMemFree - {"cudaFree", {"hipFree", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaFreeArray", {"hipFreeArray", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemFreeHost - {"cudaFreeHost", {"hipHostFree", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuMipmappedArrayDestroy due to different signatures - {"cudaFreeMipmappedArray", {"hipFreeMipmappedArray", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuMipmappedArrayGetLevel due to different signatures - {"cudaGetMipmappedArrayLevel", {"hipGetMipmappedArrayLevel", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaGetSymbolAddress", {"hipGetSymbolAddress", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaGetSymbolSize", {"hipGetSymbolSize", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemHostAlloc - {"cudaHostAlloc", {"hipHostMalloc", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemHostGetDevicePointer - {"cudaHostGetDevicePointer", {"hipHostGetDevicePointer", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemHostGetFlags - {"cudaHostGetFlags", {"hipHostGetFlags", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemHostRegister - {"cudaHostRegister", {"hipHostRegister", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemHostUnregister - {"cudaHostUnregister", {"hipHostUnregister", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemAlloc - {"cudaMalloc", {"hipMalloc", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMalloc3D", {"hipMalloc3D", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMalloc3DArray", {"hipMalloc3DArray", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMallocArray", {"hipMallocArray", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemHostAlloc - {"cudaMallocHost", {"hipHostMalloc", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemAllocManaged - {"cudaMallocManaged", {"hipMallocManaged", "", 
CONV_MEMORY, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuMipmappedArrayCreate due to different signatures - {"cudaMallocMipmappedArray", {"hipMallocMipmappedArray", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuMemAllocPitch due to different signatures - {"cudaMallocPitch", {"hipMallocPitch", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemAdvise - {"cudaMemAdvise", {"hipMemAdvise", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuMemcpy due to different signatures - {"cudaMemcpy", {"hipMemcpy", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuMemcpy2D due to different signatures - {"cudaMemcpy2D", {"hipMemcpy2D", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpy2DArrayToArray", {"hipMemcpy2DArrayToArray", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuMemcpy2DAsync due to different signatures - {"cudaMemcpy2DAsync", {"hipMemcpy2DAsync", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpy2DFromArray", {"hipMemcpy2DFromArray", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpy2DFromArrayAsync", {"hipMemcpy2DFromArrayAsync", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpy2DToArray", {"hipMemcpy2DToArray", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpy2DToArrayAsync", {"hipMemcpy2DToArrayAsync", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuMemcpy3D due to different signatures - {"cudaMemcpy3D", {"hipMemcpy3D", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuMemcpy3DAsync due to different signatures - {"cudaMemcpy3DAsync", {"hipMemcpy3DAsync", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuMemcpy3DPeer due to different signatures - {"cudaMemcpy3DPeer", {"hipMemcpy3DPeer", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no 
analogue - // NOTE: Not equal to cuMemcpy3DPeerAsync due to different signatures - {"cudaMemcpy3DPeerAsync", {"hipMemcpy3DPeerAsync", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuMemcpyAsync due to different signatures - {"cudaMemcpyAsync", {"hipMemcpyAsync", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpyFromSymbol", {"hipMemcpyFromSymbol", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpyFromSymbolAsync", {"hipMemcpyFromSymbolAsync", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuMemcpyPeer due to different signatures - {"cudaMemcpyPeer", {"hipMemcpyPeer", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuMemcpyPeerAsync due to different signatures - {"cudaMemcpyPeerAsync", {"hipMemcpyPeerAsync", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpyToSymbol", {"hipMemcpyToSymbol", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpyToSymbolAsync", {"hipMemcpyToSymbolAsync", "", CONV_MEMORY, API_RUNTIME}}, - // cuMemGetInfo - {"cudaMemGetInfo", {"hipMemGetInfo", "", CONV_MEMORY, API_RUNTIME}}, - // TODO: double check cuMemPrefetchAsync - {"cudaMemPrefetchAsync", {"hipMemPrefetchAsync", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuMemRangeGetAttribute - {"cudaMemRangeGetAttribute", {"hipMemRangeGetAttribute", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuMemRangeGetAttributes - {"cudaMemRangeGetAttributes", {"hipMemRangeGetAttributes", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuMemsetD32 - hipMemsetD32 - {"cudaMemset", {"hipMemset", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemset2D", {"hipMemset2D", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemset2DAsync", {"hipMemset2DAsync", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemset3D", {"hipMemset3D", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemset3DAsync", {"hipMemset3DAsync", 
"", CONV_MEMORY, API_RUNTIME}}, - // cuMemsetD32Async - {"cudaMemsetAsync", {"hipMemsetAsync", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"make_cudaExtent", {"make_hipExtent", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"make_cudaPitchedPtr", {"make_hipPitchedPtr", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"make_cudaPos", {"make_hipPos", "", CONV_MEMORY, API_RUNTIME}}, - - // 5.10. Memory Management [DEPRECATED] - // no analogue - // NOTE: Not equal to cuMemcpyAtoA due to different signatures - {"cudaMemcpyArrayToArray", {"hipMemcpyArrayToArray", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaMemcpyFromArray", {"hipMemcpyFromArray", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpyFromArrayAsync", {"hipMemcpyFromArrayAsync", "", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaMemcpyToArray", {"hipMemcpyToArray", "", CONV_MEMORY, API_RUNTIME}}, - // no analogue - {"cudaMemcpyToArrayAsync", {"hipMemcpyToArrayAsync", "", CONV_MEMORY, API_RUNTIME}}, - - // 5.11.Unified Addressing - // no analogue - // NOTE: Not equal to cuPointerGetAttributes due to different signatures - {"cudaPointerGetAttributes", {"hipPointerGetAttributes", "", CONV_ADDRESSING, API_RUNTIME}}, - - // 5.12. Peer Device Memory Access - // cuDeviceCanAccessPeer - {"cudaDeviceCanAccessPeer", {"hipDeviceCanAccessPeer", "", CONV_PEER, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuCtxDisablePeerAccess due to different signatures - {"cudaDeviceDisablePeerAccess", {"hipDeviceDisablePeerAccess", "", CONV_PEER, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuCtxEnablePeerAccess due to different signatures - {"cudaDeviceEnablePeerAccess", {"hipDeviceEnablePeerAccess", "", CONV_PEER, API_RUNTIME}}, - - // 5.13. 
OpenGL Interoperability - // cuGLGetDevices - {"cudaGLGetDevices", {"hipGLGetDevices", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsGLRegisterBuffer - {"cudaGraphicsGLRegisterBuffer", {"hipGraphicsGLRegisterBuffer", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsGLRegisterImage - {"cudaGraphicsGLRegisterImage", {"hipGraphicsGLRegisterImage", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuWGLGetDevice - {"cudaWGLGetDevice", {"hipWGLGetDevice", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.14. OpenGL Interoperability [DEPRECATED] - // no analogue - // NOTE: Not equal to cuGLMapBufferObject due to different signatures - {"cudaGLMapBufferObject", {"hipGLMapBufferObject", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - // NOTE: Not equal to cuGLMapBufferObjectAsync due to different signatures - {"cudaGLMapBufferObjectAsync", {"hipGLMapBufferObjectAsync", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGLRegisterBufferObject - {"cudaGLRegisterBufferObject", {"hipGLRegisterBufferObject", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGLSetBufferObjectMapFlags - {"cudaGLSetBufferObjectMapFlags", {"hipGLSetBufferObjectMapFlags", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaGLSetGLDevice", {"hipGLSetGLDevice", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGLUnmapBufferObject - {"cudaGLUnmapBufferObject", {"hipGLUnmapBufferObject", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGLUnmapBufferObjectAsync - {"cudaGLUnmapBufferObjectAsync", {"hipGLUnmapBufferObjectAsync", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGLUnregisterBufferObject - {"cudaGLUnregisterBufferObject", {"hipGLUnregisterBufferObject", "", CONV_OPENGL, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.15. 
Direct3D 9 Interoperability - // cuD3D9GetDevice - {"cudaD3D9GetDevice", {"hipD3D9GetDevice", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9GetDevices - {"cudaD3D9GetDevices", {"hipD3D9GetDevices", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9GetDirect3DDevice - {"cudaD3D9GetDirect3DDevice", {"hipD3D9GetDirect3DDevice", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaD3D9SetDirect3DDevice", {"hipD3D9SetDirect3DDevice", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsD3D9RegisterResource - {"cudaGraphicsD3D9RegisterResource", {"hipGraphicsD3D9RegisterResource", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.16.Direct3D 9 Interoperability[DEPRECATED] - // cuD3D9MapResources - {"cudaD3D9MapResources", {"hipD3D9MapResources", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9RegisterResource - {"cudaD3D9RegisterResource", {"hipD3D9RegisterResource", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9ResourceGetMappedArray - {"cudaD3D9ResourceGetMappedArray", {"hipD3D9ResourceGetMappedArray", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaD3D9ResourceGetMappedPitch - {"cudaD3D9ResourceGetMappedPitch", {"hipD3D9ResourceGetMappedPitch", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9ResourceGetMappedPointer - {"cudaD3D9ResourceGetMappedPointer", {"hipD3D9ResourceGetMappedPointer", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9ResourceGetMappedSize - {"cudaD3D9ResourceGetMappedSize", {"hipD3D9ResourceGetMappedSize", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9ResourceGetSurfaceDimensions - {"cudaD3D9ResourceGetSurfaceDimensions", {"hipD3D9ResourceGetSurfaceDimensions", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9ResourceSetMapFlags - {"cudaD3D9ResourceSetMapFlags", {"hipD3D9ResourceSetMapFlags", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9UnmapResources - {"cudaD3D9UnmapResources", {"hipD3D9UnmapResources", "", 
CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D9UnregisterResource - {"cudaD3D9UnregisterResource", {"hipD3D9UnregisterResource", "", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.17. Direct3D 10 Interoperability - // cuD3D10GetDevice - {"cudaD3D10GetDevice", {"hipD3D10GetDevice", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10GetDevices - {"cudaD3D10GetDevices", {"hipD3D10GetDevices", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsD3D10RegisterResource - {"cudaGraphicsD3D10RegisterResource", {"hipGraphicsD3D10RegisterResource", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.18. Direct3D 10 Interoperability [DEPRECATED] - // cudaD3D10GetDirect3DDevice - {"cudaD3D10GetDirect3DDevice", {"hipD3D10GetDirect3DDevice", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10MapResources - {"cudaD3D10MapResources", {"hipD3D10MapResources", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10RegisterResource - {"cudaD3D10RegisterResource", {"hipD3D10RegisterResource", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10ResourceGetMappedArray - {"cudaD3D10ResourceGetMappedArray", {"hipD3D10ResourceGetMappedArray", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaD3D10ResourceGetMappedPitch - {"cudaD3D10ResourceGetMappedPitch", {"hipD3D10ResourceGetMappedPitch", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10ResourceGetMappedPointer - {"cudaD3D10ResourceGetMappedPointer", {"hipD3D10ResourceGetMappedPointer", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10ResourceGetMappedSize - {"cudaD3D10ResourceGetMappedSize", {"hipD3D10ResourceGetMappedSize", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10ResourceGetSurfaceDimensions - {"cudaD3D10ResourceGetSurfaceDimensions", {"hipD3D10ResourceGetSurfaceDimensions", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10ResourceSetMapFlags - {"cudaD3D10ResourceSetMapFlags", {"hipD3D10ResourceSetMapFlags", "", 
CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaD3D10SetDirect3DDevice", {"hipD3D10SetDirect3DDevice", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10UnmapResources - {"cudaD3D10UnmapResources", {"hipD3D10UnmapResources", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D10UnregisterResource - {"cudaD3D10UnregisterResource", {"hipD3D10UnregisterResource", "", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.19. Direct3D 11 Interoperability - // cuD3D11GetDevice - {"cudaD3D11GetDevice", {"hipD3D11GetDevice", "", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuD3D11GetDevices - {"cudaD3D11GetDevices", {"hipD3D11GetDevices", "", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsD3D11RegisterResource - {"cudaGraphicsD3D11RegisterResource", {"hipGraphicsD3D11RegisterResource", "", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.20. Direct3D 11 Interoperability [DEPRECATED] - // cuD3D11GetDirect3DDevice - {"cudaD3D11GetDirect3DDevice", {"hipD3D11GetDirect3DDevice", "", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaD3D11SetDirect3DDevice", {"hipD3D11SetDirect3DDevice", "", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.21. VDPAU Interoperability - // cuGraphicsVDPAURegisterOutputSurface - {"cudaGraphicsVDPAURegisterOutputSurface", {"hipGraphicsVDPAURegisterOutputSurface", "", CONV_VDPAU, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsVDPAURegisterVideoSurface - {"cudaGraphicsVDPAURegisterVideoSurface", {"hipGraphicsVDPAURegisterVideoSurface", "", CONV_VDPAU, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuVDPAUGetDevice - {"cudaVDPAUGetDevice", {"hipVDPAUGetDevice", "", CONV_VDPAU, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"cudaVDPAUSetVDPAUDevice", {"hipVDPAUSetDevice", "", CONV_VDPAU, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.22. 
EGL Interoperability - // cuEGLStreamConsumerAcquireFrame - {"cudaEGLStreamConsumerAcquireFrame", {"hipEGLStreamConsumerAcquireFrame", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEGLStreamConsumerConnect - {"cudaEGLStreamConsumerConnect", {"hipEGLStreamConsumerConnect", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEGLStreamConsumerConnectWithFlags - {"cudaEGLStreamConsumerConnectWithFlags", {"hipEGLStreamConsumerConnectWithFlags", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEGLStreamConsumerDisconnect - {"cudaEGLStreamConsumerDisconnect", {"hipEGLStreamConsumerDisconnect", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEGLStreamConsumerReleaseFrame - {"cudaEGLStreamConsumerReleaseFrame", {"hipEGLStreamConsumerReleaseFrame", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEGLStreamProducerConnect - {"cudaEGLStreamProducerConnect", {"hipEGLStreamProducerConnect", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEGLStreamProducerDisconnect - {"cudaEGLStreamProducerDisconnect", {"hipEGLStreamProducerDisconnect", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEGLStreamProducerPresentFrame - {"cudaEGLStreamProducerPresentFrame", {"hipEGLStreamProducerPresentFrame", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEGLStreamProducerReturnFrame - {"cudaEGLStreamProducerReturnFrame", {"hipEGLStreamProducerReturnFrame", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuEventCreateFromEGLSync - {"cudaEventCreateFromEGLSync", {"hipEventCreateFromEGLSync", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsEGLRegisterImage - {"cudaGraphicsEGLRegisterImage", {"hipGraphicsEGLRegisterImage", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsResourceGetMappedEglFrame - {"cudaGraphicsResourceGetMappedEglFrame", {"hipGraphicsResourceGetMappedEglFrame", "", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.23. 
Graphics Interoperability - // cuGraphicsMapResources - {"cudaGraphicsMapResources", {"hipGraphicsMapResources", "", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsResourceGetMappedMipmappedArray - {"cudaGraphicsResourceGetMappedMipmappedArray", {"hipGraphicsResourceGetMappedMipmappedArray", "", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsResourceGetMappedPointer - {"cudaGraphicsResourceGetMappedPointer", {"hipGraphicsResourceGetMappedPointer", "", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsResourceSetMapFlags - {"cudaGraphicsResourceSetMapFlags", {"hipGraphicsResourceSetMapFlags", "", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsSubResourceGetMappedArray - {"cudaGraphicsSubResourceGetMappedArray", {"hipGraphicsSubResourceGetMappedArray", "", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsUnmapResources - {"cudaGraphicsUnmapResources", {"hipGraphicsUnmapResources", "", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphicsUnregisterResource - {"cudaGraphicsUnregisterResource", {"hipGraphicsUnregisterResource", "", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.24. 
Texture Reference Management [DEPRECATED] - // no analogue - {"cudaBindTexture", {"hipBindTexture", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - {"cudaBindTexture2D", {"hipBindTexture2D", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - {"cudaBindTextureToArray", {"hipBindTextureToArray", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - // NOTE: Unsupported yet on NVCC path - {"cudaBindTextureToMipmappedArray", {"hipBindTextureToMipmappedArray", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - {"cudaCreateChannelDesc", {"hipCreateChannelDesc", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - {"cudaGetChannelDesc", {"hipGetChannelDesc", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - {"cudaGetTextureAlignmentOffset", {"hipGetTextureAlignmentOffset", "", CONV_TEXTURE, API_RUNTIME}}, - // TODO: double check cuModuleGetTexRef - // NOTE: Unsupported yet on NVCC path - {"cudaGetTextureReference", {"hipGetTextureReference", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - {"cudaUnbindTexture", {"hipUnbindTexture", "", CONV_TEXTURE, API_RUNTIME}}, - - // 5.25. Surface Reference Management [DEPRECATED] - // no analogue - {"cudaBindSurfaceToArray", {"hipBindSurfaceToArray", "", CONV_SURFACE, API_RUNTIME, HIP_UNSUPPORTED}}, - // TODO: double check cuModuleGetSurfRef - {"cudaGetSurfaceReference", {"hipGetSurfaceReference", "", CONV_SURFACE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.26. 
Texture Object Management - // no analogue - // NOTE: Not equal to cuTexObjectCreate due to different signatures - {"cudaCreateTextureObject", {"hipCreateTextureObject", "", CONV_TEXTURE, API_RUNTIME}}, - // cuTexObjectDestroy - {"cudaDestroyTextureObject", {"hipDestroyTextureObject", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuTexObjectGetResourceDesc due to different signatures - {"cudaGetTextureObjectResourceDesc", {"hipGetTextureObjectResourceDesc", "", CONV_TEXTURE, API_RUNTIME}}, - // cuTexObjectGetResourceViewDesc - {"cudaGetTextureObjectResourceViewDesc", {"hipGetTextureObjectResourceViewDesc", "", CONV_TEXTURE, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cudaGetTextureObjectTextureDesc due to different signatures - {"cuTexObjectGetTextureDesc", {"hipGetTextureObjectTextureDesc", "", CONV_TEXTURE, API_RUNTIME}}, - - // 5.27. Surface Object Management - // no analogue - // NOTE: Not equal to cuSurfObjectCreate due to different signatures - {"cudaCreateSurfaceObject", {"hipCreateSurfaceObject", "", CONV_SURFACE, API_RUNTIME}}, - // cuSurfObjectDestroy - {"cudaDestroySurfaceObject", {"hipDestroySurfaceObject", "", CONV_SURFACE, API_RUNTIME}}, - // no analogue - // NOTE: Not equal to cuSurfObjectGetResourceDesc due to different signatures - {"cudaGetSurfaceObjectResourceDesc", {"hipGetSurfaceObjectResourceDesc", "", CONV_SURFACE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.28.Version Management - // cuDriverGetVersion - {"cudaDriverGetVersion", {"hipDriverGetVersion", "", CONV_VERSION, API_RUNTIME}}, - // no analogue - {"cudaRuntimeGetVersion", {"hipRuntimeGetVersion", "", CONV_VERSION, API_RUNTIME}}, - - // 5.29. 
Graph Management - // cuGraphAddChildGraphNode - {"cudaGraphAddChildGraphNode", {"hipGraphAddChildGraphNode", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphAddDependencies - {"cudaGraphAddDependencies", {"hipGraphAddDependencies", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphAddEmptyNode - {"cudaGraphAddEmptyNode", {"hipGraphAddEmptyNode", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphAddHostNode - {"cudaGraphAddHostNode", {"hipGraphAddHostNode", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphAddKernelNode - {"cudaGraphAddKernelNode", {"hipGraphAddKernelNode", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphAddMemcpyNode - {"cudaGraphAddMemcpyNode", {"hipGraphAddMemcpyNode", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphAddMemsetNode - {"cudaGraphAddMemsetNode", {"hipGraphAddMemsetNode", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphChildGraphNodeGetGraph - {"cudaGraphChildGraphNodeGetGraph", {"hipGraphChildGraphNodeGetGraph", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphClone - {"cudaGraphClone", {"hipGraphClone", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphCreate - {"cudaGraphCreate", {"hipGraphCreate", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphDestroy - {"cudaGraphDestroy", {"hipGraphDestroy", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphDestroyNode - {"cudaGraphDestroyNode", {"hipGraphDestroyNode", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphExecDestroy - {"cudaGraphExecDestroy", {"hipGraphExecDestroy", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphGetEdges - {"cudaGraphGetEdges", {"hipGraphGetEdges", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphGetNodes - {"cudaGraphGetNodes", {"hipGraphGetNodes", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphGetRootNodes - {"cudaGraphGetRootNodes", {"hipGraphGetRootNodes", "", CONV_GRAPH, API_RUNTIME, 
HIP_UNSUPPORTED}}, - // cuGraphHostNodeGetParams - {"cudaGraphHostNodeGetParams", {"hipGraphHostNodeGetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphHostNodeSetParams - {"cudaGraphHostNodeSetParams", {"hipGraphHostNodeSetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphInstantiate - {"cudaGraphInstantiate", {"hipGraphInstantiate", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphExecKernelNodeSetParams - {"cudaGraphExecKernelNodeSetParams", {"hipGraphExecKernelNodeSetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphExecMemcpyNodeSetParams - {"cudaGraphExecMemcpyNodeSetParams", {"hipGraphExecMemcpyNodeSetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphExecMemsetNodeSetParams - {"cudaGraphExecMemsetNodeSetParams", {"hipGraphExecMemsetNodeSetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphExecHostNodeSetParams - {"cudaGraphExecHostNodeSetParams", {"hipGraphExecHostNodeSetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphExecUpdate - {"cudaGraphExecUpdate", {"hipGraphExecUpdate", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphKernelNodeGetParams - {"cudaGraphKernelNodeGetParams", {"hipGraphKernelNodeGetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphKernelNodeSetParams - {"cudaGraphKernelNodeSetParams", {"hipGraphKernelNodeSetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphLaunch - {"cudaGraphLaunch", {"hipGraphLaunch", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphMemcpyNodeGetParams - {"cudaGraphMemcpyNodeGetParams", {"hipGraphMemcpyNodeGetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphMemcpyNodeSetParams - {"cudaGraphMemcpyNodeSetParams", {"hipGraphMemcpyNodeSetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphMemsetNodeGetParams - {"cudaGraphMemsetNodeGetParams", {"hipGraphMemsetNodeGetParams", "", CONV_GRAPH, API_RUNTIME, 
HIP_UNSUPPORTED}}, - // cuGraphMemsetNodeSetParams - {"cudaGraphMemsetNodeSetParams", {"hipGraphMemsetNodeSetParams", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphNodeFindInClone - {"cudaGraphNodeFindInClone", {"hipGraphNodeFindInClone", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphNodeGetDependencies - {"cudaGraphNodeGetDependencies", {"hipGraphNodeGetDependencies", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphNodeGetDependentNodes - {"cudaGraphNodeGetDependentNodes", {"hipGraphNodeGetDependentNodes", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphNodeGetType - {"cudaGraphNodeGetType", {"hipGraphNodeGetType", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuGraphRemoveDependencies - {"cudaGraphRemoveDependencies", {"hipGraphRemoveDependencies", "", CONV_GRAPH, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5.32. Profiler Control - // cuProfilerInitialize - {"cudaProfilerInitialize", {"hipProfilerInitialize", "", CONV_PROFILER, API_RUNTIME, HIP_UNSUPPORTED}}, - // cuProfilerStart - {"cudaProfilerStart", {"hipProfilerStart", "", CONV_PROFILER, API_RUNTIME}}, - // cuProfilerStop - {"cudaProfilerStop", {"hipProfilerStop", "", CONV_PROFILER, API_RUNTIME}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_Runtime_API_types.cpp b/hipify-clang/src/CUDA2HIP_Runtime_API_types.cpp deleted file mode 100644 index 6eb9bfb2be..0000000000 --- a/hipify-clang/src/CUDA2HIP_Runtime_API_types.cpp +++ /dev/null @@ -1,1426 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Maps the names of CUDA RUNTIME API types to the corresponding HIP types -const std::map CUDA_RUNTIME_TYPE_NAME_MAP { - - // 1. 
Structs - - // no analogue - {"cudaChannelFormatDesc", {"hipChannelFormatDesc", "", CONV_TYPE, API_RUNTIME}}, - // no analogue - {"cudaDeviceProp", {"hipDeviceProp_t", "", CONV_TYPE, API_RUNTIME}}, - - // no analogue - {"cudaEglFrame", {"hipEglFrame", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"cudaEglFrame_st", {"hipEglFrame", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // no analogue - {"cudaEglPlaneDesc", {"hipEglPlaneDesc", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"cudaEglPlaneDesc_st", {"hipEglPlaneDesc", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // no analogue - {"cudaExtent", {"hipExtent", "", CONV_TYPE, API_RUNTIME}}, - - // CUDA_EXTERNAL_MEMORY_BUFFER_DESC - {"cudaExternalMemoryBufferDesc", {"HIP_EXTERNAL_MEMORY_BUFFER_DESC", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUDA_EXTERNAL_MEMORY_HANDLE_DESC - {"cudaExternalMemoryHandleDesc", {"HIP_EXTERNAL_MEMORY_HANDLE_DESC", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC - {"cudaExternalMemoryMipmappedArrayDesc", {"HIP_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC - {"cudaExternalSemaphoreHandleDesc", {"HIP_EXTERNAL_SEMAPHORE_HANDLE_DESC", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS - {"cudaExternalSemaphoreSignalParams", {"HIP_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS - {"cudaExternalSemaphoreWaitParams", {"HIP_EXTERNAL_SEMAPHORE_WAIT_PARAMS", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // no analogue - {"cudaFuncAttributes", {"hipFuncAttributes", "", CONV_TYPE, API_RUNTIME}}, - - // CUDA_HOST_NODE_PARAMS - {"cudaHostNodeParams", {"HIP_HOST_NODE_PARAMS", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUipcEventHandle - {"cudaIpcEventHandle_t", {"ihipIpcEventHandle_t", "", CONV_TYPE, 
API_RUNTIME}}, - // CUipcEventHandle_st - {"cudaIpcEventHandle_st", {"ihipIpcEventHandle_t", "", CONV_TYPE, API_RUNTIME}}, - - // CUipcMemHandle - {"cudaIpcMemHandle_t", {"hipIpcMemHandle_t", "", CONV_TYPE, API_RUNTIME}}, - // CUipcMemHandle_st - {"cudaIpcMemHandle_st", {"hipIpcMemHandle_st", "", CONV_TYPE, API_RUNTIME}}, - - // CUDA_KERNEL_NODE_PARAMS - {"cudaKernelNodeParams", {"hipKernelNodeParams", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // no analogue - // CUDA_LAUNCH_PARAMS struct differs - {"cudaLaunchParams", {"hipLaunchParams", "", CONV_TYPE, API_RUNTIME}}, - - // no analogue - // NOTE: HIP struct is bigger and contains cudaMemcpy3DParms only in the beginning - {"cudaMemcpy3DParms", {"hipMemcpy3DParms", "", CONV_TYPE, API_RUNTIME}}, - - // no analogue - {"cudaMemcpy3DPeerParms", {"hipMemcpy3DPeerParms", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUDA_MEMSET_NODE_PARAMS - {"cudaMemsetParams", {"hipMemsetParams", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // no analogue - {"cudaPitchedPtr", {"hipPitchedPtr", "", CONV_TYPE, API_RUNTIME}}, - - // no analogue - {"cudaPointerAttributes", {"hipPointerAttribute_t", "", CONV_TYPE, API_RUNTIME}}, - - // no analogue - {"cudaPos", {"hipPos", "", CONV_TYPE, API_RUNTIME}}, - - // no analogue - // NOTE: CUDA_RESOURCE_DESC struct differs - {"cudaResourceDesc", {"hipResourceDesc", "", CONV_TYPE, API_RUNTIME}}, - - // NOTE: CUDA_RESOURCE_VIEW_DESC has reserved bytes in the end - {"cudaResourceViewDesc", {"hipResourceViewDesc", "", CONV_TYPE, API_RUNTIME}}, - - // no analogue - // NOTE: CUDA_TEXTURE_DESC differs - {"cudaTextureDesc", {"hipTextureDesc", "", CONV_TYPE, API_RUNTIME}}, - - // NOTE: the same struct and its name - {"CUuuid_st", {"hipUUID", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // NOTE: possibly CUsurfref is analogue - {"surfaceReference", {"hipSurfaceReference", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // the same - CUevent_st - {"CUevent_st", {"ihipEvent_t", 
"", CONV_TYPE, API_RUNTIME}}, - // CUevent - {"cudaEvent_t", {"hipEvent_t", "", CONV_TYPE, API_RUNTIME}}, - - // CUextMemory_st - {"CUexternalMemory_st", {"hipExtMemory_st", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CUexternalMemory - {"cudaExternalMemory_t", {"hipExternalMemory", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUextSemaphore_st - {"CUexternalSemaphore_st", {"hipExtSemaphore_st", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CUexternalSemaphore - {"cudaExternalSemaphore_t", {"hipExternalSemaphore", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // the same - CUgraph_st - {"CUgraph_st", {"hipGraph_st", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CUgraph - {"cudaGraph_t", {"hipGraph", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // the same -CUgraphExec_st - {"CUgraphExec_st", {"hipGraphExec_st", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CUgraphExec - {"cudaGraphExec_t", {"hipGraphExec", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUgraphicsResource_st - {"cudaGraphicsResource", {"hipGraphicsResource_st", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CUgraphicsResource - {"cudaGraphicsResource_t", {"hipGraphicsResource_t", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // the same - CUgraphNode_st - {"CUgraphNode_st", {"hipGraphNode_st", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CUgraphNode - {"cudaGraphNode_t", {"hipGraphNode", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUeglStreamConnection_st - {"CUeglStreamConnection_st", {"hipEglStreamConnection", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CUeglStreamConnection - {"cudaEglStreamConnection", {"hipEglStreamConnection", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUarray_st - {"cudaArray", {"hipArray", "", CONV_TYPE, API_RUNTIME}}, - // CUarray - {"cudaArray_t", {"hipArray_t", "", CONV_TYPE, API_RUNTIME}}, - // no analogue - {"cudaArray_const_t", {"hipArray_const_t", "", CONV_TYPE, 
API_RUNTIME}}, - - // CUmipmappedArray_st - {"cudaMipmappedArray", {"hipMipmappedArray", "", CONV_TYPE, API_RUNTIME}}, - // CUmipmappedArray - {"cudaMipmappedArray_t", {"hipMipmappedArray_t", "", CONV_TYPE, API_RUNTIME}}, - // no analogue - {"cudaMipmappedArray_const_t", {"hipMipmappedArray_const_t", "", CONV_TYPE, API_RUNTIME}}, - - // the same - CUstream_st - {"CUstream_st", {"ihipStream_t", "", CONV_TYPE, API_RUNTIME}}, - // CUstream - {"cudaStream_t", {"hipStream_t", "", CONV_TYPE, API_RUNTIME}}, - - // 3. Enums - - // no analogue - {"cudaCGScope", {"hipCGScope", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaCGScope enum values - {"cudaCGScopeInvalid", {"hipCGScopeInvalid", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - {"cudaCGScopeGrid", {"hipCGScopeGrid", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - {"cudaCGScopeMultiGrid", {"hipCGScopeMultiGrid", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - - // no analogue - {"cudaChannelFormatKind", {"hipChannelFormatKind", "", CONV_TYPE, API_RUNTIME}}, - // cudaChannelFormatKind enum values - {"cudaChannelFormatKindSigned", {"hipChannelFormatKindSigned", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - {"cudaChannelFormatKindUnsigned", {"hipChannelFormatKindUnsigned", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - {"cudaChannelFormatKindFloat", {"hipChannelFormatKindFloat", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - {"cudaChannelFormatKindNone", {"hipChannelFormatKindNone", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 3 - - // CUcomputemode - {"cudaComputeMode", {"hipComputeMode", "", CONV_TYPE, API_RUNTIME}}, - // cudaComputeMode enum values - // CU_COMPUTEMODE_DEFAULT - {"cudaComputeModeDefault", {"hipComputeModeDefault", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - // CU_COMPUTEMODE_EXCLUSIVE - {"cudaComputeModeExclusive", {"hipComputeModeExclusive", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - // CU_COMPUTEMODE_PROHIBITED - 
{"cudaComputeModeProhibited", {"hipComputeModeProhibited", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - // CU_COMPUTEMODE_EXCLUSIVE_PROCESS - {"cudaComputeModeExclusiveProcess", {"hipComputeModeExclusiveProcess", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 3 - - // CUdevice_attribute - {"cudaDeviceAttr", {"hipDeviceAttribute_t", "", CONV_TYPE, API_RUNTIME}}, - // cudaDeviceAttr enum values - // CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK - {"cudaDevAttrMaxThreadsPerBlock", {"hipDeviceAttributeMaxThreadsPerBlock", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - // CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X - {"cudaDevAttrMaxBlockDimX", {"hipDeviceAttributeMaxBlockDimX", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - // CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y - {"cudaDevAttrMaxBlockDimY", {"hipDeviceAttributeMaxBlockDimY", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 3 - // CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z - {"cudaDevAttrMaxBlockDimZ", {"hipDeviceAttributeMaxBlockDimZ", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 4 - // CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X - {"cudaDevAttrMaxGridDimX", {"hipDeviceAttributeMaxGridDimX", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 5 - // CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y - {"cudaDevAttrMaxGridDimY", {"hipDeviceAttributeMaxGridDimY", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 6 - // CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z - {"cudaDevAttrMaxGridDimZ", {"hipDeviceAttributeMaxGridDimZ", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 7 - // CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK - {"cudaDevAttrMaxSharedMemoryPerBlock", {"hipDeviceAttributeMaxSharedMemoryPerBlock", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 8 - // CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY - {"cudaDevAttrTotalConstantMemory", {"hipDeviceAttributeTotalConstantMemory", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 9 - // CU_DEVICE_ATTRIBUTE_WARP_SIZE - {"cudaDevAttrWarpSize", {"hipDeviceAttributeWarpSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 10 - // 
CU_DEVICE_ATTRIBUTE_MAX_PITCH - {"cudaDevAttrMaxPitch", {"hipDeviceAttributeMaxPitch", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 11 - // CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK - {"cudaDevAttrMaxRegistersPerBlock", {"hipDeviceAttributeMaxRegistersPerBlock", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 12 - // CU_DEVICE_ATTRIBUTE_CLOCK_RATE - {"cudaDevAttrClockRate", {"hipDeviceAttributeClockRate", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 13 - // CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT - {"cudaDevAttrTextureAlignment", {"hipDeviceAttributeTextureAlignment", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 14 - // CU_DEVICE_ATTRIBUTE_GPU_OVERLAP - // NOTE: Is not deprecated as CUDA Driver's API analogue CU_DEVICE_ATTRIBUTE_GPU_OVERLAP - {"cudaDevAttrGpuOverlap", {"hipDeviceAttributeGpuOverlap", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 15 - // CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT - {"cudaDevAttrMultiProcessorCount", {"hipDeviceAttributeMultiprocessorCount", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 16 - // CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT - {"cudaDevAttrKernelExecTimeout", {"hipDeviceAttributeKernelExecTimeout", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 17 - // CU_DEVICE_ATTRIBUTE_INTEGRATED - {"cudaDevAttrIntegrated", {"hipDeviceAttributeIntegrated", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 18 - // CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY - {"cudaDevAttrCanMapHostMemory", {"hipDeviceAttributeCanMapHostMemory", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 19 - // CU_DEVICE_ATTRIBUTE_COMPUTE_MODE - {"cudaDevAttrComputeMode", {"hipDeviceAttributeComputeMode", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 20 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH - {"cudaDevAttrMaxTexture1DWidth", {"hipDeviceAttributeMaxTexture1DWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 21 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH - {"cudaDevAttrMaxTexture2DWidth", {"hipDeviceAttributeMaxTexture2DWidth", "", CONV_NUMERIC_LITERAL, 
API_RUNTIME}}, // 22 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT - {"cudaDevAttrMaxTexture2DHeight", {"hipDeviceAttributeMaxTexture2DHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 23 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH - {"cudaDevAttrMaxTexture3DWidth", {"hipDeviceAttributeMaxTexture3DWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 24 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT - {"cudaDevAttrMaxTexture3DHeight", {"hipDeviceAttributeMaxTexture3DHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 25 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH - {"cudaDevAttrMaxTexture3DDepth", {"hipDeviceAttributeMaxTexture3DDepth", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 26 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH - {"cudaDevAttrMaxTexture2DLayeredWidth", {"hipDeviceAttributeMaxTexture2DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 27 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT - {"cudaDevAttrMaxTexture2DLayeredHeight", {"hipDeviceAttributeMaxTexture2DLayeredHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 28 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS - {"cudaDevAttrMaxTexture2DLayeredLayers", {"hipDeviceAttributeMaxTexture2DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 29 - // CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT - {"cudaDevAttrSurfaceAlignment", {"hipDeviceAttributeSurfaceAlignment", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 30 - // CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS - {"cudaDevAttrConcurrentKernels", {"hipDeviceAttributeConcurrentKernels", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 31 - // CU_DEVICE_ATTRIBUTE_ECC_ENABLED - {"cudaDevAttrEccEnabled", {"hipDeviceAttributeEccEnabled", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 32 - // CU_DEVICE_ATTRIBUTE_PCI_BUS_ID - {"cudaDevAttrPciBusId", {"hipDeviceAttributePciBusId", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 33 - // 
CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID - {"cudaDevAttrPciDeviceId", {"hipDeviceAttributePciDeviceId", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 34 - // CU_DEVICE_ATTRIBUTE_TCC_DRIVER - {"cudaDevAttrTccDriver", {"hipDeviceAttributeTccDriver", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 35 - // CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE - {"cudaDevAttrMemoryClockRate", {"hipDeviceAttributeMemoryClockRate", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 36 - // CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH - {"cudaDevAttrGlobalMemoryBusWidth", {"hipDeviceAttributeMemoryBusWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 37 - // CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE - {"cudaDevAttrL2CacheSize", {"hipDeviceAttributeL2CacheSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 38 - // CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR - {"cudaDevAttrMaxThreadsPerMultiProcessor", {"hipDeviceAttributeMaxThreadsPerMultiProcessor", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 39 - // CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT - {"cudaDevAttrAsyncEngineCount", {"hipDeviceAttributeAsyncEngineCount", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 40 - // CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING - {"cudaDevAttrUnifiedAddressing", {"hipDeviceAttributeUnifiedAddressing", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 41 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH - {"cudaDevAttrMaxTexture1DLayeredWidth", {"hipDeviceAttributeMaxTexture1DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 42 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS - {"cudaDevAttrMaxTexture1DLayeredLayers", {"hipDeviceAttributeMaxTexture1DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 43 - // 44 - no - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH - {"cudaDevAttrMaxTexture2DGatherWidth", {"hipDeviceAttributeMaxTexture2DGatherWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 45 - // 
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT - {"cudaDevAttrMaxTexture2DGatherHeight", {"hipDeviceAttributeMaxTexture2DGatherHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 46 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE - {"cudaDevAttrMaxTexture3DWidthAlt", {"hipDeviceAttributeMaxTexture3DWidthAlternate", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 47 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE - {"cudaDevAttrMaxTexture3DHeightAlt", {"hipDeviceAttributeMaxTexture3DHeightAlternate", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 48 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE - {"cudaDevAttrMaxTexture3DDepthAlt", {"hipDeviceAttributeMaxTexture3DDepthAlternate", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 49 - // CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID - {"cudaDevAttrPciDomainId", {"hipDeviceAttributePciDomainId", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 50 - // CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT - {"cudaDevAttrTexturePitchAlignment", {"hipDeviceAttributeTexturePitchAlignment", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 51 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH - {"cudaDevAttrMaxTextureCubemapWidth", {"hipDeviceAttributeMaxTextureCubemapWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 52 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH - {"cudaDevAttrMaxTextureCubemapLayeredWidth", {"hipDeviceAttributeMaxTextureCubemapLayeredWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 53 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS - {"cudaDevAttrMaxTextureCubemapLayeredLayers", {"hipDeviceAttributeMaxTextureCubemapLayeredLayers", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 54 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH - {"cudaDevAttrMaxSurface1DWidth", {"hipDeviceAttributeMaxSurface1DWidth", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 55 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH - {"cudaDevAttrMaxSurface2DWidth", {"hipDeviceAttributeMaxSurface2DWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 56 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT - {"cudaDevAttrMaxSurface2DHeight", {"hipDeviceAttributeMaxSurface2DHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 57 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH - {"cudaDevAttrMaxSurface3DWidth", {"hipDeviceAttributeMaxSurface3DWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 58 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT - {"cudaDevAttrMaxSurface3DHeight", {"hipDeviceAttributeMaxSurface3DHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 59 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH - {"cudaDevAttrMaxSurface3DDepth", {"hipDeviceAttributeMaxSurface3DDepth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 60 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH - {"cudaDevAttrMaxSurface1DLayeredWidth", {"hipDeviceAttributeMaxSurface1DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 61 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS - {"cudaDevAttrMaxSurface1DLayeredLayers", {"hipDeviceAttributeMaxSurface1DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 62 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH - {"cudaDevAttrMaxSurface2DLayeredWidth", {"hipDeviceAttributeMaxSurface2DLayeredWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 63 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT - {"cudaDevAttrMaxSurface2DLayeredHeight", {"hipDeviceAttributeMaxSurface2DLayeredHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 64 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LA YERS - {"cudaDevAttrMaxSurface2DLayeredLayers", 
{"hipDeviceAttributeMaxSurface2DLayeredLayers", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 65 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH - {"cudaDevAttrMaxSurfaceCubemapWidth", {"hipDeviceAttributeMaxSurfaceCubemapWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 66 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH - {"cudaDevAttrMaxSurfaceCubemapLayeredWidth", {"hipDeviceAttributeMaxSurfaceCubemapLayeredWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 67 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS - {"cudaDevAttrMaxSurfaceCubemapLayeredLayers", {"hipDeviceAttributeMaxSurfaceCubemapLayeredLayers", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 68 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH - {"cudaDevAttrMaxTexture1DLinearWidth", {"hipDeviceAttributeMaxTexture1DLinearWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 69 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH - {"cudaDevAttrMaxTexture2DLinearWidth", {"hipDeviceAttributeMaxTexture2DLinearWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 70 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT - {"cudaDevAttrMaxTexture2DLinearHeight", {"hipDeviceAttributeMaxTexture2DLinearHeight", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 71 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH - {"cudaDevAttrMaxTexture2DLinearPitch", {"hipDeviceAttributeMaxTexture2DLinearPitch", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 72 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH - {"cudaDevAttrMaxTexture2DMipmappedWidth", {"hipDeviceAttributeMaxTexture2DMipmappedWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 73 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT - {"cudaDevAttrMaxTexture2DMipmappedHeight", {"hipDeviceAttributeMaxTexture2DMipmappedHeight", "", CONV_NUMERIC_LITERAL, 
API_RUNTIME, HIP_UNSUPPORTED}}, // 74 - // CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR - {"cudaDevAttrComputeCapabilityMajor", {"hipDeviceAttributeComputeCapabilityMajor", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 75 - // CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR - {"cudaDevAttrComputeCapabilityMinor", {"hipDeviceAttributeComputeCapabilityMinor", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 76 - // CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH - {"cudaDevAttrMaxTexture1DMipmappedWidth", {"hipDeviceAttributeMaxTexture1DMipmappedWidth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 77 - // CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED - {"cudaDevAttrStreamPrioritiesSupported", {"hipDeviceAttributeStreamPrioritiesSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 78 - // CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED - {"cudaDevAttrGlobalL1CacheSupported", {"hipDeviceAttributeGlobalL1CacheSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 79 - // CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED - {"cudaDevAttrLocalL1CacheSupported", {"hipDeviceAttributeLocalL1CacheSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 80 - // CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR - {"cudaDevAttrMaxSharedMemoryPerMultiprocessor", {"hipDeviceAttributeMaxSharedMemoryPerMultiprocessor", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 81 - // CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR - {"cudaDevAttrMaxRegistersPerMultiprocessor", {"hipDeviceAttributeMaxRegistersPerMultiprocessor", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 82 - // CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY - {"cudaDevAttrManagedMemory", {"hipDeviceAttributeManagedMemory", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 83 - // CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD - {"cudaDevAttrIsMultiGpuBoard", {"hipDeviceAttributeIsMultiGpuBoard", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 84 - // 
CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID - {"cudaDevAttrMultiGpuBoardGroupID", {"hipDeviceAttributeMultiGpuBoardGroupID", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 85 - // CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED - {"cudaDevAttrHostNativeAtomicSupported", {"hipDeviceAttributeHostNativeAtomicSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 86 - // CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO - {"cudaDevAttrSingleToDoublePrecisionPerfRatio", {"hipDeviceAttributeSingleToDoublePrecisionPerfRatio", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 87 - // CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS - {"cudaDevAttrPageableMemoryAccess", {"hipDeviceAttributePageableMemoryAccess", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 88 - // CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS - {"cudaDevAttrConcurrentManagedAccess", {"hipDeviceAttributeConcurrentManagedAccess", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 89 - // CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED - {"cudaDevAttrComputePreemptionSupported", {"hipDeviceAttributeComputePreemptionSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 90 - // CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM - {"cudaDevAttrCanUseHostPointerForRegisteredMem", {"hipDeviceAttributeCanUseHostPointerForRegisteredMem", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 91 - // CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS - {"cudaDevAttrReserved92", {"hipDeviceAttributeCanUseStreamMemOps", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 92 - // CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS - {"cudaDevAttrReserved93", {"hipDeviceAttributeCanUse64BitStreamMemOps", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 93 - // CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR - {"cudaDevAttrReserved94", {"hipDeviceAttributeCanUseStreamWaitValueNor", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 94 - // CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH - {"cudaDevAttrCooperativeLaunch", {"hipDeviceAttributeCooperativeLaunch", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 95 - // CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH - {"cudaDevAttrCooperativeMultiDeviceLaunch", {"hipDeviceAttributeCooperativeMultiDeviceLaunch", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 96 - // CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN - {"cudaDevAttrMaxSharedMemoryPerBlockOptin", {"hipDeviceAttributeMaxSharedMemoryPerBlockOptin", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 97 - // CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES - {"cudaDevAttrCanFlushRemoteWrites", {"hipDeviceAttributeCanFlushRemoteWrites", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 98 - // CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED - {"cudaDevAttrHostRegisterSupported", {"hipDeviceAttributeHostRegisterSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 99 - // CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES - {"cudaDevAttrPageableMemoryAccessUsesHostPageTables", {"hipDeviceAttributePageableMemoryAccessUsesHostPageTables", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 100 - // CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST - {"cudaDevAttrDirectManagedMemAccessFromHost", {"hipDeviceAttributeDirectManagedMemAccessFromHost", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 101 - - // CUdevice_P2PAttribute - {"cudaDeviceP2PAttr", {"hipDeviceP2PAttribute", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaDeviceP2PAttr enum values - // CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 0x01 - {"cudaDevP2PAttrPerformanceRank", {"hipDeviceP2PAttributePerformanceRank", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 0x02 - {"cudaDevP2PAttrAccessSupported", {"hipDeviceP2PAttributeAccessSupported", 
"", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 0x03 - {"cudaDevP2PAttrNativeAtomicSupported", {"hipDeviceP2PAttributeNativeAtomicSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - // CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED = 0x04 - {"cudaDevP2PAttrCudaArrayAccessSupported", {"hipDevP2PAttributeCudaArrayAccessSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 4 - - // cudaEGL.h - presented only on Linux in nvidia-cuda-dev package - // CUeglColorFormat - {"cudaEglColorFormat", {"hipEglColorFormat", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaEglColorFormat enum values - // CU_EGL_COLOR_FORMAT_YUV420_PLANAR = 0x00 - {"cudaEglColorFormatYUV420Planar", {"hipEglColorFormatYUV420Planar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR= 0x01 - {"cudaEglColorFormatYUV420SemiPlanar ", {"hipEglColorFormatYUV420SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_EGL_COLOR_FORMAT_YUV422_PLANAR = 0x02 - {"cudaEglColorFormatYUV422Planar", {"hipEglColorFormatYUV422Planar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR = 0x03 - {"cudaEglColorFormatYUV422SemiPlanar", {"hipEglColorFormatYUV422SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - // CU_EGL_COLOR_FORMAT_RGB = 0x04 - {"cudaEglColorFormatRGB", {"hipEglColorFormatRGB", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 4 - // CU_EGL_COLOR_FORMAT_BGR = 0x05 - {"cudaEglColorFormatBGR", {"hipEglColorFormatBGR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 5 - // CU_EGL_COLOR_FORMAT_ARGB = 0x06 - {"cudaEglColorFormatARGB", {"hipEglColorFormatARGB", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 6 - // CU_EGL_COLOR_FORMAT_RGBA = 0x07 - {"cudaEglColorFormatRGBA", 
{"hipEglColorFormatRGBA", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 7 - // CU_EGL_COLOR_FORMAT_L = 0x08 - {"cudaEglColorFormatL", {"hipEglColorFormatL", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 8 - // CU_EGL_COLOR_FORMAT_R = 0x09 - {"cudaEglColorFormatR", {"hipEglColorFormatR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 9 - // CU_EGL_COLOR_FORMAT_YUV444_PLANAR = 0x0A - {"cudaEglColorFormatYUV444Planar", {"hipEglColorFormatYUV444Planar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 10 - // CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR = 0x0B - {"cudaEglColorFormatYUV444SemiPlanar", {"hipEglColorFormatYUV444SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 11 - // CU_EGL_COLOR_FORMAT_YUYV_422 = 0x0C - {"cudaEglColorFormatYUYV422", {"hipEglColorFormatYUYV422", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 12 - // CU_EGL_COLOR_FORMAT_UYVY_422 = 0x0D - {"cudaEglColorFormatUYVY422", {"hipEglColorFormatUYVY422", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 13 - // CU_EGL_COLOR_FORMAT_ABGR = 0x0E - {"cudaEglColorFormatABGR", {"hipEglColorFormatABGR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 14 - // CU_EGL_COLOR_FORMAT_BGRA = 0x0F - {"cudaEglColorFormatBGRA", {"hipEglColorFormatBGRA", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 15 - // CU_EGL_COLOR_FORMAT_A = 0x10 - {"cudaEglColorFormatA", {"hipEglColorFormatA", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 16 - // CU_EGL_COLOR_FORMAT_RG = 0x11 - {"cudaEglColorFormatRG", {"hipEglColorFormatRG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 17 - // CU_EGL_COLOR_FORMAT_AYUV = 0x12 - {"cudaEglColorFormatAYUV", {"hipEglColorFormatAYUV", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 18 - // CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR = 0x13 - {"cudaEglColorFormatYVU444SemiPlanar", {"hipEglColorFormatYVU444SemiPlanar", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 19 - // CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR = 0x14 - {"cudaEglColorFormatYVU422SemiPlanar", {"hipEglColorFormatYVU422SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 20 - // CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR = 0x15 - {"cudaEglColorFormatYVU420SemiPlanar", {"hipEglColorFormatYVU420SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 21 - // CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR = 0x16 - {"cudaEglColorFormatY10V10U10_444SemiPlanar", {"hipEglColorFormatY10V10U10_444SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 22 - // CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR = 0x17 - {"cudaEglColorFormatY10V10U10_420SemiPlanar", {"hipEglColorFormatY10V10U10_420SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 23 - // CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR = 0x18 - {"cudaEglColorFormatY12V12U12_444SemiPlanar", {"hipEglColorFormatY12V12U12_444SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 24 - // CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR = 0x19 - {"cudaEglColorFormatY12V12U12_420SemiPlanar", {"hipEglColorFormatY12V12U12_420SemiPlanar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 25 - // CU_EGL_COLOR_FORMAT_VYUY_ER = 0x1A - {"cudaEglColorFormatVYUY_ER", {"hipEglColorFormatVYUY_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 26 - // CU_EGL_COLOR_FORMAT_UYVY_ER = 0x1B - {"cudaEglColorFormatUYVY_ER", {"hipEglColorFormatUYVY_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 27 - // CU_EGL_COLOR_FORMAT_YUYV_ER = 0x1C - {"cudaEglColorFormatYUYV_ER", {"hipEglColorFormatYUYV_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 28 - // CU_EGL_COLOR_FORMAT_YVYU_ER = 0x1D - {"cudaEglColorFormatYVYU_ER", {"hipEglColorFormatYVYU_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 29 - // CU_EGL_COLOR_FORMAT_YUV_ER = 
0x1E - {"cudaEglColorFormatYUV_ER", {"hipEglColorFormatYUV_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 30 - // CU_EGL_COLOR_FORMAT_YUVA_ER = 0x1F - {"cudaEglColorFormatYUVA_ER", {"hipEglColorFormatYUVA_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 31 - // CU_EGL_COLOR_FORMAT_AYUV_ER = 0x20 - {"cudaEglColorFormatAYUV_ER", {"hipEglColorFormatAYUV_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 32 - // CU_EGL_COLOR_FORMAT_YUV444_PLANAR_ER = 0x21 - {"cudaEglColorFormatYUV444Planar_ER", {"hipEglColorFormatYUV444Planar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 33 - // CU_EGL_COLOR_FORMAT_YUV422_PLANAR_ER = 0x22 - {"cudaEglColorFormatYUV422Planar_ER", {"hipEglColorFormatYUV422Planar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 34 - // CU_EGL_COLOR_FORMAT_YUV420_PLANAR_ER = 0x23 - {"cudaEglColorFormatYUV420Planar_ER", {"hipEglColorFormatYUV420Planar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 35 - // CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR_ER = 0x24 - {"cudaEglColorFormatYUV444SemiPlanar_ER", {"hipEglColorFormatYUV444SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 36 - // CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR_ER = 0x25 - {"cudaEglColorFormatYUV422SemiPlanar_ER", {"hipEglColorFormatYUV422SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 37 - // CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_ER = 0x26 - {"cudaEglColorFormatYUV420SemiPlanar_ER", {"hipEglColorFormatYUV420SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 38 - // CU_EGL_COLOR_FORMAT_YVU444_PLANAR_ER = 0x27 - {"cudaEglColorFormatYVU444Planar_ER", {"hipEglColorFormatYVU444Planar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 39 - // CU_EGL_COLOR_FORMAT_YVU422_PLANAR_ER = 0x28 - {"cudaEglColorFormatYVU422Planar_ER", {"hipEglColorFormatYVU422Planar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 
HIP_UNSUPPORTED}}, // 40 - // CU_EGL_COLOR_FORMAT_YVU420_PLANAR_ER = 0x29 - {"cudaEglColorFormatYVU420Planar_ER", {"hipEglColorFormatYVU420Planar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 41 - // CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR_ER = 0x2A - {"cudaEglColorFormatYVU444SemiPlanar_ER", {"hipEglColorFormatYVU444SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 42 - // CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR_ER = 0x2B - {"cudaEglColorFormatYVU422SemiPlanar_ER", {"hipEglColorFormatYVU422SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 43 - // CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_ER = 0x2C - {"cudaEglColorFormatYVU420SemiPlanar_ER", {"hipEglColorFormatYVU420SemiPlanar_ER", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 44 - // CU_EGL_COLOR_FORMAT_BAYER_RGGB = 0x2D - {"cudaEglColorFormatBayerRGGB", {"hipEglColorFormatBayerRGGB", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 45 - // CU_EGL_COLOR_FORMAT_BAYER_BGGR = 0x2E - {"cudaEglColorFormatBayerBGGR", {"hipEglColorFormatBayerBGGR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 46 - // CU_EGL_COLOR_FORMAT_BAYER_GRBG = 0x2F - {"cudaEglColorFormatBayerGRBG", {"hipEglColorFormatBayerGRBG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 47 - // CU_EGL_COLOR_FORMAT_BAYER_GBRG = 0x30 - {"cudaEglColorFormatBayerGBRG", {"hipEglColorFormatBayerGBRG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 48 - // CU_EGL_COLOR_FORMAT_BAYER10_RGGB = 0x31 - {"cudaEglColorFormatBayer10RGGB", {"hipEglColorFormatBayer10RGGB", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 49 - // CU_EGL_COLOR_FORMAT_BAYER10_BGGR = 0x32 - {"cudaEglColorFormatBayer10BGGR", {"hipEglColorFormatBayer10BGGR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 50 - // CU_EGL_COLOR_FORMAT_BAYER10_GRBG = 0x33 - {"cudaEglColorFormatBayer10GRBG", {"hipEglColorFormatBayer10GRBG", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 51 - // CU_EGL_COLOR_FORMAT_BAYER10_GBRG = 0x34 - {"cudaEglColorFormatBayer10GBRG", {"hipEglColorFormatBayer10GBRG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 52 - // CU_EGL_COLOR_FORMAT_BAYER12_RGGB = 0x35 - {"cudaEglColorFormatBayer12RGGB", {"hipEglColorFormatBayer12RGGB", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 53 - // CU_EGL_COLOR_FORMAT_BAYER12_BGGR = 0x36 - {"cudaEglColorFormatBayer12BGGR", {"hipEglColorFormatBayer12BGGR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 54 - // CU_EGL_COLOR_FORMAT_BAYER12_GRBG = 0x37 - {"cudaEglColorFormatBayer12GRBG", {"hipEglColorFormatBayer12GRBG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 55 - // CU_EGL_COLOR_FORMAT_BAYER12_GBRG = 0x38 - {"cudaEglColorFormatBayer12GBRG", {"hipEglColorFormatBayer12GBRG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 56 - // CU_EGL_COLOR_FORMAT_BAYER14_RGGB = 0x39 - {"cudaEglColorFormatBayer14RGGB", {"hipEglColorFormatBayer14RGGB", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 57 - // CU_EGL_COLOR_FORMAT_BAYER14_BGGR = 0x3A - {"cudaEglColorFormatBayer14BGGR", {"hipEglColorFormatBayer14BGGR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 58 - // CU_EGL_COLOR_FORMAT_BAYER14_GRBG = 0x3B - {"cudaEglColorFormatBayer14GRBG", {"hipEglColorFormatBayer14GRBG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 59 - // CU_EGL_COLOR_FORMAT_BAYER14_GBRG = 0x3C - {"cudaEglColorFormatBayer14GBRG", {"hipEglColorFormatBayer14GBRG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 60 - // CU_EGL_COLOR_FORMAT_BAYER20_RGGB = 0x3D - {"cudaEglColorFormatBayer20RGGB", {"hipEglColorFormatBayer20RGGB", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 61 - // CU_EGL_COLOR_FORMAT_BAYER20_BGGR = 0x3E - {"cudaEglColorFormatBayer20BGGR", {"hipEglColorFormatBayer20BGGR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 
HIP_UNSUPPORTED}}, // 62 - // CU_EGL_COLOR_FORMAT_BAYER20_GRBG = 0x3F - {"cudaEglColorFormatBayer20GRBG", {"hipEglColorFormatBayer20GRBG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 63 - // CU_EGL_COLOR_FORMAT_BAYER20_GBRG = 0x40 - {"cudaEglColorFormatBayer20GBRG", {"hipEglColorFormatBayer20GBRG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 64 - // CU_EGL_COLOR_FORMAT_YVU444_PLANAR = 0x41 - {"cudaEglColorFormatYVU444Planar", {"hipEglColorFormatYVU444Planar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 65 - // CU_EGL_COLOR_FORMAT_YVU422_PLANAR = 0x42 - {"cudaEglColorFormatYVU422Planar", {"hipEglColorFormatYVU422Planar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 66 - // CU_EGL_COLOR_FORMAT_YVU420_PLANAR = 0x43 - {"cudaEglColorFormatYVU420Planar", {"hipEglColorFormatYVU420Planar", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 67 - // CU_EGL_COLOR_FORMAT_BAYER_ISP_RGGB = 0x44 - {"cudaEglColorFormatBayerIspRGGB", {"hipEglColorFormatBayerIspRGGB", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 68 - // CU_EGL_COLOR_FORMAT_BAYER_ISP_BGGR = 0x45 - {"cudaEglColorFormatBayerIspBGGR", {"hipEglColorFormatBayerIspBGGR", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 69 - // CU_EGL_COLOR_FORMAT_BAYER_ISP_GRBG = 0x46 - {"cudaEglColorFormatBayerIspGRBG", {"hipEglColorFormatBayerIspGRBG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 70 - // CU_EGL_COLOR_FORMAT_BAYER_ISP_GBRG = 0x47 - {"cudaEglColorFormatBayerIspGBRG", {"hipEglColorFormatBayerIspGBRG", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 71 - - // CUeglFrameType - {"cudaEglFrameType", {"hipEglFrameType", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaEglFrameType enum values - // CU_EGL_FRAME_TYPE_ARRAY - {"cudaEglFrameTypeArray", {"hipEglFrameTypeArray", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_EGL_FRAME_TYPE_PITCH - {"cudaEglFrameTypePitch", 
{"hipEglFrameTypePitch", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - - // CUeglResourceLocationFlags - {"cudaEglResourceLocationFlags", {"hipEglResourceLocationFlags", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaEglResourceLocationFlagss enum values - // CU_EGL_RESOURCE_LOCATION_SYSMEM - {"cudaEglResourceLocationSysmem", {"hipEglResourceLocationSysmem", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x00 - // CU_EGL_RESOURCE_LOCATION_VIDMEM - {"cudaEglResourceLocationVidmem", {"hipEglResourceLocationVidmem", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x01 - - // CUresult - {"cudaError", {"hipError_t", "", CONV_TYPE, API_RUNTIME}}, - {"cudaError_t", {"hipError_t", "", CONV_TYPE, API_RUNTIME}}, - // cudaError enum values - // CUDA_SUCCESS - {"cudaSuccess", {"hipSuccess", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - // CUDA_ERROR_INVALID_VALUE - {"cudaErrorInvalidValue", {"hipErrorInvalidValue", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - // CUDA_ERROR_OUT_OF_MEMORY - {"cudaErrorMemoryAllocation", {"hipErrorOutOfMemory", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - // CUDA_ERROR_NOT_INITIALIZED - {"cudaErrorInitializationError", {"hipErrorNotInitialized", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 3 - // CUDA_ERROR_DEINITIALIZED - {"cudaErrorCudartUnloading", {"hipErrorDeinitialized", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 4 - // CUDA_ERROR_PROFILER_DISABLED - {"cudaErrorProfilerDisabled", {"hipErrorProfilerDisabled", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 5 - // Deprecated since CUDA 5.0 - // CUDA_ERROR_PROFILER_NOT_INITIALIZED - {"cudaErrorProfilerNotInitialized", {"hipErrorProfilerNotInitialized", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 6 - // Deprecated since CUDA 5.0 - // CUDA_ERROR_PROFILER_ALREADY_STARTED - {"cudaErrorProfilerAlreadyStarted", {"hipErrorProfilerAlreadyStarted", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 7 - // Deprecated since CUDA 5.0 - // 
CUDA_ERROR_PROFILER_ALREADY_STOPPED - {"cudaErrorProfilerAlreadyStopped", {"hipErrorProfilerAlreadyStopped", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 8 - // no analogue - {"cudaErrorInvalidConfiguration", {"hipErrorInvalidConfiguration", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 9 - // no analogue - {"cudaErrorInvalidPitchValue", {"hipErrorInvalidPitchValue", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 12 - // no analogue - {"cudaErrorInvalidSymbol", {"hipErrorInvalidSymbol", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 13 - // Deprecated since CUDA 10.1 - // no analogue - {"cudaErrorInvalidHostPointer", {"hipErrorInvalidHostPointer", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 16 - // Deprecated since CUDA 10.1 - // no analogue - {"cudaErrorInvalidDevicePointer", {"hipErrorInvalidDevicePointer", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 17 - // no analogue - {"cudaErrorInvalidTexture", {"hipErrorInvalidTexture", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 18 - // no analogue - {"cudaErrorInvalidTextureBinding", {"hipErrorInvalidTextureBinding", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 19 - // no analogue - {"cudaErrorInvalidChannelDescriptor", {"hipErrorInvalidChannelDescriptor", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 20 - // no analogue - {"cudaErrorInvalidMemcpyDirection", {"hipErrorInvalidMemcpyDirection", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 21 - // no analogue - {"cudaErrorAddressOfConstant", {"hipErrorAddressOfConstant", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 22 - // no analogue - {"cudaErrorTextureFetchFailed", {"hipErrorTextureFetchFailed", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 23 - // no analogue - {"cudaErrorTextureNotBound", {"hipErrorTextureNotBound", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 24 - // no analogue - {"cudaErrorSynchronizationError", {"hipErrorSynchronizationError", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 25 - // no analogue - {"cudaErrorInvalidFilterSetting", {"hipErrorInvalidFilterSetting", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 26 - // no analogue - {"cudaErrorInvalidNormSetting", {"hipErrorInvalidNormSetting", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 27 - // no analogue - {"cudaErrorMixedDeviceExecution", {"hipErrorMixedDeviceExecution", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 28 - // Deprecated since CUDA 4.1 - // no analogue - {"cudaErrorNotYetImplemented", {"hipErrorNotYetImplemented", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 31 - // Deprecated since CUDA 3.1 - // no analogue - {"cudaErrorMemoryValueTooLarge", {"hipErrorMemoryValueTooLarge", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 32 - // no analogue - {"cudaErrorInsufficientDriver", {"hipErrorInsufficientDriver", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 35 - // no analogue - {"cudaErrorInvalidSurface", {"hipErrorInvalidSurface", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 37 - // no analogue - {"cudaErrorDuplicateVariableName", {"hipErrorDuplicateVariableName", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 43 - // no analogue - {"cudaErrorDuplicateTextureName", {"hipErrorDuplicateTextureName", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 44 - // no analogue - {"cudaErrorDuplicateSurfaceName", {"hipErrorDuplicateSurfaceName", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 45 - // no analogue - {"cudaErrorDevicesUnavailable", {"hipErrorDevicesUnavailable", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 46 - // no analogue - {"cudaErrorIncompatibleDriverContext", {"hipErrorIncompatibleDriverContext", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 49 - // no analogue - {"cudaErrorMissingConfiguration", {"hipErrorMissingConfiguration", "", CONV_NUMERIC_LITERAL, 
API_RUNTIME}}, // 52 - // no analogue - {"cudaErrorPriorLaunchFailure", {"hipErrorPriorLaunchFailure", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 53 - // no analogue - {"cudaErrorLaunchMaxDepthExceeded", {"hipErrorLaunchMaxDepthExceeded", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 65 - // no analogue - {"cudaErrorLaunchFileScopedTex", {"hipErrorLaunchFileScopedTex", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 66 - // no analogue - {"cudaErrorLaunchFileScopedSurf", {"hipErrorLaunchFileScopedSurf", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 67 - // no analogue - {"cudaErrorSyncDepthExceeded", {"hipErrorSyncDepthExceeded", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 68 - // no analogue - {"cudaErrorLaunchPendingCountExceeded", {"hipErrorLaunchPendingCountExceeded", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 69 - // no analogue - {"cudaErrorInvalidDeviceFunction", {"hipErrorInvalidDeviceFunction", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 98 - // CUDA_ERROR_NO_DEVICE - {"cudaErrorNoDevice", {"hipErrorNoDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 100 - // CUDA_ERROR_INVALID_DEVICE - {"cudaErrorInvalidDevice", {"hipErrorInvalidDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 101 - // no analogue - {"cudaErrorStartupFailure", {"hipErrorStartupFailure", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 127 - // CUDA_ERROR_INVALID_IMAGE - {"cudaErrorInvalidKernelImage", {"hipErrorInvalidImage", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 200 - // CUDA_ERROR_INVALID_CONTEXT - {"cudaErrorDeviceUninitialized", {"hipErrorInvalidContext", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 201 - // Typo fixed in 10.2 - // CUDA_ERROR_INVALID_CONTEXT - {"cudaErrorDeviceUninitilialized", {"hipErrorInvalidContext", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 201 - // CUDA_ERROR_MAP_FAILED - {"cudaErrorMapBufferObjectFailed", {"hipErrorMapFailed", "", CONV_NUMERIC_LITERAL, 
API_RUNTIME}}, // 205 - // CUDA_ERROR_UNMAP_FAILED - {"cudaErrorUnmapBufferObjectFailed", {"hipErrorUnmapFailed", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 206 - // CUDA_ERROR_ARRAY_IS_MAPPED - {"cudaErrorArrayIsMapped", {"hipErrorArrayIsMapped", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 207 - // CUDA_ERROR_ALREADY_MAPPED - {"cudaErrorAlreadyMapped", {"hipErrorAlreadyMapped", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 208 - // CUDA_ERROR_NO_BINARY_FOR_GPU - {"cudaErrorNoKernelImageForDevice", {"hipErrorNoBinaryForGpu", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 209 - // CUDA_ERROR_ALREADY_ACQUIRED - {"cudaErrorAlreadyAcquired", {"hipErrorAlreadyAcquired", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 210 - // CUDA_ERROR_NOT_MAPPED - {"cudaErrorNotMapped", {"hipErrorNotMapped", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 211 - // CUDA_ERROR_NOT_MAPPED_AS_ARRAY - {"cudaErrorNotMappedAsArray", {"hipErrorNotMappedAsArray", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 212 - // CUDA_ERROR_NOT_MAPPED_AS_POINTER - {"cudaErrorNotMappedAsPointer", {"hipErrorNotMappedAsPointer", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 213 - // CUDA_ERROR_ECC_UNCORRECTABLE - {"cudaErrorECCUncorrectable", {"hipErrorECCNotCorrectable", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 214 - // CUDA_ERROR_UNSUPPORTED_LIMIT - {"cudaErrorUnsupportedLimit", {"hipErrorUnsupportedLimit", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 215 - // CUDA_ERROR_CONTEXT_ALREADY_IN_USE - {"cudaErrorDeviceAlreadyInUse", {"hipErrorContextAlreadyInUse", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 216 - // CUDA_ERROR_PEER_ACCESS_UNSUPPORTED - {"cudaErrorPeerAccessUnsupported", {"hipErrorPeerAccessUnsupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 217 - // CUDA_ERROR_INVALID_PTX - {"cudaErrorInvalidPtx", {"hipErrorInvalidKernelFile", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 218 - // CUDA_ERROR_INVALID_GRAPHICS_CONTEXT - {"cudaErrorInvalidGraphicsContext", {"hipErrorInvalidGraphicsContext", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 219 - // CUDA_ERROR_NVLINK_UNCORRECTABLE - {"cudaErrorNvlinkUncorrectable", {"hipErrorNvlinkUncorrectable", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 220 - // CUDA_ERROR_JIT_COMPILER_NOT_FOUND - {"cudaErrorJitCompilerNotFound", {"hipErrorJitCompilerNotFound", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 221 - // CUDA_ERROR_INVALID_SOURCE - {"cudaErrorInvalidSource", {"hipErrorInvalidSource", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 300 - // CUDA_ERROR_FILE_NOT_FOUND - {"cudaErrorFileNotFound", {"hipErrorFileNotFound", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 301 - // CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND - {"cudaErrorSharedObjectSymbolNotFound", {"hipErrorSharedObjectSymbolNotFound", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 302 - // CUDA_ERROR_SHARED_OBJECT_INIT_FAILED - {"cudaErrorSharedObjectInitFailed", {"hipErrorSharedObjectInitFailed", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 303 - // CUDA_ERROR_OPERATING_SYSTEM - {"cudaErrorOperatingSystem", {"hipErrorOperatingSystem", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 304 - // CUDA_ERROR_INVALID_HANDLE - {"cudaErrorInvalidResourceHandle", {"hipErrorInvalidHandle", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 400 - // CUDA_ERROR_ILLEGAL_STATE - {"cudaErrorIllegalState", {"hipErrorIllegalState", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 401 - // CUDA_ERROR_NOT_FOUND - {"cudaErrorSymbolNotFound", {"hipErrorNotFound", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 500 - // CUDA_ERROR_NOT_READY - {"cudaErrorNotReady", {"hipErrorNotReady", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 600 - // CUDA_ERROR_ILLEGAL_ADDRESS - {"cudaErrorIllegalAddress", {"hipErrorIllegalAddress", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 700 - // CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES - {"cudaErrorLaunchOutOfResources", {"hipErrorLaunchOutOfResources", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 701 - // CUDA_ERROR_LAUNCH_TIMEOUT - 
{"cudaErrorLaunchTimeout", {"hipErrorLaunchTimeOut", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 702 - // CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING - {"cudaErrorLaunchIncompatibleTexturing", {"hipErrorLaunchIncompatibleTexturing", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 703 - // CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED - {"cudaErrorPeerAccessAlreadyEnabled", {"hipErrorPeerAccessAlreadyEnabled", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 704 - // CUDA_ERROR_PEER_ACCESS_NOT_ENABLED - {"cudaErrorPeerAccessNotEnabled", {"hipErrorPeerAccessNotEnabled", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 705 - // CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE - {"cudaErrorSetOnActiveProcess", {"hipErrorSetOnActiveProcess", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 708 - // CUDA_ERROR_CONTEXT_IS_DESTROYED - {"cudaErrorContextIsDestroyed", {"hipErrorContextIsDestroyed", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 709 - // CUDA_ERROR_ASSERT - {"cudaErrorAssert", {"hipErrorAssert", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 710 - // CUDA_ERROR_TOO_MANY_PEERS - {"cudaErrorTooManyPeers", {"hipErrorTooManyPeers", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 711 - // CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED - {"cudaErrorHostMemoryAlreadyRegistered", {"hipErrorHostMemoryAlreadyRegistered", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 712 - // CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED - {"cudaErrorHostMemoryNotRegistered", {"hipErrorHostMemoryNotRegistered", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 713 - // CUDA_ERROR_HARDWARE_STACK_ERROR - {"cudaErrorHardwareStackError", {"hipErrorHardwareStackError", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 714 - // CUDA_ERROR_ILLEGAL_INSTRUCTION - {"cudaErrorIllegalInstruction", {"hipErrorIllegalInstruction", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 715 - // CUDA_ERROR_MISALIGNED_ADDRESS - {"cudaErrorMisalignedAddress", {"hipErrorMisalignedAddress", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 716 - // CUDA_ERROR_INVALID_ADDRESS_SPACE - {"cudaErrorInvalidAddressSpace", {"hipErrorInvalidAddressSpace", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 717 - // CUDA_ERROR_INVALID_PC - {"cudaErrorInvalidPc", {"hipErrorInvalidPc", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 718 - // CUDA_ERROR_LAUNCH_FAILED - {"cudaErrorLaunchFailure", {"hipErrorLaunchFailure", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 719 - // CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE - {"cudaErrorCooperativeLaunchTooLarge", {"hipErrorCooperativeLaunchTooLarge", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 720 - // CUDA_ERROR_NOT_PERMITTED - {"cudaErrorNotPermitted", {"hipErrorNotPermitted", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 800 - // CUDA_ERROR_NOT_SUPPORTED - {"cudaErrorNotSupported", {"hipErrorNotSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 801 - // CUDA_ERROR_SYSTEM_NOT_READY - {"cudaErrorSystemNotReady", {"hipErrorSystemNotReady", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 802 - // CUDA_ERROR_SYSTEM_DRIVER_MISMATCH - {"cudaErrorSystemDriverMismatch", {"hipErrorSystemDriverMismatch", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 803 - // CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE - {"cudaErrorCompatNotSupportedOnDevice", {"hipErrorCompatNotSupportedOnDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 804 - // CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED - {"cudaErrorStreamCaptureUnsupported", {"hipErrorStreamCaptureUnsupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 900 - // CUDA_ERROR_STREAM_CAPTURE_INVALIDATED - {"cudaErrorStreamCaptureInvalidated", {"hipErrorStreamCaptureInvalidated", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 901 - // CUDA_ERROR_STREAM_CAPTURE_MERGE - {"cudaErrorStreamCaptureMerge", {"hipErrorStreamCaptureMerge", "", CONV_NUMERIC_LITERAL, API_RUNTIME, 
HIP_UNSUPPORTED}}, // 902 - // CUDA_ERROR_STREAM_CAPTURE_UNMATCHED - {"cudaErrorStreamCaptureUnmatched", {"hipErrorStreamCaptureUnmatched", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 903 - // CUDA_ERROR_STREAM_CAPTURE_UNJOINED - {"cudaErrorStreamCaptureUnjoined", {"hipErrorStreamCaptureUnjoined", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 904 - // CUDA_ERROR_STREAM_CAPTURE_ISOLATION - {"cudaErrorStreamCaptureIsolation", {"hipErrorStreamCaptureIsolation", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 905 - // CUDA_ERROR_STREAM_CAPTURE_IMPLICIT - {"cudaErrorStreamCaptureImplicit", {"hipErrorStreamCaptureImplicit", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 906 - // CUDA_ERROR_CAPTURED_EVENT - {"cudaErrorCapturedEvent", {"hipErrorCapturedEvent", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 907 - // CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD - {"cudaErrorStreamCaptureWrongThread", {"hipErrorStreamCaptureWrongThread", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 908 - // CUDA_ERROR_TIMEOUT - {"cudaErrorTimeout", {"hipErrorTimeout", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 909 - // CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE - {"cudaErrorGraphExecUpdateFailure", {"hipErrorGraphExecUpdateFailure", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 910 - // CUDA_ERROR_UNKNOWN - {"cudaErrorUnknown", {"hipErrorUnknown", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 999 - // Deprecated since CUDA 4.1 - {"cudaErrorApiFailureBase", {"hipErrorApiFailureBase", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 10000 - - // CUexternalMemoryHandleType - {"cudaExternalMemoryHandleType", {"hipExternalMemoryHandleType", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaExternalMemoryHandleType enum values - // CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD - {"cudaExternalMemoryHandleTypeOpaqueFd", {"hipExternalMemoryHandleTypeOpaqueFD", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 - {"cudaExternalMemoryHandleTypeOpaqueWin32", {"hipExternalMemoryHandleTypeOpaqueWin32", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT - {"cudaExternalMemoryHandleTypeOpaqueWin32Kmt", {"hipExternalMemoryHandleTypeOpaqueWin32KMT", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - // CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP - {"cudaExternalMemoryHandleTypeD3D12Heap", {"hipExternalMemoryHandleTypeD3D12Heap", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 4 - // CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE - {"cudaExternalMemoryHandleTypeD3D12Resource", {"hipExternalMemoryHandleTypeD3D12Resource", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 5 - // CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE - {"cudaExternalMemoryHandleTypeD3D11Resource", {"hipExternalMemoryHandleTypeD3D11Resource", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 6 - // CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT - {"cudaExternalMemoryHandleTypeD3D11ResourceKmt", {"hipExternalMemoryHandleTypeD3D11ResourceKmt", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 7 - // CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF - {"cudaExternalMemoryHandleTypeNvSciBuf", {"hipExternalMemoryHandleTypeNvSciBuf", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 8 - - // CUexternalSemaphoreHandleType - {"cudaExternalSemaphoreHandleType", {"hipExternalSemaphoreHandleType", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaExternalSemaphoreHandleType enum values - // CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD - {"cudaExternalSemaphoreHandleTypeOpaqueFd", {"hipExternalSemaphoreHandleTypeOpaqueFD", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 - {"cudaExternalSemaphoreHandleTypeOpaqueWin32", 
{"hipExternalSemaphoreHandleTypeOpaqueWin32", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT - {"cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt", {"hipExternalSemaphoreHandleTypeOpaqueWin32KMT", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - // CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE - {"cudaExternalSemaphoreHandleTypeD3D12Fence", {"hipExternalSemaphoreHandleTypeD3D12Fence", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 4 - // CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE - {"cudaExternalSemaphoreHandleTypeD3D11Fence", {"hipExternalSemaphoreHandleTypeD3D11Fence", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 5 - // CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC - {"cudaExternalSemaphoreHandleTypeNvSciSync", {"hipExternalSemaphoreHandleTypeNvSciSync", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 6 - // CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX - {"cudaExternalSemaphoreHandleTypeKeyedMutex", {"hipExternalSemaphoreHandleTypeKeyedMutex", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 7 - // CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT - {"cudaExternalSemaphoreHandleTypeKeyedMutexKmt", {"hipExternalSemaphoreHandleTypeKeyedMutexKmt", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 8 - - // CUfunction_attribute - // NOTE: only last, starting from 8, values are presented and are equal to Driver's ones - {"cudaFuncAttribute", {"hipFuncAttribute", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaFuncAttribute enum values - // CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES - {"cudaFuncAttributeMaxDynamicSharedMemorySize", {"hipFuncAttributeMaxDynamicSharedMemorySize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 8 - // CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT - {"cudaFuncAttributePreferredSharedMemoryCarveout", {"hipFuncAttributePreferredSharedMemoryCarveout", 
"", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 9 - // CU_FUNC_ATTRIBUTE_MAX - {"cudaFuncAttributeMax", {"hipFuncAttributeMax", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 10 - - // CUfunc_cache - {"cudaFuncCache", {"hipFuncCache_t", "", CONV_TYPE, API_RUNTIME}}, - // cudaFuncCache enum values - // CU_FUNC_CACHE_PREFER_NONE = 0x00 - {"cudaFuncCachePreferNone", {"hipFuncCachePreferNone", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - // CU_FUNC_CACHE_PREFER_SHARED = 0x01 - {"cudaFuncCachePreferShared", {"hipFuncCachePreferShared", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - // CU_FUNC_CACHE_PREFER_L1 = 0x02 - {"cudaFuncCachePreferL1", {"hipFuncCachePreferL1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - // CU_FUNC_CACHE_PREFER_EQUAL = 0x03 - {"cudaFuncCachePreferEqual", {"hipFuncCachePreferEqual", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 3 - - // CUarray_cubemap_face - {"cudaGraphicsCubeFace", {"hipGraphicsCubeFace", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaGraphicsCubeFace enum values - // CU_CUBEMAP_FACE_POSITIVE_X - {"cudaGraphicsCubeFacePositiveX", {"hipGraphicsCubeFacePositiveX", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x00 - // CU_CUBEMAP_FACE_NEGATIVE_X - {"cudaGraphicsCubeFaceNegativeX", {"hipGraphicsCubeFaceNegativeX", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x01 - // CU_CUBEMAP_FACE_POSITIVE_Y - {"cudaGraphicsCubeFacePositiveY", {"hipGraphicsCubeFacePositiveY", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x02 - // CU_CUBEMAP_FACE_NEGATIVE_Y - {"cudaGraphicsCubeFaceNegativeY", {"hipGraphicsCubeFaceNegativeY", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x03 - // CU_CUBEMAP_FACE_POSITIVE_Z - {"cudaGraphicsCubeFacePositiveZ", {"hipGraphicsCubeFacePositiveZ", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x04 - // CU_CUBEMAP_FACE_NEGATIVE_Z - {"cudaGraphicsCubeFaceNegativeZ", {"hipGraphicsCubeFaceNegativeZ", "", 
CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x05 - - // CUgraphicsMapResourceFlags - {"cudaGraphicsMapFlags", {"hipGraphicsMapFlags", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaGraphicsMapFlags enum values - // CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00 - {"cudaGraphicsMapFlagsNone", {"hipGraphicsMapFlagsNone", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01 - {"cudaGraphicsMapFlagsReadOnly", {"hipGraphicsMapFlagsReadOnly", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02 - {"cudaGraphicsMapFlagsWriteDiscard", {"hipGraphicsMapFlagsWriteDiscard", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - - // CUgraphicsRegisterFlags - {"cudaGraphicsRegisterFlags", {"hipGraphicsRegisterFlags", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaGraphicsRegisterFlags enum values - // CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00 - {"cudaGraphicsRegisterFlagsNone", {"hipGraphicsRegisterFlagsNone", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01 - {"cudaGraphicsRegisterFlagsReadOnly", {"hipGraphicsRegisterFlagsReadOnly", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x02 - {"cudaGraphicsRegisterFlagsWriteDiscard", {"hipGraphicsRegisterFlagsWriteDiscard", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 0x04 - {"cudaGraphicsRegisterFlagsSurfaceLoadStore", {"hipGraphicsRegisterFlagsSurfaceLoadStore", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 4 - // CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 0x08 - {"cudaGraphicsRegisterFlagsTextureGather", {"hipGraphicsRegisterFlagsTextureGather", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 8 - - // CUgraphNodeType - {"cudaGraphNodeType", 
{"hipGraphNodeType", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaGraphNodeType enum values - // CU_GRAPH_NODE_TYPE_KERNEL = 0 - {"cudaGraphNodeTypeKernel", {"hipGraphNodeTypeKernel", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x00 - // CU_GRAPH_NODE_TYPE_MEMCPY = 1 - {"cudaGraphNodeTypeMemcpy", {"hipGraphNodeTypeMemcpy", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x01 - // CU_GRAPH_NODE_TYPE_MEMSET = 2 - {"cudaGraphNodeTypeMemset", {"hipGraphNodeTypeMemset", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x02 - // CU_GRAPH_NODE_TYPE_HOST = 3 - {"cudaGraphNodeTypeHost", {"hipGraphNodeTypeHost", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x03 - // CU_GRAPH_NODE_TYPE_GRAPH = 4 - {"cudaGraphNodeTypeGraph", {"hipGraphNodeTypeGraph", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x04 - // CU_GRAPH_NODE_TYPE_EMPTY = 5 - {"cudaGraphNodeTypeEmpty", {"hipGraphNodeTypeEmpty", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x05 - // CU_GRAPH_NODE_TYPE_COUNT - {"cudaGraphNodeTypeCount", {"hipGraphNodeTypeCount", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUgraphExecUpdateResult - {"cudaGraphExecUpdateResult", {"hipGraphExecUpdateResult", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaGraphExecUpdateResult enum values - // CU_GRAPH_EXEC_UPDATE_SUCCESS - {"cudaGraphExecUpdateSuccess", {"hipGraphExecUpdateSuccess", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x0 - // CU_GRAPH_EXEC_UPDATE_ERROR - {"cudaGraphExecUpdateError", {"hipGraphExecUpdateError", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x1 - // CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED - {"cudaGraphExecUpdateErrorTopologyChanged", {"hipGraphExecUpdateErrorTopologyChanged", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x2 - // CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED - {"cudaGraphExecUpdateErrorNodeTypeChanged", 
{"hipGraphExecUpdateErrorNodeTypeChanged", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x3 - // CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED - {"cudaGraphExecUpdateErrorFunctionChanged", {"hipGraphExecUpdateErrorFunctionChanged", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x4 - // CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED - {"cudaGraphExecUpdateErrorParametersChanged", {"hipGraphExecUpdateErrorParametersChanged", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x5 - // CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED - {"cudaGraphExecUpdateErrorNotSupported", {"hipGraphExecUpdateErrorNotSupported", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x6 - - // CUlimit - {"cudaLimit", {"hipLimit_t", "", CONV_TYPE, API_RUNTIME}}, - // cudaLimit enum values - // CU_LIMIT_STACK_SIZE - {"cudaLimitStackSize", {"hipLimitStackSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x00 - // CU_LIMIT_PRINTF_FIFO_SIZE - {"cudaLimitPrintfFifoSize", {"hipLimitPrintfFifoSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x01 - // CU_LIMIT_MALLOC_HEAP_SIZE - {"cudaLimitMallocHeapSize", {"hipLimitMallocHeapSize", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x02 - // CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH - {"cudaLimitDevRuntimeSyncDepth", {"hipLimitDevRuntimeSyncDepth", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x03 - // CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT - {"cudaLimitDevRuntimePendingLaunchCount", {"hipLimitDevRuntimePendingLaunchCount", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x04 - // CU_LIMIT_MAX_L2_FETCH_GRANULARITY - {"cudaLimitMaxL2FetchGranularity", {"hipLimitMaxL2FetchGranularity", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x05 - - // no analogue - {"cudaMemcpyKind", {"hipMemcpyKind", "", CONV_TYPE, API_RUNTIME}}, - // cudaMemcpyKind enum values - {"cudaMemcpyHostToHost", {"hipMemcpyHostToHost", "", CONV_NUMERIC_LITERAL, 
API_RUNTIME}}, // 0 - {"cudaMemcpyHostToDevice", {"hipMemcpyHostToDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - {"cudaMemcpyDeviceToHost", {"hipMemcpyDeviceToHost", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - {"cudaMemcpyDeviceToDevice", {"hipMemcpyDeviceToDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 3 - {"cudaMemcpyDefault", {"hipMemcpyDefault", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 4 - - // CUmem_advise - {"cudaMemoryAdvise", {"hipMemAdvise", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaMemoryAdvise enum values - // CU_MEM_ADVISE_SET_READ_MOSTLY - {"cudaMemAdviseSetReadMostly", {"hipMemAdviseSetReadMostly", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_MEM_ADVISE_UNSET_READ_MOSTLY - {"cudaMemAdviseUnsetReadMostly", {"hipMemAdviseUnsetReadMostly", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_MEM_ADVISE_SET_PREFERRED_LOCATION - {"cudaMemAdviseSetPreferredLocation", {"hipMemAdviseSetPreferredLocation", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - // CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION - {"cudaMemAdviseUnsetPreferredLocation", {"hipMemAdviseUnsetPreferredLocation", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 4 - // CU_MEM_ADVISE_SET_ACCESSED_BY - {"cudaMemAdviseSetAccessedBy", {"hipMemAdviseSetAccessedBy", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 5 - // CU_MEM_ADVISE_UNSET_ACCESSED_BY - {"cudaMemAdviseUnsetAccessedBy", {"hipMemAdviseUnsetAccessedBy", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 6 - - // no analogue - // NOTE: CUmemorytype is partial analogue - {"cudaMemoryType", {"hipMemoryType_t", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaMemoryType enum values - {"cudaMemoryTypeUnregistered", {"hipMemoryTypeUnregistered", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - {"cudaMemoryTypeHost", {"hipMemoryTypeHost", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, 
// 1 - {"cudaMemoryTypeDevice", {"hipMemoryTypeDevice", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - {"cudaMemoryTypeManaged", {"hipMemoryTypeManaged", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - - // CUmem_range_attribute - {"cudaMemRangeAttribute", {"hipMemRangeAttribute", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaMemRangeAttribute enum values - // CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY - {"cudaMemRangeAttributeReadMostly", {"hipMemRangeAttributeReadMostly", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION - {"cudaMemRangeAttributePreferredLocation", {"hipMemRangeAttributePreferredLocation", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY - {"cudaMemRangeAttributeAccessedBy", {"hipMemRangeAttributeAccessedBy", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - // CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION - {"cudaMemRangeAttributeLastPrefetchLocation", {"hipMemRangeAttributeLastPrefetchLocation", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 4 - - // no analogue - {"cudaOutputMode", {"hipOutputMode", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"cudaOutputMode_t", {"hipOutputMode", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaOutputMode enum values - {"cudaKeyValuePair", {"hipKeyValuePair", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x00 - {"cudaCSV", {"hipCSV", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x01 - - // CUresourcetype - {"cudaResourceType", {"hipResourceType", "", CONV_TYPE, API_RUNTIME}}, - // cudaResourceType enum values - // CU_RESOURCE_TYPE_ARRAY - {"cudaResourceTypeArray", {"hipResourceTypeArray", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x00 - // CU_RESOURCE_TYPE_MIPMAPPED_ARRAY - {"cudaResourceTypeMipmappedArray", {"hipResourceTypeMipmappedArray", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 
0x01 - // CU_RESOURCE_TYPE_LINEAR - {"cudaResourceTypeLinear", {"hipResourceTypeLinear", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x02 - // CU_RESOURCE_TYPE_PITCH2D - {"cudaResourceTypePitch2D", {"hipResourceTypePitch2D", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x03 - - // CUresourceViewFormat - {"cudaResourceViewFormat", {"hipResourceViewFormat", "", CONV_TYPE, API_RUNTIME}}, - // enum cudaResourceViewFormat - // CU_RES_VIEW_FORMAT_NONE - {"cudaResViewFormatNone", {"hipResViewFormatNone", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x00 - // CU_RES_VIEW_FORMAT_UINT_1X8 - {"cudaResViewFormatUnsignedChar1", {"hipResViewFormatUnsignedChar1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x01 - // CU_RES_VIEW_FORMAT_UINT_2X8 - {"cudaResViewFormatUnsignedChar2", {"hipResViewFormatUnsignedChar2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x02 - // CU_RES_VIEW_FORMAT_UINT_4X8 - {"cudaResViewFormatUnsignedChar4", {"hipResViewFormatUnsignedChar4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x03 - // CU_RES_VIEW_FORMAT_SINT_1X8 - {"cudaResViewFormatSignedChar1", {"hipResViewFormatSignedChar1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x04 - // CU_RES_VIEW_FORMAT_SINT_2X8 - {"cudaResViewFormatSignedChar2", {"hipResViewFormatSignedChar2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x05 - // CU_RES_VIEW_FORMAT_SINT_4X8 - {"cudaResViewFormatSignedChar4", {"hipResViewFormatSignedChar4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x06 - // CU_RES_VIEW_FORMAT_UINT_1X16 - {"cudaResViewFormatUnsignedShort1", {"hipResViewFormatUnsignedShort1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x07 - // CU_RES_VIEW_FORMAT_UINT_2X16 - {"cudaResViewFormatUnsignedShort2", {"hipResViewFormatUnsignedShort2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x08 - // CU_RES_VIEW_FORMAT_UINT_4X16 - {"cudaResViewFormatUnsignedShort4", {"hipResViewFormatUnsignedShort4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x09 - // CU_RES_VIEW_FORMAT_SINT_1X16 - {"cudaResViewFormatSignedShort1", 
{"hipResViewFormatSignedShort1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x0a - // CU_RES_VIEW_FORMAT_SINT_2X16 - {"cudaResViewFormatSignedShort2", {"hipResViewFormatSignedShort2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x0b - // CU_RES_VIEW_FORMAT_SINT_4X16 - {"cudaResViewFormatSignedShort4", {"hipResViewFormatSignedShort4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x0c - // CU_RES_VIEW_FORMAT_UINT_1X32 - {"cudaResViewFormatUnsignedInt1", {"hipResViewFormatUnsignedInt1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x0d - // CU_RES_VIEW_FORMAT_UINT_2X32 - {"cudaResViewFormatUnsignedInt2", {"hipResViewFormatUnsignedInt2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x0e - // CU_RES_VIEW_FORMAT_UINT_4X32 - {"cudaResViewFormatUnsignedInt4", {"hipResViewFormatUnsignedInt4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x0f - // CU_RES_VIEW_FORMAT_SINT_1X32 - {"cudaResViewFormatSignedInt1", {"hipResViewFormatSignedInt1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x10 - // CU_RES_VIEW_FORMAT_SINT_2X32 - {"cudaResViewFormatSignedInt2", {"hipResViewFormatSignedInt2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x11 - // CU_RES_VIEW_FORMAT_SINT_4X32 - {"cudaResViewFormatSignedInt4", {"hipResViewFormatSignedInt4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x12 - // CU_RES_VIEW_FORMAT_FLOAT_1X16 - {"cudaResViewFormatHalf1", {"hipResViewFormatHalf1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x13 - // CU_RES_VIEW_FORMAT_FLOAT_2X16 - {"cudaResViewFormatHalf2", {"hipResViewFormatHalf2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x14 - // CU_RES_VIEW_FORMAT_FLOAT_4X16 - {"cudaResViewFormatHalf4", {"hipResViewFormatHalf4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x15 - // CU_RES_VIEW_FORMAT_FLOAT_1X32 - {"cudaResViewFormatFloat1", {"hipResViewFormatFloat1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x16 - // CU_RES_VIEW_FORMAT_FLOAT_2X32 - {"cudaResViewFormatFloat2", {"hipResViewFormatFloat2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x17 - // 
CU_RES_VIEW_FORMAT_FLOAT_4X32 - {"cudaResViewFormatFloat4", {"hipResViewFormatFloat4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x18 - // CU_RES_VIEW_FORMAT_UNSIGNED_BC1 - {"cudaResViewFormatUnsignedBlockCompressed1", {"hipResViewFormatUnsignedBlockCompressed1", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x19 - // CU_RES_VIEW_FORMAT_UNSIGNED_BC2 - {"cudaResViewFormatUnsignedBlockCompressed2", {"hipResViewFormatUnsignedBlockCompressed2", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x1a - // CU_RES_VIEW_FORMAT_UNSIGNED_BC3 - {"cudaResViewFormatUnsignedBlockCompressed3", {"hipResViewFormatUnsignedBlockCompressed3", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x1b - // CU_RES_VIEW_FORMAT_UNSIGNED_BC4 - {"cudaResViewFormatUnsignedBlockCompressed4", {"hipResViewFormatUnsignedBlockCompressed4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x1c - // CU_RES_VIEW_FORMAT_SIGNED_BC4 - {"cudaResViewFormatSignedBlockCompressed4", {"hipResViewFormatSignedBlockCompressed4", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x1d - // CU_RES_VIEW_FORMAT_UNSIGNED_BC5 - {"cudaResViewFormatUnsignedBlockCompressed5", {"hipResViewFormatUnsignedBlockCompressed5", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x1e - // CU_RES_VIEW_FORMAT_SIGNED_BC5 - {"cudaResViewFormatSignedBlockCompressed5", {"hipResViewFormatSignedBlockCompressed5", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x1f - // CU_RES_VIEW_FORMAT_UNSIGNED_BC6H - {"cudaResViewFormatUnsignedBlockCompressed6H", {"hipResViewFormatUnsignedBlockCompressed6H", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x20 - // CU_RES_VIEW_FORMAT_SIGNED_BC6H - {"cudaResViewFormatSignedBlockCompressed6H", {"hipResViewFormatSignedBlockCompressed6H", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x21 - // CU_RES_VIEW_FORMAT_UNSIGNED_BC7 - {"cudaResViewFormatUnsignedBlockCompressed7", {"hipResViewFormatUnsignedBlockCompressed7", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0x22 - - // CUshared_carveout - {"cudaSharedCarveout", {"hipSharedCarveout", "", 
CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaSharedCarveout enum values - // CU_SHAREDMEM_CARVEOUT_DEFAULT - {"cudaSharedmemCarveoutDefault", {"hipSharedmemCarveoutDefault", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // -1 - // CU_SHAREDMEM_CARVEOUT_MAX_SHARED - {"cudaSharedmemCarveoutMaxShared", {"hipSharedmemCarveoutMaxShared", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 100 - // CU_SHAREDMEM_CARVEOUT_MAX_L1 - {"cudaSharedmemCarveoutMaxL1", {"hipSharedmemCarveoutMaxL1", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - - // CUsharedconfig - {"cudaSharedMemConfig", {"hipSharedMemConfig", "", CONV_TYPE, API_RUNTIME}}, - // cudaSharedMemConfig enum values - // CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0x00 - {"cudaSharedMemBankSizeDefault", {"hipSharedMemBankSizeDefault", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - // CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 0x01 - {"cudaSharedMemBankSizeFourByte", {"hipSharedMemBankSizeFourByte", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - // CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02 - {"cudaSharedMemBankSizeEightByte", {"hipSharedMemBankSizeEightByte", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - - // CUstreamCaptureStatus - {"cudaStreamCaptureStatus", {"hipStreamCaptureStatus", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaStreamCaptureStatus enum values - // CU_STREAM_CAPTURE_STATUS_NONE - {"cudaStreamCaptureStatusNone", {"hipStreamCaptureStatusNone", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_STREAM_CAPTURE_STATUS_ACTIVE - {"cudaStreamCaptureStatusActive", {"hipStreamCaptureStatusActive", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_STREAM_CAPTURE_STATUS_INVALIDATED - {"cudaStreamCaptureStatusInvalidated", {"hipStreamCaptureStatusInvalidated", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - - // CUstreamCaptureMode - {"cudaStreamCaptureMode", {"hipStreamCaptureMode", "", 
CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaStreamCaptureMode enum values - // CU_STREAM_CAPTURE_MODE_GLOBAL - {"cudaStreamCaptureModeGlobal", {"hipStreamCaptureModeGlobal", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_STREAM_CAPTURE_MODE_THREAD_LOCAL - {"cudaStreamCaptureModeThreadLocal", {"hipStreamCaptureModeThreadLocal", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_STREAM_CAPTURE_MODE_RELAXED - {"cudaStreamCaptureModeRelaxed", {"hipStreamCaptureModeRelaxed", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - - // no analogue - {"cudaSurfaceBoundaryMode", {"hipSurfaceBoundaryMode", "", CONV_TYPE, API_RUNTIME}}, - // cudaSurfaceBoundaryMode enum values - {"cudaBoundaryModeZero", {"hipBoundaryModeZero", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - {"cudaBoundaryModeClamp", {"hipBoundaryModeClamp", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - {"cudaBoundaryModeTrap", {"hipBoundaryModeTrap", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - - // no analogue - {"cudaSurfaceFormatMode", {"hipSurfaceFormatMode", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // enum cudaSurfaceFormatMode - {"cudaFormatModeForced", {"hipFormatModeForced", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - {"cudaFormatModeAuto", {"hipFormatModeAuto", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - - // no analogue - {"cudaTextureAddressMode", {"hipTextureAddressMode", "", CONV_TYPE, API_RUNTIME}}, - // cudaTextureAddressMode enum values - {"cudaAddressModeWrap", {"hipAddressModeWrap", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - {"cudaAddressModeClamp", {"hipAddressModeClamp", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - {"cudaAddressModeMirror", {"hipAddressModeMirror", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 2 - {"cudaAddressModeBorder", {"hipAddressModeBorder", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 3 - - // CUfilter_mode - {"cudaTextureFilterMode", 
{"hipTextureFilterMode", "", CONV_TYPE, API_RUNTIME}}, - // cudaTextureFilterMode enum values - // CU_TR_FILTER_MODE_POINT - {"cudaFilterModePoint", {"hipFilterModePoint", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - // CU_TR_FILTER_MODE_LINEAR - {"cudaFilterModeLinear", {"hipFilterModeLinear", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - - // no analogue - {"cudaTextureReadMode", {"hipTextureReadMode", "", CONV_TYPE, API_RUNTIME}}, - // cudaTextureReadMode enum values - {"cudaReadModeElementType", {"hipReadModeElementType", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 0 - {"cudaReadModeNormalizedFloat", {"hipReadModeNormalizedFloat", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 1 - - // CUGLDeviceList - {"cudaGLDeviceList", {"hipGLDeviceList", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaGLDeviceList enum values - // CU_GL_DEVICE_LIST_ALL = 0x01 - {"cudaGLDeviceListAll", {"hipGLDeviceListAll", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_GL_DEVICE_LIST_CURRENT_FRAME = 0x02 - {"cudaGLDeviceListCurrentFrame", {"hipGLDeviceListCurrentFrame", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_GL_DEVICE_LIST_NEXT_FRAME = 0x03 - {"cudaGLDeviceListNextFrame", {"hipGLDeviceListNextFrame", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - - // CUGLmap_flags - {"cudaGLMapFlags", {"hipGLMapFlags", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaGLMapFlags enum values - // CU_GL_MAP_RESOURCE_FLAGS_NONE = 0x00 - {"cudaGLMapFlagsNone", {"hipGLMapFlagsNone", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01 - {"cudaGLMapFlagsReadOnly", {"hipGLMapFlagsReadOnly", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02 - {"cudaGLMapFlagsWriteDiscard", {"hipGLMapFlagsWriteDiscard", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - - // CUd3d9DeviceList - 
{"cudaD3D9DeviceList", {"hipD3D9DeviceList", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CUd3d9DeviceList enum values - // CU_D3D9_DEVICE_LIST_ALL = 0x01 - {"cudaD3D9DeviceListAll", {"HIP_D3D9_DEVICE_LIST_ALL", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_D3D9_DEVICE_LIST_CURRENT_FRAME = 0x02 - {"cudaD3D9DeviceListCurrentFrame", {"HIP_D3D9_DEVICE_LIST_CURRENT_FRAME", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_D3D9_DEVICE_LIST_NEXT_FRAME = 0x03 - {"cudaD3D9DeviceListNextFrame", {"HIP_D3D9_DEVICE_LIST_NEXT_FRAME", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - - // CUd3d9map_flags - {"cudaD3D9MapFlags", {"hipD3D9MapFlags", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaD3D9MapFlags enum values - // CU_D3D9_MAPRESOURCE_FLAGS_NONE = 0x00 - {"cudaD3D9MapFlagsNone", {"HIP_D3D9_MAPRESOURCE_FLAGS_NONE", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_D3D9_MAPRESOURCE_FLAGS_READONLY = 0x01 - {"cudaD3D9MapFlagsReadOnly", {"HIP_D3D9_MAPRESOURCE_FLAGS_READONLY", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_D3D9_MAPRESOURCE_FLAGS_WRITEDISCARD = 0x02 - {"cudaD3D9MapFlagsWriteDiscard", {"HIP_D3D9_MAPRESOURCE_FLAGS_WRITEDISCARD", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - - // CUd3d9Register_flags - {"cudaD3D9RegisterFlags", {"hipD3D9RegisterFlags", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaD3D9RegisterFlags enum values - // CU_D3D9_REGISTER_FLAGS_NONE = 0x00 - {"cudaD3D9RegisterFlagsNone", {"HIP_D3D9_REGISTER_FLAGS_NONE", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_D3D9_REGISTER_FLAGS_ARRAY = 0x01 - {"cudaD3D9RegisterFlagsArray", {"HIP_D3D9_REGISTER_FLAGS_ARRAY", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - - // CUd3d10DeviceList - {"cudaD3D10DeviceList", {"hipd3d10DeviceList", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaD3D10DeviceList enum values - 
// CU_D3D10_DEVICE_LIST_ALL = 0x01 - {"cudaD3D10DeviceListAll", {"HIP_D3D10_DEVICE_LIST_ALL", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_D3D10_DEVICE_LIST_CURRENT_FRAME = 0x02 - {"cudaD3D10DeviceListCurrentFrame", {"HIP_D3D10_DEVICE_LIST_CURRENT_FRAME", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_D3D10_DEVICE_LIST_NEXT_FRAME = 0x03 - {"cudaD3D10DeviceListNextFrame", {"HIP_D3D10_DEVICE_LIST_NEXT_FRAME", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - - // CUd3d10map_flags - {"cudaD3D10MapFlags", {"hipD3D10MapFlags", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaD3D10MapFlags enum values - // CU_D3D10_MAPRESOURCE_FLAGS_NONE = 0x00 - {"cudaD3D10MapFlagsNone", {"HIP_D3D10_MAPRESOURCE_FLAGS_NONE", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_D3D10_MAPRESOURCE_FLAGS_READONLY = 0x01 - {"cudaD3D10MapFlagsReadOnly", {"HIP_D3D10_MAPRESOURCE_FLAGS_READONLY", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_D3D10_MAPRESOURCE_FLAGS_WRITEDISCARD = 0x02 - {"cudaD3D10MapFlagsWriteDiscard", {"HIP_D3D10_MAPRESOURCE_FLAGS_WRITEDISCARD", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - - // CUd3d10Register_flags - {"cudaD3D10RegisterFlags", {"hipD3D10RegisterFlags", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaD3D10RegisterFlags enum values - // CU_D3D10_REGISTER_FLAGS_NONE = 0x00 - {"cudaD3D10RegisterFlagsNone", {"HIP_D3D10_REGISTER_FLAGS_NONE", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 0 - // CU_D3D10_REGISTER_FLAGS_ARRAY = 0x01 - {"cudaD3D10RegisterFlagsArray", {"HIP_D3D10_REGISTER_FLAGS_ARRAY", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - - // CUd3d11DeviceList - {"cudaD3D11DeviceList", {"hipd3d11DeviceList", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // cudaD3D11DeviceList enum values - // CU_D3D11_DEVICE_LIST_ALL = 0x01 - {"cudaD3D11DeviceListAll", {"HIP_D3D11_DEVICE_LIST_ALL", 
"", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 1 - // CU_D3D11_DEVICE_LIST_CURRENT_FRAME = 0x02 - {"cudaD3D11DeviceListCurrentFrame", {"HIP_D3D11_DEVICE_LIST_CURRENT_FRAME", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 2 - // CU_D3D11_DEVICE_LIST_NEXT_FRAME = 0x03 - {"cudaD3D11DeviceListNextFrame", {"HIP_D3D11_DEVICE_LIST_NEXT_FRAME", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - - // no analogue - {"libraryPropertyType", {"hipLibraryPropertyType_t", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - {"libraryPropertyType_t", {"hipLibraryPropertyType_t", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"MAJOR_VERSION", {"hipLibraryMajorVersion", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"MINOR_VERSION", {"hipLibraryMinorVersion", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, - // no analogue - {"PATCH_LEVEL", {"hipLibraryPatchVersion", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 4. Typedefs - - // CUhostFn - {"cudaHostFn_t", {"hipHostFn", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // CUstreamCallback - {"cudaStreamCallback_t", {"hipStreamCallback_t", "", CONV_TYPE, API_RUNTIME}}, - - // CUsurfObject - {"cudaSurfaceObject_t", {"hipSurfaceObject_t", "", CONV_TYPE, API_RUNTIME}}, - - // CUtexObject - {"cudaTextureObject_t", {"hipTextureObject_t", "", CONV_TYPE, API_RUNTIME}}, - - // CUuuid - {"cudaUUID_t", {"hipUUID_t", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, - - // 5. 
Defines - - // no analogue - {"CUDA_EGL_MAX_PLANES", {"HIP_EGL_MAX_PLANES", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 3 - // CU_IPC_HANDLE_SIZE - {"CUDA_IPC_HANDLE_SIZE", {"HIP_IPC_HANDLE_SIZE", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 64 - // no analogue - {"cudaArrayDefault", {"hipArrayDefault", "", CONV_DEFINE, API_RUNTIME}}, // 0x00 - // CUDA_ARRAY3D_LAYERED - {"cudaArrayLayered", {"hipArrayLayered", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CUDA_ARRAY3D_SURFACE_LDST - {"cudaArraySurfaceLoadStore", {"hipArraySurfaceLoadStore", "", CONV_DEFINE, API_RUNTIME}}, // 0x02 - // CUDA_ARRAY3D_CUBEMAP - {"cudaArrayCubemap", {"hipArrayCubemap", "", CONV_DEFINE, API_RUNTIME}}, // 0x04 - // CUDA_ARRAY3D_TEXTURE_GATHER - {"cudaArrayTextureGather", {"hipArrayTextureGather", "", CONV_DEFINE, API_RUNTIME}}, // 0x08 - // CUDA_ARRAY3D_COLOR_ATTACHMENT - {"cudaArrayColorAttachment", {"hipArrayColorAttachment", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x20 - // CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC - {"cudaCooperativeLaunchMultiDeviceNoPreSync", {"hipCooperativeLaunchMultiDeviceNoPreSync", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC - {"cudaCooperativeLaunchMultiDeviceNoPostSync", {"hipCooperativeLaunchMultiDeviceNoPostSync", "", CONV_DEFINE, API_RUNTIME}}, // 0x02 - // CU_DEVICE_CPU ((CUdevice)-1) - {"cudaCpuDeviceId", {"hipCpuDeviceId", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // ((int)-1) - // CU_DEVICE_INVALID ((CUdevice)-2) - {"cudaInvalidDeviceId", {"hipInvalidDeviceId", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // ((int)-2) - // CU_CTX_BLOCKING_SYNC - // NOTE: Deprecated since CUDA 4.0 and replaced with cudaDeviceScheduleBlockingSync - {"cudaDeviceBlockingSync", {"hipDeviceScheduleBlockingSync", "", CONV_DEFINE, API_RUNTIME}}, // 0x04 - // CU_CTX_LMEM_RESIZE_TO_MAX - {"cudaDeviceLmemResizeToMax", {"hipDeviceLmemResizeToMax", "", CONV_DEFINE, 
API_RUNTIME}}, // 0x10 - // CU_CTX_MAP_HOST - {"cudaDeviceMapHost", {"hipDeviceMapHost", "", CONV_DEFINE, API_RUNTIME}}, // 0x08 - // CU_CTX_FLAGS_MASK - {"cudaDeviceMask", {"hipDeviceMask", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x1f - // no analogue - {"cudaDevicePropDontCare", {"hipDevicePropDontCare", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, - // CU_CTX_SCHED_AUTO - {"cudaDeviceScheduleAuto", {"hipDeviceScheduleAuto", "", CONV_DEFINE, API_RUNTIME}}, // 0x00 - // CU_CTX_SCHED_SPIN - {"cudaDeviceScheduleSpin", {"hipDeviceScheduleSpin", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CU_CTX_SCHED_YIELD - {"cudaDeviceScheduleYield", {"hipDeviceScheduleYield", "", CONV_DEFINE, API_RUNTIME}}, // 0x02 - // CU_CTX_SCHED_BLOCKING_SYNC - {"cudaDeviceScheduleBlockingSync", {"hipDeviceScheduleBlockingSync", "", CONV_DEFINE, API_RUNTIME}}, // 0x04 - // CU_CTX_SCHED_MASK - {"cudaDeviceScheduleMask", {"hipDeviceScheduleMask", "", CONV_DEFINE, API_RUNTIME}}, // 0x07 - // CU_EVENT_DEFAULT - {"cudaEventDefault", {"hipEventDefault", "", CONV_DEFINE, API_RUNTIME}}, // 0x00 - // CU_EVENT_BLOCKING_SYNC - {"cudaEventBlockingSync", {"hipEventBlockingSync", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CU_EVENT_DISABLE_TIMING - {"cudaEventDisableTiming", {"hipEventDisableTiming", "", CONV_DEFINE, API_RUNTIME}}, // 0x02 - // CU_EVENT_INTERPROCESS - {"cudaEventInterprocess", {"hipEventInterprocess", "", CONV_DEFINE, API_RUNTIME}}, // 0x04 - // CUDA_EXTERNAL_MEMORY_DEDICATED - {"cudaExternalMemoryDedicated", {"hipExternalMemoryDedicated", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x1 - // CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC - {"cudaExternalSemaphoreSignalSkipNvSciBufMemSync", {"hipExternalSemaphoreSignalSkipNvSciBufMemSync", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x01 - // CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC - {"cudaExternalSemaphoreWaitSkipNvSciBufMemSync", {"hipExternalSemaphoreWaitSkipNvSciBufMemSync", "", 
CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x02 - // CUDA_NVSCISYNC_ATTR_SIGNAL - {"cudaNvSciSyncAttrSignal", {"hipNvSciSyncAttrSignal", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x1 - // CUDA_NVSCISYNC_ATTR_WAIT - {"cudaNvSciSyncAttrWait", {"hipNvSciSyncAttrWait", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x2 - // no analogue - {"cudaHostAllocDefault", {"hipHostMallocDefault", "", CONV_DEFINE, API_RUNTIME}}, // 0x00 - // CU_MEMHOSTALLOC_PORTABLE - {"cudaHostAllocPortable", {"hipHostMallocPortable", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CU_MEMHOSTALLOC_DEVICEMAP - {"cudaHostAllocMapped", {"hipHostMallocMapped", "", CONV_DEFINE, API_RUNTIME}}, // 0x02 - // CU_MEMHOSTALLOC_WRITECOMBINED - {"cudaHostAllocWriteCombined", {"hipHostMallocWriteCombined", "", CONV_DEFINE, API_RUNTIME}}, // 0x04 - // no analogue - {"cudaHostRegisterDefault", {"hipHostRegisterDefault", "", CONV_DEFINE, API_RUNTIME}}, // 0x00 - // CU_MEMHOSTREGISTER_PORTABLE - {"cudaHostRegisterPortable", {"hipHostRegisterPortable", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CU_MEMHOSTREGISTER_DEVICEMAP - {"cudaHostRegisterMapped", {"hipHostRegisterMapped", "", CONV_DEFINE, API_RUNTIME}}, // 0x02 - // CU_MEMHOSTREGISTER_IOMEMORY - {"cudaHostRegisterIoMemory", {"hipHostRegisterIoMemory", "", CONV_DEFINE, API_RUNTIME}}, // 0x04 - // CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS - {"cudaIpcMemLazyEnablePeerAccess", {"hipIpcMemLazyEnablePeerAccess", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CU_MEM_ATTACH_GLOBAL - {"cudaMemAttachGlobal", {"hipMemAttachGlobal", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CU_MEM_ATTACH_HOST - {"cudaMemAttachHost", {"hipMemAttachHost", "", CONV_DEFINE, API_RUNTIME}}, // 0x02 - // CU_MEM_ATTACH_SINGLE - {"cudaMemAttachSingle", {"hipMemAttachSingle", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x04 - // no analogue - {"cudaTextureType1D", {"hipTextureType1D", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // no analogue - {"cudaTextureType2D", 
{"hipTextureType2D", "", CONV_DEFINE, API_RUNTIME}}, // 0x02 - // no analogue - {"cudaTextureType3D", {"hipTextureType3D", "", CONV_DEFINE, API_RUNTIME}}, // 0x03 - // no analogue - {"cudaTextureTypeCubemap", {"hipTextureTypeCubemap", "", CONV_DEFINE, API_RUNTIME}}, // 0x0C - // no analogue - {"cudaTextureType1DLayered", {"hipTextureType1DLayered", "", CONV_DEFINE, API_RUNTIME}}, // 0xF1 - // no analogue - {"cudaTextureType2DLayered", {"hipTextureType2DLayered", "", CONV_DEFINE, API_RUNTIME}}, // 0xF2 - // no analogue - {"cudaTextureTypeCubemapLayered", {"hipTextureTypeCubemapLayered", "", CONV_DEFINE, API_RUNTIME}}, // 0xFC - // CU_OCCUPANCY_DEFAULT - {"cudaOccupancyDefault", {"hipOccupancyDefault", "", CONV_DEFINE, API_RUNTIME}}, // 0x00 - // CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE - {"cudaOccupancyDisableCachingOverride", {"hipOccupancyDisableCachingOverride", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // 0x01 - // CU_STREAM_DEFAULT - {"cudaStreamDefault", {"hipStreamDefault", "", CONV_DEFINE, API_RUNTIME}}, // 0x00 - // CU_STREAM_NON_BLOCKING - {"cudaStreamNonBlocking", {"hipStreamNonBlocking", "", CONV_DEFINE, API_RUNTIME}}, // 0x01 - // CU_STREAM_LEGACY ((CUstream)0x1) - {"cudaStreamLegacy", {"hipStreamLegacy", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // ((cudaStream_t)0x1) - // CU_STREAM_PER_THREAD ((CUstream)0x2) - {"cudaStreamPerThread", {"hipStreamPerThread", "", CONV_DEFINE, API_RUNTIME, HIP_UNSUPPORTED}}, // ((cudaStream_t)0x2) -}; diff --git a/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp b/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp deleted file mode 100644 index 1df1bb9cba..0000000000 --- a/hipify-clang/src/CUDA2HIP_SPARSE_API_functions.cpp +++ /dev/null @@ -1,758 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Maps the names of CUDA SPARSE API functions to the corresponding HIP functions -const std::map CUDA_SPARSE_FUNCTION_MAP{ - // 5. 
cuSPARSE Helper Function Reference - {"cusparseCreate", {"hipsparseCreate", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCreateSolveAnalysisInfo", {"hipsparseCreateSolveAnalysisInfo", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCreateHybMat", {"hipsparseCreateHybMat", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCreateMatDescr", {"hipsparseCreateMatDescr", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDestroy", {"hipsparseDestroy", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDestroySolveAnalysisInfo", {"hipsparseDestroySolveAnalysisInfo", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyHybMat", {"hipsparseDestroyHybMat", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDestroyMatDescr", {"hipsparseDestroyMatDescr", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseGetLevelInfo", {"hipsparseGetLevelInfo", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseGetMatDiagType", {"hipsparseGetMatDiagType", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseGetMatFillMode", {"hipsparseGetMatFillMode", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseGetMatIndexBase", {"hipsparseGetMatIndexBase", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseGetMatType", {"hipsparseGetMatType", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseGetPointerMode", {"hipsparseGetPointerMode", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseGetVersion", {"hipsparseGetVersion", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseSetMatDiagType", {"hipsparseSetMatDiagType", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseSetMatFillMode", {"hipsparseSetMatFillMode", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseSetMatIndexBase", {"hipsparseSetMatIndexBase", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseSetMatType", {"hipsparseSetMatType", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseSetPointerMode", {"hipsparseSetPointerMode", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseSetStream", {"hipsparseSetStream", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseGetStream", {"hipsparseGetStream", "", 
CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCreateCsrsv2Info", {"hipsparseCreateCsrsv2Info", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDestroyCsrsv2Info", {"hipsparseDestroyCsrsv2Info", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCreateCsrsm2Info", {"hipsparseCreateCsrsm2Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyCsrsm2Info", {"hipsparseDestroyCsrsm2Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCreateCsric02Info", {"hipsparseCreateCsric02Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyCsric02Info", {"hipsparseDestroyCsric02Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCreateCsrilu02Info", {"hipsparseCreateCsrilu02Info", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDestroyCsrilu02Info", {"hipsparseDestroyCsrilu02Info", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCreateBsrsv2Info", {"hipsparseCreateBsrsv2Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyBsrsv2Info", {"hipsparseDestroyBsrsv2Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCreateBsrsm2Info", {"hipsparseCreateBsrsm2Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyBsrsm2Info", {"hipsparseDestroyBsrsm2Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCreateBsric02Inf", {"hipsparseCreateBsric02Inf", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyBsric02Info", {"hipsparseDestroyBsric02Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCreateBsrilu02Info", {"hipsparseCreateBsrilu02Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyBsrilu02Info", {"hipsparseDestroyBsrilu02Info", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCreateCsrgemm2Info", {"hipsparseCreateCsrgemm2Info", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDestroyCsrgemm2Info", {"hipsparseDestroyCsrgemm2Info", "", CONV_LIB_FUNC, API_SPARSE}}, - 
{"cusparseCreatePruneInfo", {"hipsparseCreatePruneInfo", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyPruneInfo", {"hipsparseDestroyPruneInfo", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // 6. cuSPARSE Level 1 Function Reference - {"cusparseSaxpyi", {"hipsparseSaxpyi", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDaxpyi", {"hipsparseDaxpyi", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCaxpyi", {"hipsparseCaxpyi", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZaxpyi", {"hipsparseZaxpyi", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseSdoti", {"hipsparseSdoti", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDdoti", {"hipsparseDdoti", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCdoti", {"hipsparseCdoti", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZdoti", {"hipsparseZdoti", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseCdotci", {"hipsparseCdotci", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZdotci", {"hipsparseZdotci", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseSgthr", {"hipsparseSgthr", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDgthr", {"hipsparseDgthr", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCgthr", {"hipsparseCgthr", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZgthr", {"hipsparseZgthr", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseSgthrz", {"hipsparseSgthrz", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDgthrz", {"hipsparseDgthrz", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCgthrz", {"hipsparseCgthrz", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZgthrz", {"hipsparseZgthrz", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseSroti", {"hipsparseSroti", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDroti", {"hipsparseDroti", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseSsctr", {"hipsparseSsctr", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDsctr", {"hipsparseDsctr", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCsctr", {"hipsparseCsctr", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZsctr", {"hipsparseZsctr", "", 
CONV_LIB_FUNC, API_SPARSE}}, - - // 7. cuSPARSE Level 2 Function Reference - {"cusparseSbsrmv", {"hipsparseSbsrmv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrmv", {"hipsparseDbsrmv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrmv", {"hipsparseCbsrmv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrmv", {"hipsparseZbsrmv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrxmv", {"hipsparseSbsrxmv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrxmv", {"hipsparseDbsrxmv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrxmv", {"hipsparseCbsrxmv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrxmv", {"hipsparseZbsrxmv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrmv", {"hipsparseScsrmv", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrmv", {"hipsparseDcsrmv", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrmv", {"hipsparseCcsrmv", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrmv", {"hipsparseZcsrmv", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseCsrmvEx", {"hipsparseCsrmvEx", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCsrmvEx_bufferSize", {"hipsparseCsrmvEx_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrmv_mp", {"hipsparseScsrmv_mp", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrmv_mp", {"hipsparseDcsrmv_mp", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrmv_mp", {"hipsparseCcsrmv_mp", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrmv_mp", {"hipsparseZcsrmv_mp", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgemvi", {"hipsparseSgemvi", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgemvi", {"hipsparseDgemvi", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgemvi", {"hipsparseCgemvi", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - 
{"cusparseZgemvi", {"hipsparseZgemvi", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgemvi_bufferSize", {"hipsparseSgemvi_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgemvi_bufferSize", {"hipsparseDgemvi_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgemvi_bufferSize", {"hipsparseCgemvi_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgemvi_bufferSize", {"hipsparseZgemvi_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrsv2_bufferSize", {"hipsparseSbsrsv2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSbsrsv2_bufferSizeExt", {"hipsparseSbsrsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrsv2_bufferSize", {"hipsparseDbsrsv2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrsv2_bufferSizeExt", {"hipsparseDbsrsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrsv2_bufferSize", {"hipsparseCbsrsv2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrsv2_bufferSizeExt", {"hipsparseCbsrsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrsv2_bufferSize", {"hipsparseZbsrsv2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrsv2_bufferSizeExt", {"hipsparseZbsrsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrsv2_analysis", {"hipsparseSbsrsv2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrsv2_analysis", {"hipsparseDbsrsv2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrsv2_analysis", {"hipsparseCbsrsv2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrsv2_analysis", {"hipsparseZbsrsv2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrsv_solve", {"hipsparseScsrsv_solve", "", 
CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrsv_solve", {"hipsparseDcsrsv_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrsv_solve", {"hipsparseCcsrsv_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrsv_solve", {"hipsparseZcsrsv_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrsv2_solve", {"hipsparseSbsrsv2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrsv2_solve", {"hipsparseDbsrsv2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrsv2_solve", {"hipsparseCbsrsv2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrsv2_solve", {"hipsparseZbsrsv2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseXbsrsv2_zeroPivot", {"hipsparseXbsrsv2_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrsv_analysis", {"hipsparseScsrsv_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrsv_analysis", {"hipsparseDcsrsv_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrsv_analysis", {"hipsparseCcsrsv_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrsv_analysis", {"hipsparseZcsrsv_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseCsrsv_analysisEx", {"hipsparseCsrsv_analysisEx", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCsrsv_solveEx", {"hipsparseCsrsv_solveEx", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrsv2_bufferSize", {"hipsparseScsrsv2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseScsrsv2_bufferSizeExt", {"hipsparseScsrsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrsv2_bufferSize", {"hipsparseDcsrsv2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrsv2_bufferSizeExt", {"hipsparseDcsrsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrsv2_bufferSize", 
{"hipsparseCcsrsv2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrsv2_bufferSizeExt", {"hipsparseCcsrsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrsv2_bufferSize", {"hipsparseZcsrsv2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrsv2_bufferSizeExt", {"hipsparseZcsrsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrsv2_analysis", {"hipsparseScsrsv2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrsv2_analysis", {"hipsparseDcsrsv2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrsv2_analysis", {"hipsparseCcsrsv2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrsv2_analysis", {"hipsparseZcsrsv2_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrsv2_solve", {"hipsparseScsrsv2_solve", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrsv2_solve", {"hipsparseDcsrsv2_solve", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrsv2_solve", {"hipsparseCcsrsv2_solve", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrsv2_solve", {"hipsparseZcsrsv2_solve", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseXcsrsv2_zeroPivot", {"hipsparseXcsrsv2_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseShybmv", {"hipsparseShybmv", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDhybmv", {"hipsparseDhybmv", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseChybmv", {"hipsparseChybmv", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZhybmv", {"hipsparseZhybmv", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseShybsv_analysis", {"hipsparseShybsv_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDhybsv_analysis", {"hipsparseDhybsv_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseChybsv_analysis", {"hipsparseChybsv_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZhybsv_analysis", {"hipsparseZhybsv_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseShybsv_solve", {"hipsparseShybsv_solve", "", 
CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDhybsv_solve", {"hipsparseDhybsv_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseChybsv_solve", {"hipsparseChybsv_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZhybsv_solve", {"hipsparseZhybsv_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // 8. cuSPARSE Level 3 Function Reference - {"cusparseScsrmm", {"hipsparseScsrmm", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrmm", {"hipsparseDcsrmm", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrmm", {"hipsparseCcsrmm", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrmm", {"hipsparseZcsrmm", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrmm2", {"hipsparseScsrmm2", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrmm2", {"hipsparseDcsrmm2", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrmm2", {"hipsparseCcsrmm2", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrmm2", {"hipsparseZcsrmm2", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrsm_analysis", {"hipsparseScsrsm_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrsm_analysis", {"hipsparseDcsrsm_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrsm_analysis", {"hipsparseCcsrsm_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrsm_analysis", {"hipsparseZcsrsm_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrsm_solve", {"hipsparseScsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrsm_solve", {"hipsparseDcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrsm_solve", {"hipsparseCcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrsm_solve", {"hipsparseZcsrsm_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrsm2_bufferSizeExt", {"hipsparseScsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - 
{"cusparseDcsrsm2_bufferSizeExt", {"hipsparseDcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrsm2_bufferSizeExt", {"hipsparseCcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrsm2_bufferSizeExt", {"hipsparseZcsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrsm2_analysis", {"hipsparseScsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrsm2_analysis", {"hipsparseDcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrsm2_analysis", {"hipsparseCcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrsm2_analysis", {"hipsparseZcsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrsm2_solve", {"hipsparseScsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrsm2_solve", {"hipsparseDcsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrsm2_solve", {"hipsparseCcsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrsm2_solve", {"hipsparseZcsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXcsrsm2_zeroPivot", {"hipsparseXcsrsm2_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrmm", {"hipsparseSbsrmm", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrmm", {"hipsparseDbsrmm", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrmm", {"hipsparseCbsrmm", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrmm", {"hipsparseZbsrmm", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrsm2_bufferSize", {"hipsparseCbsrsm2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSbsrsm2_bufferSizeExt", {"hipsparseCbsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrsm2_bufferSize", 
{"hipsparseDbsrsm2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrsm2_bufferSizeExt", {"hipsparseDbsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrsm2_bufferSize", {"hipsparseCbsrsm2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrsm2_bufferSizeExt", {"hipsparseCbsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrsm2_bufferSize", {"hipsparseZbsrsm2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrsm2_bufferSizeExt", {"hipsparseZbsrsm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrsm2_analysis", {"hipsparseSbsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrsm2_analysis", {"hipsparseDbsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrsm2_analysis", {"hipsparseCbsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrsm2_analysis", {"hipsparseZbsrsm2_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrsm2_solve", {"hipsparseSbsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrsm2_solve", {"hipsparseDbsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrsm2_solve", {"hipsparseCbsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrsm2_solve", {"hipsparseZbsrsm2_solve", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXbsrsm2_zeroPivot", {"hipsparseXbsrsm2_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgemmi", {"hipsparseSgemmi", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgemmi", {"hipsparseDgemmi", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgemmi", {"hipsparseCgemmi", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgemmi", {"hipsparseZgemmi", "", CONV_LIB_FUNC, 
API_SPARSE, HIP_UNSUPPORTED}}, - - // 9. cuSPARSE Extra Function Reference - {"cusparseXcsrgeamNnz", {"hipsparseXcsrgeamNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseScsrgeam", {"hipsparseScsrgeam", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrgeam", {"hipsparseDcsrgeam", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrgeam", {"hipsparseCcsrgeam", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrgeam", {"hipsparseZcsrgeam", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXcsrgeam2Nnz", {"hipsparseXcsrgeam2Nnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseScsrgeam2", {"hipsparseScsrgeam2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrgeam2", {"hipsparseDcsrgeam2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrgeam2", {"hipsparseCcsrgeam2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrgeam2", {"hipsparseZcsrgeam2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrgeam2_bufferSizeExt", {"hipsparseScsrgeam2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrgeam2_bufferSizeExt", {"hipsparseDcsrgeam2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrgeam2_bufferSizeExt", {"hipsparseCcsrgeam2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrgeam2_bufferSizeExt", {"hipsparseZcsrgeam2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXcsrgemmNnz", {"hipsparseXcsrgemmNnz", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseScsrgemm", {"hipsparseScsrgemm", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrgemm", {"hipsparseDcsrgemm", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrgemm", {"hipsparseCcsrgemm", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrgemm", {"hipsparseZcsrgemm", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseXcsrgemm2Nnz", 
{"hipsparseXcsrgemm2Nnz", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseScsrgemm2", {"hipsparseScsrgemm2", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrgemm2", {"hipsparseDcsrgemm2", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrgemm2", {"hipsparseCcsrgemm2", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrgemm2", {"hipsparseZcsrgemm2", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrgemm2_bufferSizeExt", {"hipsparseScsrgemm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrgemm2_bufferSizeExt", {"hipsparseDcsrgemm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrgemm2_bufferSizeExt", {"hipsparseCcsrgemm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrgemm2_bufferSizeExt", {"hipsparseZcsrgemm2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - - // 10. cuSPARSE Preconditioners Reference - // 10.1. Incomplete Cholesky Factorization : level 0 - {"cusparseScsric0", {"hipsparseScsric0", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsric0", {"hipsparseDcsric0", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsric0", {"hipsparseCcsric0", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsric0", {"hipsparseZcsric0", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsric02_bufferSize", {"hipsparseScsric02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseScsric02_bufferSizeExt", {"hipsparseScsric02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsric02_bufferSize", {"hipsparseDcsric02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsric02_bufferSizeExt", {"hipsparseDcsric02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsric02_bufferSize", {"hipsparseCcsric02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsric02_bufferSizeExt", {"hipsparseCcsric02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, 
HIP_UNSUPPORTED}}, - {"cusparseZcsric02_bufferSize", {"hipsparseZcsric02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsric02_bufferSizeExt", {"hipsparseZcsric02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsric02_analysis", {"hipsparseScsric02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsric02_analysis", {"hipsparseDcsric02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsric02_analysis", {"hipsparseCcsric02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsric02_analysis", {"hipsparseZcsric02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsric02", {"hipsparseScsric02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsric02", {"hipsparseDcsric02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsric02", {"hipsparseCcsric02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsric02", {"hipsparseZcsric02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXcsric02_zeroPivot", {"hipsparseXcsric02_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSbsric02_bufferSize", {"hipsparseSbsric02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSbsric02_bufferSizeExt", {"hipsparseSbsric02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsric02_bufferSize", {"hipsparseDbsric02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsric02_bufferSizeExt", {"hipsparseDbsric02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsric02_bufferSize", {"hipsparseCbsric02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsric02_bufferSizeExt", {"hipsparseCbsric02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsric02_bufferSize", {"hipsparseZbsric02_bufferSize", 
"", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsric02_bufferSizeExt", {"hipsparseZbsric02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsric02_analysis", {"hipsparseSbsric02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsric02_analysis", {"hipsparseDbsric02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsric02_analysis", {"hipsparseCbsric02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsric02_analysis", {"hipsparseZbsric02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsric02", {"hipsparseSbsric02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsric02", {"hipsparseDbsric02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsric02", {"hipsparseCbsric02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsric02", {"hipsparseZbsric02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXbsric02_zeroPivot", {"hipsparseXbsric02_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // 10.2. 
Incomplete LU Factorization: level 0 - {"cusparseScsrilu0", {"hipsparseScsrilu0", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrilu0", {"hipsparseDcsrilu0", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrilu0", {"hipsparseCcsrilu0", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrilu0", {"hipsparseZcsrilu0", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseCsrilu0Ex", {"hipsparseCsrilu0Ex", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsrilu02_numericBoost", {"hipsparseScsrilu02_numericBoost", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrilu02_numericBoost", {"hipsparseDcsrilu02_numericBoost", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrilu02_numericBoost", {"hipsparseCcsrilu02_numericBoost", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrilu02_numericBoost", {"hipsparseZcsrilu02_numericBoost", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseXcsrilu02_zeroPivot", {"hipsparseXcsrilu02_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrilu02_bufferSize", {"hipsparseScsrilu02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseScsrilu02_bufferSizeExt", {"hipsparseScsrilu02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrilu02_bufferSize", {"hipsparseDcsrilu02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrilu02_bufferSizeExt", {"hipsparseDcsrilu02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrilu02_bufferSize", {"hipsparseCcsrilu02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrilu02_bufferSizeExt", {"hipsparseCcsrilu02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrilu02_bufferSize", {"hipsparseZcsrilu02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrilu02_bufferSizeExt", {"hipsparseZcsrilu02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrilu02_analysis", 
{"hipsparseScsrilu02_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrilu02_analysis", {"hipsparseDcsrilu02_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrilu02_analysis", {"hipsparseCcsrilu02_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrilu02_analysis", {"hipsparseZcsrilu02_analysis", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsrilu02", {"hipsparseScsrilu02", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsrilu02", {"hipsparseDcsrilu02", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsrilu02", {"hipsparseCcsrilu02", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsrilu02", {"hipsparseZcsrilu02", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseXbsric02_zeroPivot", {"hipsparseXcsrilu02_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseSbsrilu02_numericBoost", {"hipsparseSbsrilu02_numericBoost", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrilu02_numericBoost", {"hipsparseDbsrilu02_numericBoost", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrilu02_numericBoost", {"hipsparseCbsrilu02_numericBoost", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrilu02_numericBoost", {"hipsparseZbsrilu02_numericBoost", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrilu02_bufferSize", {"hipsparseSbsrilu02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSbsrilu02_bufferSizeExt", {"hipsparseSbsrilu02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrilu02_bufferSize", {"hipsparseDbsrilu02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrilu02_bufferSizeExt", {"hipsparseDbsrilu02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrilu02_bufferSize", {"hipsparseCbsrilu02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrilu02_bufferSizeExt", {"hipsparseCbsrilu02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, 
HIP_UNSUPPORTED}}, - {"cusparseZbsrilu02_bufferSize", {"hipsparseZbsrilu02_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrilu02_bufferSizeExt", {"hipsparseZbsrilu02_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrilu02_analysis", {"hipsparseSbsrilu02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrilu02_analysis", {"hipsparseDbsrilu02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrilu02_analysis", {"hipsparseCbsrilu02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrilu02_analysis", {"hipsparseZbsrilu02_analysis", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSbsrilu02", {"hipsparseSbsrilu02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsrilu02", {"hipsparseDbsrilu02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsrilu02", {"hipsparseCbsrilu02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsrilu02", {"hipsparseZbsrilu02", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXbsrilu02_zeroPivot", {"hipsparseXbsrilu02_zeroPivot", "", CONV_LIB_FUNC, API_SPARSE}}, - - // 10.3. 
Tridiagonal Solve - {"cusparseSgtsv", {"hipsparseSgtsv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsv", {"hipsparseDgtsv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsv", {"hipsparseCgtsv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsv", {"hipsparseZgtsv", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsv_nopivot", {"hipsparseSgtsv_nopivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsv_nopivot", {"hipsparseDgtsv_nopivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsv_nopivot", {"hipsparseCgtsv_nopivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsv_nopivot", {"hipsparseZgtsv_nopivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsv2_bufferSizeExt", {"hipsparseSgtsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsv2_bufferSizeExt", {"hipsparseDgtsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsv2_bufferSizeExt", {"hipsparseCgtsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsv2_bufferSizeExt", {"hipsparseZgtsv2_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsv2", {"hipsparseSgtsv2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsv2", {"hipsparseDgtsv2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsv2", {"hipsparseCgtsv2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsv2", {"hipsparseZgtsv2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsv2_nopivot_bufferSizeExt", {"hipsparseSgtsv2_nopivot_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsv2_nopivot_bufferSizeExt", {"hipsparseDgtsv2_nopivot_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsv2_nopivot_bufferSizeExt", {"hipsparseCgtsv2_nopivot_bufferSizeExt", 
"", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsv2_nopivot_bufferSizeExt", {"hipsparseZgtsv2_nopivot_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsv2_nopivot", {"hipsparseSgtsv2_nopivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsv2_nopivot", {"hipsparseDgtsv2_nopivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsv2_nopivot", {"hipsparseCgtsv2_nopivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsv2_nopivot", {"hipsparseZgtsv2_nopivot", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // 10.4. Batched Tridiagonal Solve - {"cusparseSgtsvStridedBatch", {"hipsparseSgtsvStridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsvStridedBatch", {"hipsparseDgtsvStridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsvStridedBatch", {"hipsparseCgtsvStridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsvStridedBatch", {"hipsparseZgtsvStridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsv2StridedBatch_bufferSizeExt", {"hipsparseSgtsv2StridedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsv2StridedBatch_bufferSizeExt", {"hipsparseDgtsv2StridedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsv2StridedBatch_bufferSizeExt", {"hipsparseCgtsv2StridedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsv2StridedBatch_bufferSizeExt", {"hipsparseZgtsv2StridedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsv2StridedBatch", {"hipsparseSgtsv2StridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsv2StridedBatch", {"hipsparseDgtsv2StridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsv2StridedBatch", {"hipsparseCgtsv2StridedBatch", "", CONV_LIB_FUNC, 
API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsv2StridedBatch", {"hipsparseZgtsv2StridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsvInterleavedBatch_bufferSizeExt", {"hipsparseSgtsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsvInterleavedBatch_bufferSizeExt", {"hipsparseDgtsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsvInterleavedBatch_bufferSizeExt", {"hipsparseCgtsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsvInterleavedBatch_bufferSizeExt", {"hipsparseZgtsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgtsvInterleavedBatch", {"hipsparseSgtsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgtsvInterleavedBatch", {"hipsparseDgtsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgtsvInterleavedBatch", {"hipsparseCgtsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgtsvInterleavedBatch", {"hipsparseZgtsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // 10.5. 
Batched Pentadiagonal Solve - {"cusparseSgpsvInterleavedBatch_bufferSizeExt", {"hipsparseSgpsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgpsvInterleavedBatch_bufferSizeExt", {"hipsparseDgpsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgpsvInterleavedBatch_bufferSizeExt", {"hipsparseCgpsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgpsvInterleavedBatch_bufferSizeExt", {"hipsparseZgpsvInterleavedBatch_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgpsvInterleavedBatch", {"hipsparseSgpsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgpsvInterleavedBatch", {"hipsparseDgpsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgpsvInterleavedBatch", {"hipsparseCgpsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgpsvInterleavedBatch", {"hipsparseZgpsvInterleavedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // 11. cuSPARSE Matrix Reorderings Reference - {"cusparseScsrcolor", {"hipsparseScsrcolor", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsrcolor", {"hipsparseDcsrcolor", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsrcolor", {"hipsparseCcsrcolor", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsrcolor", {"hipsparseZcsrcolor", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // 12. 
cuSPARSE Format Conversion Reference - {"cusparseSbsr2csr", {"hipsparseSbsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDbsr2csr", {"hipsparseDbsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCbsr2csr", {"hipsparseCbsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZbsr2csr", {"hipsparseZbsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgebsr2gebsc_bufferSize", {"hipsparseSgebsr2gebsc_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSgebsr2gebsc_bufferSizeExt", {"hipsparseSgebsr2gebsc_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgebsr2gebsc_bufferSize", {"hipsparseDgebsr2gebsc_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgebsr2gebsc_bufferSizeExt", {"hipsparseDgebsr2gebsc_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgebsr2gebsc_bufferSize", {"hipsparseCgebsr2gebsc_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgebsr2gebsc_bufferSizeExt", {"hipsparseCgebsr2gebsc_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgebsr2gebsc_bufferSize", {"hipsparseZgebsr2gebsc_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgebsr2gebsc_bufferSizeExt", {"hipsparseZgebsr2gebsc_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgebsr2gebsc", {"hipsparseSgebsr2gebsc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgebsr2gebsc", {"hipsparseDgebsr2gebsc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgebsr2gebsc", {"hipsparseCgebsr2gebsc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgebsr2gebsc", {"hipsparseZgebsr2gebsc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSgebsr2gebsr_bufferSize", {"hipsparseSgebsr2gebsr_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - 
{"cusparseSgebsr2gebsr_bufferSizeExt", {"hipsparseSgebsr2gebsr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgebsr2gebsr_bufferSize", {"hipsparseDgebsr2gebsr_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgebsr2gebsr_bufferSizeExt", {"hipsparseDgebsr2gebsr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgebsr2gebsr_bufferSize", {"hipsparseCgebsr2gebsr_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgebsr2gebsr_bufferSizeExt", {"hipsparseCgebsr2gebsr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgebsr2gebsr_bufferSize", {"hipsparseZgebsr2gebsr_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgebsr2gebsr_bufferSizeExt", {"hipsparseZgebsr2gebsr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXgebsr2csr", {"hipsparseXgebsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSgebsr2csr", {"hipsparseSgebsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgebsr2csr", {"hipsparseDgebsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgebsr2csr", {"hipsparseCgebsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgebsr2csr", {"hipsparseZgebsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXgebsr2gebsrNnz", {"hipsparseXgebsr2gebsrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSgebsr2gebsr", {"hipsparseSgebsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDgebsr2gebsr", {"hipsparseDgebsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCgebsr2gebsr", {"hipsparseCgebsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZgebsr2gebsr", {"hipsparseZgebsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsr2gebsr_bufferSize", {"hipsparseScsr2gebsr_bufferSize", "", 
CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseScsr2gebsr_bufferSizeExt", {"hipsparseScsr2gebsr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsr2gebsr_bufferSize", {"hipsparseDcsr2gebsr_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsr2gebsr_bufferSizeExt", {"hipsparseDcsr2gebsr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsr2gebsr_bufferSize", {"hipsparseCcsr2gebsr_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsr2gebsr_bufferSizeExt", {"hipsparseCcsr2gebsr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsr2gebsr_bufferSize", {"hipsparseZcsr2gebsr_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsr2gebsr_bufferSizeExt", {"hipsparseZcsr2gebsr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXcsr2gebsrNnz", {"hipsparseXcsr2gebsrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseScsr2gebsr", {"hipsparseScsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsr2gebsr", {"hipsparseDcsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsr2gebsr", {"hipsparseCcsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsr2gebsr", {"hipsparseZcsr2gebsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXcoo2csr", {"hipsparseXcoo2csr", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsc2dense", {"hipsparseScsc2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsc2dense", {"hipsparseDcsc2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsc2dense", {"hipsparseCcsc2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsc2dense", {"hipsparseZcsc2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsc2hyb", {"hipsparseScsc2hyb", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - 
{"cusparseDcsc2hyb", {"hipsparseDcsc2hyb", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsc2hyb", {"hipsparseCcsc2hyb", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsc2hyb", {"hipsparseZcsc2hyb", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXcsr2bsrNnz", {"hipsparseXcsr2bsrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseScsr2bsr", {"hipsparseScsr2bsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsr2bsr", {"hipsparseDcsr2bsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsr2bsr", {"hipsparseCcsr2bsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsr2bsr", {"hipsparseZcsr2bsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseXcsr2coo", {"hipsparseXcsr2coo", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseScsr2csc", {"hipsparseScsr2csc", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsr2csc", {"hipsparseDcsr2csc", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsr2csc", {"hipsparseCcsr2csc", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsr2csc", {"hipsparseZcsr2csc", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseCsr2cscEx", {"hipsparseCsr2cscEx", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCsr2cscEx2", {"hipsparseCsr2cscEx2", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCsr2cscEx2_bufferSize", {"hipsparseCsr2cscEx2_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsr2dense", {"hipsparseScsr2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsr2dense", {"hipsparseDcsr2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsr2dense", {"hipsparseCcsr2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsr2dense", {"hipsparseZcsr2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsr2csr_compress", {"hipsparseScsr2csr_compress", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - 
{"cusparseDcsr2csr_compress", {"hipsparseDcsr2csr_compress", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsr2csr_compress", {"hipsparseCcsr2csr_compress", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsr2csr_compress", {"hipsparseZcsr2csr_compress", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsr2hyb", {"hipsparseScsr2hyb", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseDcsr2hyb", {"hipsparseDcsr2hyb", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseCcsr2hyb", {"hipsparseCcsr2hyb", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseZcsr2hyb", {"hipsparseZcsr2hyb", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseSdense2csc", {"hipsparseSdense2csc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDdense2csc", {"hipsparseDdense2csc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCdense2csc", {"hipsparseCdense2csc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZdense2csc", {"hipsparseZdense2csc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSdense2csr", {"hipsparseSdense2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDdense2csr", {"hipsparseDdense2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCdense2csr", {"hipsparseCdense2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZdense2csr", {"hipsparseZdense2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSdense2hyb", {"hipsparseSdense2hyb", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDdense2hyb", {"hipsparseDdense2hyb", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCdense2hyb", {"hipsparseCdense2hyb", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZdense2hyb", {"hipsparseZdense2hyb", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseShyb2csc", {"hipsparseShyb2csc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDhyb2csc", {"hipsparseDhyb2csc", "", 
CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseChyb2csc", {"hipsparseChyb2csc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZhyb2csc", {"hipsparseZhyb2csc", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseShyb2csr", {"hipsparseShyb2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDhyb2csr", {"hipsparseDhyb2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseChyb2csr", {"hipsparseChyb2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZhyb2csr", {"hipsparseZhyb2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseShyb2dense", {"hipsparseShyb2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDhyb2dense", {"hipsparseDhyb2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseChyb2dense", {"hipsparseChyb2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZhyb2dense", {"hipsparseZhyb2dense", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSnnz", {"hipsparseSnnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnnz", {"hipsparseDnnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCnnz", {"hipsparseCnnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZnnz", {"hipsparseZnnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseCreateIdentityPermutation", {"hipsparseCreateIdentityPermutation", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseXcoosort_bufferSizeExt", {"hipsparseXcoosort_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseXcoosortByRow", {"hipsparseXcoosortByRow", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseXcoosortByColumn", {"hipsparseXcoosortByColumn", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseXcsrsort_bufferSizeExt", {"hipsparseXcsrsort_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseXcsrsort", {"hipsparseXcsrsort", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseXcscsort_bufferSizeExt", 
{"hipsparseXcscsort_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE}}, - {"cusparseXcscsort", {"hipsparseXcscsort", "", CONV_LIB_FUNC, API_SPARSE}}, - - {"cusparseCreateCsru2csrInfo", {"hipsparseCreateCsru2csrInfo", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyCsru2csrInfo", {"hipsparseDestroyCsru2csrInfo", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsru2csr_bufferSizeExt", {"hipsparseScsru2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsru2csr_bufferSizeExt", {"hipsparseDcsru2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsru2csr_bufferSizeExt", {"hipsparseCcsru2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsru2csr_bufferSizeExt", {"hipsparseZcsru2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseScsr2csru", {"hipsparseScsr2csru", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDcsr2csru", {"hipsparseDcsr2csru", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCcsr2csru", {"hipsparseCcsr2csru", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZcsr2csru", {"hipsparseZcsr2csru", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneDense2csr", {"hipsparseHpruneDense2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneDense2csr", {"hipsparseSpruneDense2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneDense2csr", {"hipsparseDpruneDense2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneDense2csr_bufferSizeExt", {"hipsparseHpruneDense2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneDense2csr_bufferSizeExt", {"hipsparseSpruneDense2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneDense2csr_bufferSizeExt", {"hipsparseDpruneDense2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, 
HIP_UNSUPPORTED}}, - - {"cusparseHpruneDense2csrNnz", {"hipsparseHpruneDense2csrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneDense2csrNnz", {"hipsparseSpruneDense2csrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneDense2csrNnz", {"hipsparseDpruneDense2csrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneCsr2csr", {"hipsparseHpruneCsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneCsr2csr", {"hipsparseSpruneCsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneCsr2csr", {"hipsparseDpruneCsr2csr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneCsr2csr_bufferSizeExt", {"hipsparseHpruneCsr2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneCsr2csr_bufferSizeExt", {"hipsparseSpruneCsr2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneCsr2csr_bufferSizeExt", {"hipsparseDpruneCsr2csr_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneCsr2csrNnz", {"hipsparseHpruneCsr2csrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneCsr2csrNnz", {"hipsparseSpruneCsr2csrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneCsr2csrNnz", {"hipsparseDpruneCsr2csrNnz", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneDense2csrByPercentage", {"hipsparseHpruneDense2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneDense2csrByPercentage", {"hipsparseSpruneDense2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneDense2csrByPercentage", {"hipsparseDpruneDense2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneDense2csrByPercentage_bufferSizeExt", {"hipsparseHpruneDense2csrByPercentage_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - 
{"cusparseSpruneDense2csrByPercentage_bufferSizeExt", {"hipsparseSpruneDense2csrByPercentage_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneDense2csrByPercentage_bufferSizeExt", {"hipsparseDpruneDense2csrByPercentage_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneDense2csrNnzByPercentage", {"hipsparseHpruneDense2csrNnzByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneDense2csrNnzByPercentage", {"hipsparseSpruneDense2csrNnzByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneDense2csrNnzByPercentage", {"hipsparseDpruneDense2csrNnzByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneCsr2csrByPercentage", {"hipsparseHpruneCsr2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneCsr2csrByPercentage", {"hipsparseSpruneCsr2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneCsr2csrByPercentage", {"hipsparseDpruneCsr2csrByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseHpruneCsr2csrByPercentage_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseSpruneCsr2csrByPercentage_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneCsr2csrByPercentage_bufferSizeExt", {"hipsparseDpruneCsr2csrByPercentage_bufferSizeExt", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHpruneCsr2csrNnzByPercentage", {"hipsparseHpruneCsr2csrNnzByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpruneCsr2csrNnzByPercentage", {"hipsparseSpruneCsr2csrNnzByPercentage", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDpruneCsr2csrNnzByPercentage", {"hipsparseDpruneCsr2csrNnzByPercentage", "", CONV_LIB_FUNC, API_SPARSE, 
HIP_UNSUPPORTED}}, - - {"cusparseSnnz_compress", {"hipsparseSnnz_compress", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnnz_compress", {"hipsparseDnnz_compress", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCnnz_compress", {"hipsparseCnnz_compress", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseZnnz_compress", {"hipsparseZnnz_compress", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // 13. cuSPARSE Generic API Reference - // Generic Sparse API helper functions - // Sparse Matrix descriptor - {"cusparseCreateCoo", {"hipsparseCreateCoo", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCreateCooAoS", {"hipsparseCreateCooAoS", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCreateCsr", {"hipsparseCreateCsr", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroySpMat", {"hipsparseDestroySpMat", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCooGet", {"hipsparseCooGet", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCooAoSGet", {"hipsparseCooAoSGet", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseCsrGet", {"hipsparseCsrGet", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMatGetFormat", {"hipsparseSpMatGetFormat", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMatGetIndexBase", {"hipsparseSpMatGetIndexBase", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMatGetValues", {"hipsparseSpMatGetValues", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMatSetValues", {"hipsparseSpMatSetValues", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMatGetStridedBatch", {"hipsparseSpMatGetStridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMatSetStridedBatch", {"hipsparseSpMatSetStridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMatSetNumBatches", {"hipsparseSpMatSetNumBatches", "", CONV_LIB_FUNC, 
API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMatGetNumBatches", {"hipsparseSpMatGetNumBatches", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - // Sparse Vector descriptor - {"cusparseCreateSpVec", {"hipsparseCreateSpVec", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroySpVec", {"hipsparseDestroySpVec", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpVecGet", {"hipsparseSpVecGet", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpVecGetIndexBase", {"hipsparseSpVecGetIndexBase", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpVecGetValues", {"hipsparseSpVecGetValues", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpVecSetValues", {"hipsparseSpVecSetValues", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // Generic Dense API helper functions - // Dense Matrix descriptor - {"cusparseCreateDnMat", {"hipsparseCreateDnMat", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyDnMat", {"hipsparseDestroyDnMat", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnMatGet", {"hipsparseDnMatGet", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnMatGetValues", {"hipsparseDnMatGetValues", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnMatSetValues", {"hipsparseDnMatSetValues", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnMatSetStridedBatch", {"hipsparseDnMatSetStridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnMatGetStridedBatch", {"hipsparseDnMatGetStridedBatch", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - // Dense Vector descriptor - {"cusparseCreateDnVec", {"hipsparseCreateDnVec", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDestroyDnVec", {"hipsparseDestroyDnVec", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnVecGet", {"hipsparseDnVecGet", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnVecGetValues", 
{"hipsparseDnVecGetValues", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnVecSetValues", {"hipsparseDnVecSetValues", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // Sparse Matrix * Matrix Multiplication - {"cusparseSpMM", {"hipsparseSpMM", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMM_bufferSize", {"hipsparseSpMM_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // Sparse Vector * Vector Multiplication - {"cusparseSpVV", {"hipsparseSpVV", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpVV_bufferSize", {"hipsparseSpVV_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - - // Sparse Matrix * Vector Multiplication - {"cusparseSpMV", {"hipsparseSpMV", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMV_bufferSize", {"hipsparseSpMV_bufferSize", "", CONV_LIB_FUNC, API_SPARSE, HIP_UNSUPPORTED}}, -}; diff --git a/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp b/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp deleted file mode 100644 index 1d3fe28c62..0000000000 --- a/hipify-clang/src/CUDA2HIP_SPARSE_API_types.cpp +++ /dev/null @@ -1,187 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "CUDA2HIP.h" - -// Maps the names of CUDA SPARSE API types to the corresponding HIP types -const std::map CUDA_SPARSE_TYPE_NAME_MAP{ - - // 1. Structs - {"cusparseContext", {"hipsparseContext", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseHandle_t", {"hipsparseHandle_t", "", CONV_TYPE, API_SPARSE}}, - - {"cusparseHybMat", {"hipsparseHybMat", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseHybMat_t", {"hipsparseHybMat_t", "", CONV_TYPE, API_SPARSE}}, - - {"cusparseMatDescr", {"hipsparseMatDescr", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseMatDescr_t", {"hipsparseMatDescr_t", "", CONV_TYPE, API_SPARSE}}, - - {"cusparseSolveAnalysisInfo", {"hipsparseSolveAnalysisInfo", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSolveAnalysisInfo_t", {"hipsparseSolveAnalysisInfo_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"csrsv2Info", {"csrsv2Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"csrsv2Info_t", {"csrsv2Info_t", "", CONV_TYPE, API_SPARSE}}, - - {"csrsm2Info", {"csrsm2Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"csrsm2Info_t", {"csrsm2Info_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"bsrsv2Info", {"bsrsv2Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"bsrsv2Info_t", {"bsrsv2Info_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"bsrsm2Info", {"bsrsm2Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"bsrsm2Info_t", {"bsrsm2Info_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - 
{"bsric02Info", {"bsric02Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"bsric02Info_t", {"bsric02Info_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"csrilu02Info", {"csrilu02Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"csrilu02Info_t", {"csrilu02Info_t", "", CONV_TYPE, API_SPARSE}}, - - {"bsrilu02Info", {"bsrilu02Info", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"bsrilu02Info_t", {"bsrilu02Info_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"csru2csrInfo", {"csru2csrInfo", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"csru2csrInfo_t", {"csru2csrInfo_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"csrgemm2Info", {"csrgemm2Info", "", CONV_TYPE, API_SPARSE}}, - {"csrgemm2Info_t", {"csrgemm2Info_t", "", CONV_TYPE, API_SPARSE}}, - - {"cusparseColorInfo", {"hipsparseColorInfo", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseColorInfo_t", {"hipsparseColorInfo_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"pruneInfo", {"pruneInfo", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"pruneInfo_t", {"pruneInfo_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSpMatDescr", {"hipsparseSpMatDescr", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpMatDescr_t", {"hipsparseSpMatDescr_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseDnMatDescr", {"hipsparseDnMatDescr", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnMatDescr_t", {"hipsparseDnMatDescr_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSpVecDescr", {"hipsparseSpVecDescr", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseSpVecDescr_t", {"hipsparseSpVecDescr_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseDnVecDescr", {"hipsparseDnVecDescr", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"cusparseDnVecDescr_t", {"hipsparseDnVecDescr_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - - // 2. 
Enums - {"cusparseAction_t", {"hipsparseAction_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_ACTION_SYMBOLIC", {"HIPSPARSE_ACTION_SYMBOLIC", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_ACTION_NUMERIC", {"HIPSPARSE_ACTION_NUMERIC", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseDirection_t", {"hipsparseDirection_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_DIRECTION_ROW", {"HIPSPARSE_DIRECTION_ROW", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_DIRECTION_COLUMN", {"HIPSPARSE_DIRECTION_COLUMN", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseHybPartition_t", {"hipsparseHybPartition_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_HYB_PARTITION_AUTO", {"HIPSPARSE_HYB_PARTITION_AUTO", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_HYB_PARTITION_USER", {"HIPSPARSE_HYB_PARTITION_USER", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_HYB_PARTITION_MAX", {"HIPSPARSE_HYB_PARTITION_MAX", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseDiagType_t", {"hipsparseDiagType_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_DIAG_TYPE_NON_UNIT", {"HIPSPARSE_DIAG_TYPE_NON_UNIT", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_DIAG_TYPE_UNIT", {"HIPSPARSE_DIAG_TYPE_UNIT", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseFillMode_t", {"hipsparseFillMode_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_FILL_MODE_LOWER", {"HIPSPARSE_FILL_MODE_LOWER", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_FILL_MODE_UPPER", {"HIPSPARSE_FILL_MODE_UPPER", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseIndexBase_t", {"hipsparseIndexBase_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_INDEX_BASE_ZERO", {"HIPSPARSE_INDEX_BASE_ZERO", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_INDEX_BASE_ONE", {"HIPSPARSE_INDEX_BASE_ONE", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseMatrixType_t", {"hipsparseMatrixType_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_MATRIX_TYPE_GENERAL", 
{"HIPSPARSE_MATRIX_TYPE_GENERAL", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_MATRIX_TYPE_SYMMETRIC", {"HIPSPARSE_MATRIX_TYPE_SYMMETRIC", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_MATRIX_TYPE_HERMITIAN", {"HIPSPARSE_MATRIX_TYPE_HERMITIAN", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_MATRIX_TYPE_TRIANGULAR", {"HIPSPARSE_MATRIX_TYPE_TRIANGULAR", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseOperation_t", {"hipsparseOperation_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_OPERATION_NON_TRANSPOSE", {"HIPSPARSE_OPERATION_NON_TRANSPOSE", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_OPERATION_TRANSPOSE", {"HIPSPARSE_OPERATION_TRANSPOSE", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE", {"HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparsePointerMode_t", {"hipsparsePointerMode_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_POINTER_MODE_HOST", {"HIPSPARSE_POINTER_MODE_HOST", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_POINTER_MODE_DEVICE", {"HIPSPARSE_POINTER_MODE_DEVICE", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseAlgMode_t", {"hipsparseAlgMode_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_ALG0", {"CUSPARSE_ALG0", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_ALG1", {"CUSPARSE_ALG1", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_ALG_NAIVE", {"CUSPARSE_ALG_NAIVE", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_ALG_MERGE_PATH", {"CUSPARSE_ALG_MERGE_PATH", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSolvePolicy_t", {"hipsparseSolvePolicy_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_SOLVE_POLICY_NO_LEVEL", {"HIPSPARSE_SOLVE_POLICY_NO_LEVEL", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_SOLVE_POLICY_USE_LEVEL", {"HIPSPARSE_SOLVE_POLICY_USE_LEVEL", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseStatus_t", 
{"hipsparseStatus_t", "", CONV_TYPE, API_SPARSE}}, - {"CUSPARSE_STATUS_SUCCESS", {"HIPSPARSE_STATUS_SUCCESS", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_NOT_INITIALIZED", {"HIPSPARSE_STATUS_NOT_INITIALIZED", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_ALLOC_FAILED", {"HIPSPARSE_STATUS_ALLOC_FAILED", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_INVALID_VALUE", {"HIPSPARSE_STATUS_INVALID_VALUE", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_ARCH_MISMATCH", {"HIPSPARSE_STATUS_ARCH_MISMATCH", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_MAPPING_ERROR", {"HIPSPARSE_STATUS_MAPPING_ERROR", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_EXECUTION_FAILED", {"HIPSPARSE_STATUS_EXECUTION_FAILED", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_INTERNAL_ERROR", {"HIPSPARSE_STATUS_INTERNAL_ERROR", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED", {"HIPSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - {"CUSPARSE_STATUS_ZERO_PIVOT", {"HIPSPARSE_STATUS_ZERO_PIVOT", "", CONV_NUMERIC_LITERAL, API_SPARSE}}, - - {"cusparseCsr2CscAlg_t", {"hipsparseCsr2CscAlg_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_CSR2CSC_ALG1", {"HIPSPARSE_CSR2CSC_ALG1", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_CSR2CSC_ALG2", {"HIPSPARSE_CSR2CSC_ALG2", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseFormat_t", {"hipsparseFormat_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_FORMAT_CSR", {"HIPSPARSE_FORMAT_CSR", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_FORMAT_CSC", {"HIPSPARSE_FORMAT_CSC", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_FORMAT_COO", {"HIPSPARSE_FORMAT_COO", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_FORMAT_COO_AOS", {"HIPSPARSE_FORMAT_COO_AOS", "", 
CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseOrder_t", {"hipsparseOrder_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_ORDER_COL", {"HIPSPARSE_ORDER_COL", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_ORDER_ROW", {"HIPSPARSE_ORDER_ROW", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSpMVAlg_t", {"hipsparseSpMVAlg_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_MV_ALG_DEFAULT", {"HIPSPARSE_MV_ALG_DEFAULT", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_COOMV_ALG", {"HIPSPARSE_COOMV_ALG", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_CSRMV_ALG1", {"HIPSPARSE_CSRMV_ALG1", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_CSRMV_ALG2", {"HIPSPARSE_CSRMV_ALG2", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseSpMMAlg_t", {"hipsparseSpMMAlg_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_MM_ALG_DEFAULT", {"HIPSPARSE_MM_ALG_DEFAULT", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_COOMM_ALG1", {"HIPSPARSE_COOMM_ALG1", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_COOMM_ALG2", {"HIPSPARSE_COOMM_ALG2", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_COOMM_ALG3", {"HIPSPARSE_COOMM_ALG3", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_CSRMM_ALG1", {"HIPSPARSE_CSRMM_ALG1", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - - {"cusparseIndexType_t", {"hipsparseIndexType_t", "", CONV_TYPE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_INDEX_16U", {"HIPSPARSE_INDEX_16U", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_INDEX_32I", {"HIPSPARSE_INDEX_32I", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_INDEX_64I", {"HIPSPARSE_INDEX_64I", "", CONV_NUMERIC_LITERAL, API_SPARSE, HIP_UNSUPPORTED}}, - - // 3. 
Defines - {"CUSPARSE_VER_MAJOR", {"HIPSPARSE_VER_MAJOR", "", CONV_DEFINE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_VER_MINOR", {"HIPSPARSE_VER_MINOR", "", CONV_DEFINE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_VER_PATCH", {"HIPSPARSE_VER_PATCH", "", CONV_DEFINE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_VER_BUILD", {"HIPSPARSE_VER_BUILD", "", CONV_DEFINE, API_SPARSE, HIP_UNSUPPORTED}}, - {"CUSPARSE_VERSION", {"HIPSPARSE_VERSION", "", CONV_DEFINE, API_SPARSE, HIP_UNSUPPORTED}}, -}; diff --git a/hipify-clang/src/HipifyAction.cpp b/hipify-clang/src/HipifyAction.cpp deleted file mode 100644 index 75138c47ab..0000000000 --- a/hipify-clang/src/HipifyAction.cpp +++ /dev/null @@ -1,755 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include -#include "HipifyAction.h" -#include "clang/Basic/SourceLocation.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Lex/HeaderSearch.h" -#include "LLVMCompat.h" -#include "CUDA2HIP.h" -#include "StringUtils.h" -#include "ArgParse.h" - -const std::string sHIP = "HIP"; -const std::string sROC = "ROC"; -const std::string sCub = "cub"; -const std::string sHipcub = "hipcub"; -const std::string sHIP_DYNAMIC_SHARED = "HIP_DYNAMIC_SHARED"; -const std::string sHIP_KERNEL_NAME = "HIP_KERNEL_NAME"; -std::string sHIP_SYMBOL = "HIP_SYMBOL"; -std::string s_reinterpret_cast = "reinterpret_cast"; -const std::string sHipLaunchKernelGGL = "hipLaunchKernelGGL"; -const std::string sDim3 = "dim3("; -const std::string s_hiprand_kernel_h = "hiprand_kernel.h"; -const std::string s_hiprand_h = "hiprand.h"; -const std::string sOnce = "once"; -const std::string s_string_literal = "[string literal]"; -// CUDA identifiers, used in matchers -const std::string sCudaMemcpyToSymbol = "cudaMemcpyToSymbol"; -const std::string sCudaMemcpyToSymbolAsync = "cudaMemcpyToSymbolAsync"; -const std::string sCudaGetSymbolSize = "cudaGetSymbolSize"; -const std::string sCudaGetSymbolAddress = "cudaGetSymbolAddress"; -const std::string sCudaMemcpyFromSymbol = "cudaMemcpyFromSymbol"; -const std::string sCudaMemcpyFromSymbolAsync = "cudaMemcpyFromSymbolAsync"; -const std::string sCudaFuncSetCacheConfig = "cudaFuncSetCacheConfig"; -const std::string sCudaFuncGetAttributes = "cudaFuncGetAttributes"; -// Matchers' names -const StringRef sCudaSharedIncompleteArrayVar = "cudaSharedIncompleteArrayVar"; -const StringRef sCudaLaunchKernel = "cudaLaunchKernel"; -const StringRef sCudaHostFuncCall = "cudaHostFuncCall"; -const StringRef sCudaDeviceFuncCall = "cudaDeviceFuncCall"; -const StringRef sCubNamespacePrefix = "cubNamespacePrefix"; -const StringRef sCubFunctionTemplateDecl = 
"cubFunctionTemplateDecl"; -const StringRef sCubUsingNamespaceDecl = "cubUsingNamespaceDecl"; - -std::set DeviceSymbolFunctions0 { - {sCudaMemcpyToSymbol}, - {sCudaMemcpyToSymbolAsync} -}; - -std::set DeviceSymbolFunctions1 { - {sCudaGetSymbolSize}, - {sCudaGetSymbolAddress}, - {sCudaMemcpyFromSymbol}, - {sCudaMemcpyFromSymbolAsync} -}; - -std::set ReinterpretFunctions{ - {sCudaFuncSetCacheConfig}, - {sCudaFuncGetAttributes} -}; - -std::set ReinterpretFunctions0{ - {sCudaFuncSetCacheConfig} -}; - -std::set ReinterpretFunctions1{ - {sCudaFuncGetAttributes} -}; - -void HipifyAction::RewriteString(StringRef s, clang::SourceLocation start) { - auto &SM = getCompilerInstance().getSourceManager(); - size_t begin = 0; - while ((begin = s.find("cu", begin)) != StringRef::npos) { - const size_t end = s.find_first_of(" ", begin + 4); - StringRef name = s.slice(begin, end); - const auto found = CUDA_RENAMES_MAP().find(name); - if (found != CUDA_RENAMES_MAP().end()) { - StringRef repName = Statistics::isToRoc(found->second) ? 
found->second.rocName : found->second.hipName; - hipCounter counter = {s_string_literal, "", ConvTypes::CONV_LITERAL, ApiTypes::API_RUNTIME, found->second.supportDegree}; - Statistics::current().incrementCounter(counter, name.str()); - if (!Statistics::isUnsupported(counter)) { - clang::SourceLocation sl = start.getLocWithOffset(begin + 1); - ct::Replacement Rep(SM, sl, name.size(), repName.str()); - clang::FullSourceLoc fullSL(sl, SM); - insertReplacement(Rep, fullSL); - } - } - if (end == StringRef::npos) break; - begin = end + 1; - } -} - -clang::SourceLocation HipifyAction::GetSubstrLocation(const std::string &str, const clang::SourceRange &sr) { - clang::SourceLocation sl(sr.getBegin()); - clang::SourceLocation end(sr.getEnd()); - auto &SM = getCompilerInstance().getSourceManager(); - size_t length = SM.getCharacterData(end) - SM.getCharacterData(sl); - StringRef sfull = StringRef(SM.getCharacterData(sl), length); - size_t offset = sfull.find(str); - if (offset > 0) { - sl = sl.getLocWithOffset(offset); - } - return sl; -} - -/** - * Look at, and consider altering, a given token. - * - * If it's not a CUDA identifier, nothing happens. - * If it's an unsupported CUDA identifier, a warning is emitted. - * Otherwise, the source file is updated with the corresponding hipification. - */ -void HipifyAction::RewriteToken(const clang::Token &t) { - // String literals containing CUDA references need fixing. - if (t.is(clang::tok::string_literal)) { - StringRef s(t.getLiteralData(), t.getLength()); - RewriteString(unquoteStr(s), t.getLocation()); - return; - } else if (!t.isAnyIdentifier()) { - // If it's neither a string nor an identifier, we don't care. 
- return; - } - StringRef name = t.getRawIdentifier(); - clang::SourceLocation sl = t.getLocation(); - FindAndReplace(name, sl, CUDA_RENAMES_MAP()); -} - -void HipifyAction::FindAndReplace(StringRef name, - clang::SourceLocation sl, - const std::map &repMap, - bool bReplace) { - const auto found = repMap.find(name); - if (found == repMap.end()) { - // So it's an identifier, but not CUDA? Boring. - return; - } - Statistics::current().incrementCounter(found->second, name.str()); - clang::DiagnosticsEngine &DE = getCompilerInstance().getDiagnostics(); - // Warn the user about unsupported identifier. - if (Statistics::isUnsupported(found->second)) { - std::string sWarn; - Statistics::isToRoc(found->second) ? sWarn = sROC : sWarn = sHIP; - sWarn = "" + sWarn; - const auto ID = DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "CUDA identifier is unsupported in %0."); - DE.Report(sl, ID) << sWarn; - return; - } - if (!bReplace) { - return; - } - StringRef repName = Statistics::isToRoc(found->second) ? 
found->second.rocName : found->second.hipName; - auto &SM = getCompilerInstance().getSourceManager(); - ct::Replacement Rep(SM, sl, name.size(), repName.str()); - clang::FullSourceLoc fullSL(sl, SM); - insertReplacement(Rep, fullSL); -} - -namespace { - -clang::SourceRange getReadRange(clang::SourceManager &SM, const clang::SourceRange &exprRange) { - clang::SourceLocation begin = exprRange.getBegin(); - clang::SourceLocation end = exprRange.getEnd(); - bool beginSafe = !SM.isMacroBodyExpansion(begin) || clang::Lexer::isAtStartOfMacroExpansion(begin, SM, clang::LangOptions{}); - bool endSafe = !SM.isMacroBodyExpansion(end) || clang::Lexer::isAtEndOfMacroExpansion(end, SM, clang::LangOptions{}); - if (beginSafe && endSafe) { - return {SM.getFileLoc(begin), SM.getFileLoc(end)}; - } else { - return {SM.getSpellingLoc(begin), SM.getSpellingLoc(end)}; - } -} - -clang::SourceRange getWriteRange(clang::SourceManager &SM, const clang::SourceRange &exprRange) { - clang::SourceLocation begin = exprRange.getBegin(); - clang::SourceLocation end = exprRange.getEnd(); - // If the range is contained within a macro, update the macro definition. - // Otherwise, use the file location and hope for the best. - if (!SM.isMacroBodyExpansion(begin) || !SM.isMacroBodyExpansion(end)) { - return {SM.getExpansionLoc(begin), SM.getExpansionLoc(end)}; - } - return {SM.getSpellingLoc(begin), SM.getSpellingLoc(end)}; -} - -StringRef readSourceText(clang::SourceManager &SM, const clang::SourceRange &exprRange) { - return clang::Lexer::getSourceText(clang::CharSourceRange::getTokenRange(getReadRange(SM, exprRange)), SM, clang::LangOptions(), nullptr); -} - -/** - * Get a string representation of the expression `arg`, unless it's a defaulting function - * call argument, in which case get a 0. Used for building argument lists to kernel calls. 
- */ -std::string stringifyZeroDefaultedArg(clang::SourceManager &SM, const clang::Expr *arg) { - if (clang::isa(arg)) return "0"; - else return std::string(readSourceText(SM, arg->getSourceRange())); -} - -} // anonymous namespace - -bool HipifyAction::Exclude(const hipCounter &hipToken) { - switch (hipToken.type) { - case CONV_INCLUDE_CUDA_MAIN_H: - switch (hipToken.apiType) { - case API_DRIVER: - case API_RUNTIME: - if (insertedRuntimeHeader) return true; - insertedRuntimeHeader = true; - return false; - case API_BLAS: - if (insertedBLASHeader) return true; - insertedBLASHeader = true; - return false; - case API_RAND: - if (hipToken.hipName == s_hiprand_kernel_h) { - if (insertedRAND_kernelHeader) return true; - insertedRAND_kernelHeader = true; - return false; - } else if (hipToken.hipName == s_hiprand_h) { - if (insertedRANDHeader) return true; - insertedRANDHeader = true; - return false; - } - case API_DNN: - if (insertedDNNHeader) return true; - insertedDNNHeader = true; - return false; - case API_FFT: - if (insertedFFTHeader) return true; - insertedFFTHeader = true; - return false; - case API_COMPLEX: - if (insertedComplexHeader) return true; - insertedComplexHeader = true; - return false; - case API_SPARSE: - if (insertedSPARSEHeader) return true; - insertedSPARSEHeader = true; - return false; - default: - return false; - } - return false; - case CONV_INCLUDE: - if (hipToken.hipName.empty()) return true; - switch (hipToken.apiType) { - case API_RAND: - if (hipToken.hipName == s_hiprand_kernel_h) { - if (insertedRAND_kernelHeader) return true; - insertedRAND_kernelHeader = true; - } - return false; - default: - return false; - } - return false; - default: - return false; - } - return false; -} - -void HipifyAction::InclusionDirective(clang::SourceLocation hash_loc, - const clang::Token&, - StringRef file_name, - bool is_angled, - clang::CharSourceRange filename_range, - const clang::FileEntry*, StringRef, - StringRef, const clang::Module*) { - auto &SM = 
getCompilerInstance().getSourceManager(); - if (!SM.isWrittenInMainFile(hash_loc)) return; - if (!firstHeader) { - firstHeader = true; - firstHeaderLoc = hash_loc; - } - const auto found = CUDA_INCLUDE_MAP.find(file_name); - if (found == CUDA_INCLUDE_MAP.end()) return; - bool exclude = Exclude(found->second); - Statistics::current().incrementCounter(found->second, file_name.str()); - clang::SourceLocation sl = filename_range.getBegin(); - if (Statistics::isUnsupported(found->second)) { - clang::DiagnosticsEngine &DE = getCompilerInstance().getDiagnostics(); - DE.Report(sl, DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Unsupported CUDA header.")); - return; - } - clang::StringRef newInclude; - // Keep the same include type that the user gave. - if (!exclude) { - clang::SmallString<128> includeBuffer; - llvm::StringRef name = Statistics::isToRoc(found->second) ? found->second.rocName : found->second.hipName; - if (is_angled) newInclude = llvm::Twine("<" + name+ ">").toStringRef(includeBuffer); - else newInclude = llvm::Twine("\"" + name + "\"").toStringRef(includeBuffer); - } else { - // hashLoc is location of the '#', thus replacing the whole include directive by empty newInclude starting with '#'. 
- sl = hash_loc; - } - const char *B = SM.getCharacterData(sl); - const char *E = SM.getCharacterData(filename_range.getEnd()); - ct::Replacement Rep(SM, sl, E - B, newInclude.str()); - insertReplacement(Rep, clang::FullSourceLoc{sl, SM}); -} - -void HipifyAction::PragmaDirective(clang::SourceLocation Loc, clang::PragmaIntroducerKind Introducer) { - if (pragmaOnce) return; - auto &SM = getCompilerInstance().getSourceManager(); - if (!SM.isWrittenInMainFile(Loc)) return; - clang::Preprocessor &PP = getCompilerInstance().getPreprocessor(); - clang::Token tok; - PP.Lex(tok); - StringRef Text(SM.getCharacterData(tok.getLocation()), tok.getLength()); - if (Text == sOnce) { - pragmaOnce = true; - pragmaOnceLoc = tok.getEndLoc(); - } -} - -bool HipifyAction::cudaLaunchKernel(const mat::MatchFinder::MatchResult &Result) { - auto *launchKernel = Result.Nodes.getNodeAs(sCudaLaunchKernel); - if (!launchKernel) return false; - auto *calleeExpr = launchKernel->getCallee(); - if (!calleeExpr) return false; - auto *caleeDecl = launchKernel->getDirectCallee(); - if (!caleeDecl) return false; - auto *config = launchKernel->getConfig(); - if (!config) return false; - clang::SmallString<40> XStr; - llvm::raw_svector_ostream OS(XStr); - clang::LangOptions DefaultLangOptions; - auto *SM = Result.SourceManager; - clang::SourceRange sr = calleeExpr->getSourceRange(); - std::string kern = readSourceText(*SM, sr).str(); - OS << sHipLaunchKernelGGL << "("; - if (caleeDecl->isTemplateInstantiation()) { - OS << sHIP_KERNEL_NAME << "("; - std::string cub = sCub + "::"; - std::string hipcub; - const auto found = CUDA_CUB_TYPE_NAME_MAP.find(sCub); - if (found != CUDA_CUB_TYPE_NAME_MAP.end()) { - hipcub = found->second.hipName.str() + "::"; - } else { - hipcub = sHipcub + "::"; - } - size_t pos = kern.find(cub); - while (pos != std::string::npos) { - kern.replace(pos, cub.size(), hipcub); - pos = kern.find(cub, pos + hipcub.size()); - } - } - OS << kern; - if 
(caleeDecl->isTemplateInstantiation()) OS << ")"; - OS << ", "; - // Next up are the four kernel configuration parameters, the last two of which are optional and default to zero. - // Copy the two dimensional arguments verbatim. - for (unsigned int i = 0; i < 2; ++i) { - const std::string sArg = readSourceText(*SM, config->getArg(i)->getSourceRange()).str(); - bool bDim3 = std::equal(sDim3.begin(), sDim3.end(), sArg.c_str()); - OS << (bDim3 ? "" : sDim3) << sArg << (bDim3 ? "" : ")") << ", "; - } - // The stream/memory arguments default to zero if omitted. - OS << stringifyZeroDefaultedArg(*SM, config->getArg(2)) << ", "; - OS << stringifyZeroDefaultedArg(*SM, config->getArg(3)); - // If there are ordinary arguments to the kernel, just copy them verbatim into our new call. - int numArgs = launchKernel->getNumArgs(); - if (numArgs > 0) { - OS << ", "; - // Start of the first argument. - clang::SourceLocation argStart = llcompat::getBeginLoc(launchKernel->getArg(0)); - // End of the last argument. - clang::SourceLocation argEnd = llcompat::getEndLoc(launchKernel->getArg(numArgs - 1)); - OS << readSourceText(*SM, {argStart, argEnd}); - } - OS << ")"; - clang::SourceLocation launchKernelExprLocBeg = launchKernel->getExprLoc(); - clang::SourceLocation launchKernelExprLocEnd = launchKernelExprLocBeg.isMacroID() ? llcompat::getEndOfExpansionRangeForLoc(*SM, launchKernelExprLocBeg) : llcompat::getEndLoc(launchKernel); - clang::SourceLocation launchKernelEnd = llcompat::getEndLoc(launchKernel); - clang::BeforeThanCompare isBefore(*SM); - launchKernelExprLocEnd = isBefore(launchKernelEnd, launchKernelExprLocEnd) ? 
launchKernelExprLocEnd : launchKernelEnd; - clang::SourceRange replacementRange = getWriteRange(*SM, {launchKernelExprLocBeg, launchKernelExprLocEnd}); - clang::SourceLocation launchBeg = replacementRange.getBegin(); - clang::SourceLocation launchEnd = replacementRange.getEnd(); - if (isBefore(launchBeg, launchEnd)) { - size_t length = SM->getCharacterData(clang::Lexer::getLocForEndOfToken(launchEnd, 0, *SM, DefaultLangOptions)) - SM->getCharacterData(launchBeg); - ct::Replacement Rep(*SM, launchBeg, length, OS.str()); - clang::FullSourceLoc fullSL(launchBeg, *SM); - insertReplacement(Rep, fullSL); - hipCounter counter = {sHipLaunchKernelGGL, "", ConvTypes::CONV_KERNEL_LAUNCH, ApiTypes::API_RUNTIME}; - Statistics::current().incrementCounter(counter, sCudaLaunchKernel.str()); - return true; - } - return false; -} - -bool HipifyAction::cudaSharedIncompleteArrayVar(const mat::MatchFinder::MatchResult &Result) { - auto *sharedVar = Result.Nodes.getNodeAs(sCudaSharedIncompleteArrayVar); - if (!sharedVar) return false; - // Example: extern __shared__ uint sRadix1[]; - if (!sharedVar->hasExternalFormalLinkage()) return false; - clang::QualType QT = sharedVar->getType(); - std::string typeName; - if (QT->isIncompleteArrayType()) { - const clang::ArrayType *AT = QT.getTypePtr()->getAsArrayTypeUnsafe(); - QT = AT->getElementType(); - if (QT.getTypePtr()->isBuiltinType()) { - QT = QT.getCanonicalType(); - auto *BT = clang::dyn_cast(QT); - if (BT) { - clang::LangOptions LO; - LO.CUDA = true; - clang::PrintingPolicy policy(LO); - typeName = std::string(BT->getName(policy)); - } - } else { - typeName = QT.getAsString(); - } - } - if (!typeName.empty()) { - clang::SourceLocation slStart = sharedVar->getOuterLocStart(); - clang::SourceLocation slEnd = llcompat::getEndLoc(sharedVar->getTypeSourceInfo()->getTypeLoc()); - auto *SM = Result.SourceManager; - size_t repLength = SM->getCharacterData(slEnd) - SM->getCharacterData(slStart) + 1; - std::string varName = 
sharedVar->getNameAsString(); - std::string repName = sHIP_DYNAMIC_SHARED + "(" + typeName + ", " + varName + ")"; - ct::Replacement Rep(*SM, slStart, repLength, repName); - clang::FullSourceLoc fullSL(slStart, *SM); - insertReplacement(Rep, fullSL); - hipCounter counter = {sHIP_DYNAMIC_SHARED, "", ConvTypes::CONV_EXTERN_SHARED, ApiTypes::API_RUNTIME}; - Statistics::current().incrementCounter(counter, sCudaSharedIncompleteArrayVar.str()); - return true; - } - return false; -} - -bool HipifyAction::cudaDeviceFuncCall(const mat::MatchFinder::MatchResult &Result) { - if (const clang::CallExpr *call = Result.Nodes.getNodeAs(sCudaDeviceFuncCall)) { - auto *funcDcl = call->getDirectCallee(); - if (!funcDcl) return false; - FindAndReplace(funcDcl->getDeclName().getAsString(), llcompat::getBeginLoc(call), CUDA_DEVICE_FUNC_MAP, false); - return true; - } - return false; -} - -bool HipifyAction::cubNamespacePrefix(const mat::MatchFinder::MatchResult &Result) { - if (auto *decl = Result.Nodes.getNodeAs(sCubNamespacePrefix)) { - clang::QualType QT = decl->getUnderlyingType(); - auto *t = QT.getTypePtr(); - if (!t) return false; - const clang::ElaboratedType *et = t->getAs(); - if (!et) return false; - const clang::NestedNameSpecifier *nns = et->getQualifier(); - if (!nns) return false; - const clang::NamespaceDecl *nsd = nns->getAsNamespace(); - if (!nsd) return false; - const clang::TypeSourceInfo *si = decl->getTypeSourceInfo(); - const clang::TypeLoc tloc = si->getTypeLoc(); - const clang::SourceRange sr = tloc.getSourceRange(); - std::string name = nsd->getDeclName().getAsString(); - FindAndReplace(name, GetSubstrLocation(name, sr), CUDA_CUB_TYPE_NAME_MAP); - return true; - } - return false; -} - -bool HipifyAction::cubUsingNamespaceDecl(const mat::MatchFinder::MatchResult &Result) { - if (auto *decl = Result.Nodes.getNodeAs(sCubUsingNamespaceDecl)) { - if (auto nsd = decl->getNominatedNamespace()) { - FindAndReplace(nsd->getDeclName().getAsString(), 
decl->getIdentLocation(), CUDA_CUB_TYPE_NAME_MAP); - return true; - } - } - return false; -} - -bool HipifyAction::cubFunctionTemplateDecl(const mat::MatchFinder::MatchResult &Result) { - if (auto *decl = Result.Nodes.getNodeAs(sCubFunctionTemplateDecl)) { - auto *Tparams = decl->getTemplateParameters(); - bool ret = false; - for (size_t I = 0; I < Tparams->size(); ++I) { - const clang::ValueDecl *valueDecl = dyn_cast(Tparams->getParam(I)); - if (!valueDecl) continue; - clang::QualType QT = valueDecl->getType(); - auto *t = QT.getTypePtr(); - if (!t) continue; - const clang::ElaboratedType *et = t->getAs(); - if (!et) continue; - const clang::NestedNameSpecifier *nns = et->getQualifier(); - if (!nns) continue; - const clang::NamespaceDecl *nsd = nns->getAsNamespace(); - if (!nsd) continue; - const clang::SourceRange sr = valueDecl->getSourceRange(); - std::string name = nsd->getDeclName().getAsString(); - FindAndReplace(name, GetSubstrLocation(name, sr), CUDA_CUB_TYPE_NAME_MAP); - ret = true; - } - return ret; - } - return false; -} - -bool HipifyAction::cudaHostFuncCall(const mat::MatchFinder::MatchResult &Result) { - if (auto *call = Result.Nodes.getNodeAs(sCudaHostFuncCall)) { - if (!call->getNumArgs()) return false; - auto *funcDcl = call->getDirectCallee(); - if (!funcDcl) return false; - std::string sName = funcDcl->getDeclName().getAsString(); - unsigned int argNum = 0; - bool b_reinterpret = (ReinterpretFunctions.find(sName) != ReinterpretFunctions.end()) ? 
true : false; - if (DeviceSymbolFunctions0.find(sName) != DeviceSymbolFunctions0.end() || sCudaFuncSetCacheConfig == sName) { - argNum = 0; - } else if (call->getNumArgs() > 1 && (DeviceSymbolFunctions1.find(sName) != DeviceSymbolFunctions1.end() || sCudaFuncGetAttributes == sName)) { - argNum = 1; - } else { - return false; - } - clang::SmallString<40> XStr; - llvm::raw_svector_ostream OS(XStr); - clang::SourceRange sr = call->getArg(argNum)->getSourceRange(); - auto *SM = Result.SourceManager; - OS << (b_reinterpret ? s_reinterpret_cast : sHIP_SYMBOL) << "(" << readSourceText(*SM, sr) << ")"; - clang::SourceRange replacementRange = getWriteRange(*SM, { sr.getBegin(), sr.getEnd() }); - clang::SourceLocation s = replacementRange.getBegin(); - clang::SourceLocation e = replacementRange.getEnd(); - clang::LangOptions DefaultLangOptions; - size_t length = SM->getCharacterData(clang::Lexer::getLocForEndOfToken(e, 0, *SM, DefaultLangOptions)) - SM->getCharacterData(s); - ct::Replacement Rep(*SM, s, length, OS.str()); - clang::FullSourceLoc fullSL(s, *SM); - insertReplacement(Rep, fullSL); - return true; - } - return false; -} - -void HipifyAction::insertReplacement(const ct::Replacement &rep, const clang::FullSourceLoc &fullSL) { - llcompat::insertReplacement(*replacements, rep); - if (PrintStats) { - rep.getLength(); - Statistics::current().lineTouched(fullSL.getExpansionLineNumber()); - Statistics::current().bytesChanged(rep.getLength()); - } -} - -std::unique_ptr HipifyAction::CreateASTConsumer(clang::CompilerInstance &CI, StringRef) { - Finder.reset(new mat::MatchFinder); - // Replace the <<<...>>> language extension with a hip kernel launch - Finder->addMatcher(mat::cudaKernelCallExpr(mat::isExpansionInMainFile()).bind(sCudaLaunchKernel), this); - Finder->addMatcher( - mat::varDecl( - mat::isExpansionInMainFile(), - mat::allOf( - mat::hasAttr(clang::attr::CUDAShared), - mat::hasType(mat::incompleteArrayType()) - ) - ).bind(sCudaSharedIncompleteArrayVar), - this - 
); - Finder->addMatcher( - mat::callExpr( - mat::isExpansionInMainFile(), - mat::callee( - mat::functionDecl( - mat::hasAnyName( - sCudaGetSymbolAddress, - sCudaGetSymbolSize, - sCudaMemcpyFromSymbol, - sCudaMemcpyFromSymbolAsync, - sCudaMemcpyToSymbol, - sCudaMemcpyToSymbolAsync, - sCudaFuncSetCacheConfig, - sCudaFuncGetAttributes - ) - ) - ) - ).bind(sCudaHostFuncCall), - this - ); - Finder->addMatcher( - mat::callExpr( - mat::isExpansionInMainFile(), - mat::callee( - mat::functionDecl( - mat::anyOf( - mat::hasAttr(clang::attr::CUDADevice), - mat::hasAttr(clang::attr::CUDAGlobal) - ), - mat::unless(mat::hasAttr(clang::attr::CUDAHost)) - ) - ) - ).bind(sCudaDeviceFuncCall), - this - ); - Finder->addMatcher( - mat::typedefDecl( - mat::isExpansionInMainFile(), - mat::hasType( - mat::elaboratedType( - mat::hasQualifier( - mat::specifiesNamespace( - mat::hasName(sCub) - ) - ) - ) - ) - ).bind(sCubNamespacePrefix), - this - ); - // TODO: Maybe worth to make it more concrete based on final cubFunctionTemplateDecl - Finder->addMatcher( - mat::functionTemplateDecl( - mat::isExpansionInMainFile() - ).bind(sCubFunctionTemplateDecl), - this - ); - // TODO: Maybe worth to make it more concrete - Finder->addMatcher( - mat::usingDirectiveDecl( - mat::isExpansionInMainFile() - ).bind(sCubUsingNamespaceDecl), - this - ); - // Ownership is transferred to the caller. - return Finder->newASTConsumer(); -} - -void HipifyAction::Ifndef(clang::SourceLocation Loc, const clang::Token &MacroNameTok, const clang::MacroDefinition &MD) { - auto &SM = getCompilerInstance().getSourceManager(); - if (!SM.isWrittenInMainFile(Loc)) return; - StringRef Text(SM.getCharacterData(MacroNameTok.getLocation()), MacroNameTok.getLength()); - Ifndefs.insert(std::make_pair(Text.str(), MacroNameTok.getEndLoc())); -} - -void HipifyAction::EndSourceFileAction() { - // Insert the hip header, if we didn't already do it by accident during substitution. 
- if (!insertedRuntimeHeader) { - // It's not sufficient to just replace CUDA headers with hip ones, because numerous CUDA headers are - // implicitly included by the compiler. Instead, we _delete_ CUDA headers, and unconditionally insert - // one copy of the hip include into every file. - bool placeForIncludeCalculated = false; - clang::SourceLocation sl, controllingMacroLoc; - auto &SM = getCompilerInstance().getSourceManager(); - clang::Preprocessor &PP = getCompilerInstance().getPreprocessor(); - clang::HeaderSearch &HS = PP.getHeaderSearchInfo(); - clang::ExternalPreprocessorSource *EPL = HS.getExternalLookup(); - const clang::FileEntry *FE = SM.getFileEntryForID(SM.getMainFileID()); - const clang::IdentifierInfo *controllingMacro = HS.getFileInfo(FE).getControllingMacro(EPL); - if (controllingMacro) { - auto found = Ifndefs.find(controllingMacro->getName().str()); - if (found != Ifndefs.end()) { - controllingMacroLoc = found->second; - placeForIncludeCalculated = true; - } - } - if (pragmaOnce) { - if (placeForIncludeCalculated) sl = pragmaOnceLoc < controllingMacroLoc ? pragmaOnceLoc : controllingMacroLoc; - else sl = pragmaOnceLoc; - placeForIncludeCalculated = true; - } - if (!placeForIncludeCalculated) { - if (firstHeader) sl = firstHeaderLoc; - else sl = SM.getLocForStartOfFile(SM.getMainFileID()); - } - clang::FullSourceLoc fullSL(sl, SM); - ct::Replacement Rep(SM, sl, 0, "\n#include \n"); - insertReplacement(Rep, fullSL); - } - clang::ASTFrontendAction::EndSourceFileAction(); -} - -namespace { - -/** - * A silly little class to proxy PPCallbacks back to the HipifyAction class. 
- */ -class PPCallbackProxy : public clang::PPCallbacks { - HipifyAction &hipifyAction; - -public: - explicit PPCallbackProxy(HipifyAction &action): hipifyAction(action) {} - - void InclusionDirective(clang::SourceLocation hash_loc, const clang::Token &include_token, - StringRef file_name, bool is_angled, clang::CharSourceRange filename_range, - const clang::FileEntry *file, StringRef search_path, StringRef relative_path, - const clang::Module *imported -#if LLVM_VERSION_MAJOR > 6 - , clang::SrcMgr::CharacteristicKind FileType -#endif - ) override { - hipifyAction.InclusionDirective(hash_loc, include_token, file_name, is_angled, filename_range, file, search_path, relative_path, imported); - } - - void PragmaDirective(clang::SourceLocation Loc, clang::PragmaIntroducerKind Introducer) override { - hipifyAction.PragmaDirective(Loc, Introducer); - } - - void Ifndef(clang::SourceLocation Loc, const clang::Token &MacroNameTok, const clang::MacroDefinition &MD) override { - hipifyAction.Ifndef(Loc, MacroNameTok, MD); - } -}; -} - -bool HipifyAction::BeginInvocation(clang::CompilerInstance &CI) { - llcompat::RetainExcludedConditionalBlocks(CI); - return true; -} - -void HipifyAction::ExecuteAction() { - clang::Preprocessor &PP = getCompilerInstance().getPreprocessor(); - auto &SM = getCompilerInstance().getSourceManager(); - // Start lexing the specified input file. - const llvm::MemoryBuffer *FromFile = SM.getBuffer(SM.getMainFileID()); - clang::Lexer RawLex(SM.getMainFileID(), FromFile, SM, PP.getLangOpts()); - RawLex.SetKeepWhitespaceMode(true); - // Perform a token-level rewrite of CUDA identifiers to hip ones. The raw-mode lexer gives us enough - // information to tell the difference between identifiers, string literals, and "other stuff". It also - // ignores preprocessor directives, so this transformation will operate inside preprocessor-deleted code. 
- clang::Token RawTok; - RawLex.LexFromRawLexer(RawTok); - while (RawTok.isNot(clang::tok::eof)) { - RewriteToken(RawTok); - RawLex.LexFromRawLexer(RawTok); - } - // Register yourself as the preprocessor callback, by proxy. - PP.addPPCallbacks(std::unique_ptr(new PPCallbackProxy(*this))); - // Now we're done futzing with the lexer, have the subclass proceeed with Sema and AST matching. - clang::ASTFrontendAction::ExecuteAction(); -} - -void HipifyAction::run(const mat::MatchFinder::MatchResult &Result) { - if (cudaLaunchKernel(Result)) return; - if (cudaSharedIncompleteArrayVar(Result)) return; - if (cudaHostFuncCall(Result)) return; - if (cudaDeviceFuncCall(Result)) return; - if (cubNamespacePrefix(Result)) return; - if (cubFunctionTemplateDecl(Result)) return; - if (cubUsingNamespaceDecl(Result)) return; -} diff --git a/hipify-clang/src/HipifyAction.h b/hipify-clang/src/HipifyAction.h deleted file mode 100644 index f70d17dd0b..0000000000 --- a/hipify-clang/src/HipifyAction.h +++ /dev/null @@ -1,110 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once - -#include "clang/Lex/PPCallbacks.h" -#include "clang/Tooling/Tooling.h" -#include "clang/Tooling/Core/Replacement.h" -#include "clang/Frontend/FrontendAction.h" -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "ReplacementsFrontendActionFactory.h" -#include "Statistics.h" - -namespace ct = clang::tooling; -namespace mat = clang::ast_matchers; -using namespace llvm; - -/** - * A FrontendAction that hipifies CUDA programs. - */ -class HipifyAction : public clang::ASTFrontendAction, - public mat::MatchFinder::MatchCallback { -private: - ct::Replacements *replacements; - std::map Ifndefs; - std::unique_ptr Finder; - // CUDA implicitly adds its runtime header. We rewrite explicitly-provided CUDA includes with equivalent - // ones, and track - using this flag - if the result led to us including the hip runtime header. If it did - // not, we insert it at the top of the file when we finish processing it. - // This approach means we do the best it's possible to do w.r.t preserving the user's include order. - bool insertedRuntimeHeader = false; - bool insertedBLASHeader = false; - bool insertedRANDHeader = false; - bool insertedRAND_kernelHeader = false; - bool insertedDNNHeader = false; - bool insertedFFTHeader = false; - bool insertedSPARSEHeader = false; - bool insertedComplexHeader = false; - bool firstHeader = false; - bool pragmaOnce = false; - clang::SourceLocation firstHeaderLoc; - clang::SourceLocation pragmaOnceLoc; - // Rewrite a string literal to refer to hip, not CUDA. - void RewriteString(StringRef s, clang::SourceLocation start); - // Replace a CUDA identifier with the corresponding hip identifier, if applicable. 
- void RewriteToken(const clang::Token &t); - // Calculate str's SourceLocation in SourceRange sr - clang::SourceLocation GetSubstrLocation(const std::string &str, const clang::SourceRange &sr); - -public: - explicit HipifyAction(ct::Replacements *replacements): clang::ASTFrontendAction(), - replacements(replacements) {} - // MatchCallback listeners - bool cudaLaunchKernel(const mat::MatchFinder::MatchResult &Result); - bool cudaSharedIncompleteArrayVar(const mat::MatchFinder::MatchResult &Result); - bool cudaDeviceFuncCall(const mat::MatchFinder::MatchResult &Result); - bool cudaHostFuncCall(const mat::MatchFinder::MatchResult &Result); - bool cubNamespacePrefix(const mat::MatchFinder::MatchResult &Result); - bool cubFunctionTemplateDecl(const mat::MatchFinder::MatchResult &Result); - bool cubUsingNamespaceDecl(const mat::MatchFinder::MatchResult &Result); - // Called by the preprocessor for each include directive during the non-raw lexing pass. - void InclusionDirective(clang::SourceLocation hash_loc, - const clang::Token &include_token, - StringRef file_name, - bool is_angled, - clang::CharSourceRange filename_range, - const clang::FileEntry *file, - StringRef search_path, - StringRef relative_path, - const clang::Module *imported); - // Called by the preprocessor for each pragma directive during the non-raw lexing pass. - void PragmaDirective(clang::SourceLocation Loc, clang::PragmaIntroducerKind Introducer); - // Called by the preprocessor for each ifndef directive during the non-raw lexing pass. - // Found ifndef will be used in EndSourceFileAction() for catching include guard controlling macro. - void Ifndef(clang::SourceLocation Loc, const clang::Token &MacroNameTok, const clang::MacroDefinition &MD); - -protected: - // Add a Replacement for the current file. These will all be applied after executing the FrontendAction. - void insertReplacement(const ct::Replacement &rep, const clang::FullSourceLoc &fullSL); - // FrontendAction entry point. 
- void ExecuteAction() override; - // Callback before starting processing a single input; used by hipify-clang for setting Preprocessor options. - bool BeginInvocation(clang::CompilerInstance &CI) override; - // Called at the start of each new file to process. - void EndSourceFileAction() override; - // MatchCallback API entry point. Called by the AST visitor while searching the AST for things we registered an interest for. - void run(const mat::MatchFinder::MatchResult &Result) override; - std::unique_ptr CreateASTConsumer(clang::CompilerInstance &CI, StringRef InFile) override; - bool Exclude(const hipCounter &hipToken); - void FindAndReplace(StringRef name, clang::SourceLocation sl, const std::map &repMap, bool bReplace = true); -}; diff --git a/hipify-clang/src/LLVMCompat.cpp b/hipify-clang/src/LLVMCompat.cpp deleted file mode 100644 index f6d74121e4..0000000000 --- a/hipify-clang/src/LLVMCompat.cpp +++ /dev/null @@ -1,154 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "ArgParse.h" -#include "LLVMCompat.h" -#include "llvm/Support/Path.h" -#include "clang/Lex/PreprocessorOptions.h" -#include "clang/Frontend/CompilerInstance.h" - -const std::string sHipify = "[HIPIFY] ", sConflict = "conflict: ", sError = "error: ", sWarning = "warning: "; - -namespace llcompat { - -void PrintStackTraceOnErrorSignal() { - // The signature of PrintStackTraceOnErrorSignal changed in llvm 3.9. We don't support - // anything older than 3.8, so let's specifically detect the one old version we support. -#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR == 8) - llvm::sys::PrintStackTraceOnErrorSignal(); -#else - llvm::sys::PrintStackTraceOnErrorSignal(StringRef()); -#endif -} - -ct::Replacements &getReplacements(ct::RefactoringTool &Tool, StringRef file) { -#if LLVM_VERSION_MAJOR > 3 - // getReplacements() now returns a map from filename to Replacements - so create an entry - // for this source file and return a reference to it. - return Tool.getReplacements()[std::string(file)]; -#else - return Tool.getReplacements(); -#endif -} - -void insertReplacement(ct::Replacements &replacements, const ct::Replacement &rep) { -#if LLVM_VERSION_MAJOR > 3 - // New clang added error checking to Replacements, and *insists* that you explicitly check it. 
- llvm::consumeError(replacements.add(rep)); -#else - // In older versions, it's literally an std::set - replacements.insert(rep); -#endif -} - -void EnterPreprocessorTokenStream(clang::Preprocessor &_pp, const clang::Token *start, size_t len, bool DisableMacroExpansion) { -#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR == 8) - _pp.EnterTokenStream(start, len, false, DisableMacroExpansion); -#else - #if (LLVM_VERSION_MAJOR < 9) - _pp.EnterTokenStream(clang::ArrayRef{start, len}, DisableMacroExpansion); - #else - _pp.EnterTokenStream(clang::ArrayRef{start, len}, DisableMacroExpansion, false); - #endif -#endif -} - -clang::SourceLocation getBeginLoc(const clang::Stmt *stmt) { -#if LLVM_VERSION_MAJOR < 8 - return stmt->getLocStart(); -#else - return stmt->getBeginLoc(); -#endif -} - -clang::SourceLocation getBeginLoc(const clang::TypeLoc &typeLoc) { -#if LLVM_VERSION_MAJOR < 8 - return typeLoc.getLocStart(); -#else - return typeLoc.getBeginLoc(); -#endif -} - -clang::SourceLocation getEndLoc(const clang::Stmt *stmt) { -#if LLVM_VERSION_MAJOR < 8 - return stmt->getLocEnd(); -#else - return stmt->getEndLoc(); -#endif -} - -clang::SourceLocation getEndLoc(const clang::TypeLoc &typeLoc) { -#if LLVM_VERSION_MAJOR < 8 - return typeLoc.getLocEnd(); -#else - return typeLoc.getEndLoc(); -#endif -} - -std::error_code real_path(const Twine &path, SmallVectorImpl &output, - bool expand_tilde) { -#if LLVM_VERSION_MAJOR < 5 - output.clear(); - std::string s = path.str(); - output.append(s.begin(), s.end()); - if (sys::path::is_relative(path)) { - return sys::fs::make_absolute(output); - } - return std::error_code(); -#else - return sys::fs::real_path(path, output, expand_tilde); -#endif -} - -bool pragma_once_outside_header() { -#if LLVM_VERSION_MAJOR < 4 - return false; -#else - return true; -#endif -} - -void RetainExcludedConditionalBlocks(clang::CompilerInstance &CI) { -#if LLVM_VERSION_MAJOR > 9 - clang::PreprocessorOptions &PPOpts = CI.getPreprocessorOpts(); - 
PPOpts.RetainExcludedConditionalBlocks = !SkipExcludedPPConditionalBlocks; -#endif -} - -bool CheckCompatibility() { -#if LLVM_VERSION_MAJOR < 10 - if (SkipExcludedPPConditionalBlocks) { - llvm::errs() << "\n" << sHipify << sWarning << "Option '" << SkipExcludedPPConditionalBlocks.ArgStr.str() << "' is supported starting from LLVM version 10.0\n"; - } -#endif - return true; -} - -clang::SourceLocation getEndOfExpansionRangeForLoc(const clang::SourceManager &SM, const clang::SourceLocation &loc) { -#if LLVM_VERSION_MAJOR > 6 - return SM.getExpansionRange(loc).getEnd(); -#else - return SM.getExpansionRange(loc).second; -#endif -} - -} // namespace llcompat diff --git a/hipify-clang/src/LLVMCompat.h b/hipify-clang/src/LLVMCompat.h deleted file mode 100644 index 48e008d40d..0000000000 --- a/hipify-clang/src/LLVMCompat.h +++ /dev/null @@ -1,94 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#pragma once - -#include -#include -#include -#include -#include - -namespace ct = clang::tooling; - -extern const std::string sHipify, sConflict, sError, sWarning; - -// Things for papering over the differences between different LLVM versions. - -namespace llcompat { -/** - * The getNumArgs function on macros was rather unhelpfully renamed in clang 4.0. Its semantics - * remain unchanged, so let's be slightly ugly about it here. :D - */ -#if LLVM_VERSION_MAJOR > 4 - #define GET_NUM_ARGS() getNumParams() -#else - #define GET_NUM_ARGS() getNumArgs() -#endif - -#if LLVM_VERSION_MAJOR < 7 - #define LLVM_DEBUG(X) DEBUG(X) -#endif - -clang::SourceLocation getBeginLoc(const clang::Stmt *stmt); -clang::SourceLocation getBeginLoc(const clang::TypeLoc &typeLoc); - -clang::SourceLocation getEndLoc(const clang::Stmt *stmt); -clang::SourceLocation getEndLoc(const clang::TypeLoc &typeLoc); - -void PrintStackTraceOnErrorSignal(); - -using namespace llvm; - -/** - * Get the replacement map for a given filename in a RefactoringTool. - * - * Older LLVM versions don't actually support multiple filenames, so everything all gets - * smushed together. It is the caller's responsibility to cope with this. - */ -ct::Replacements &getReplacements(ct::RefactoringTool &Tool, StringRef file); - -/** - * Add a Replacement to a Replacements. - */ -void insertReplacement(ct::Replacements &replacements, const ct::Replacement &rep); - -/** - * Version-agnostic version of Preprocessor::EnterTokenStream(). 
- */ -void EnterPreprocessorTokenStream(clang::Preprocessor &_pp, - const clang::Token *start, - size_t len, - bool DisableMacroExpansion); - -std::error_code real_path(const Twine &path, SmallVectorImpl &output, - bool expand_tilde = false); - -bool pragma_once_outside_header(); - -void RetainExcludedConditionalBlocks(clang::CompilerInstance &CI); - -bool CheckCompatibility(); - -clang::SourceLocation getEndOfExpansionRangeForLoc(const clang::SourceManager &SM, const clang::SourceLocation &loc); - -} // namespace llcompat diff --git a/hipify-clang/src/ReplacementsFrontendActionFactory.h b/hipify-clang/src/ReplacementsFrontendActionFactory.h deleted file mode 100644 index 92d77655af..0000000000 --- a/hipify-clang/src/ReplacementsFrontendActionFactory.h +++ /dev/null @@ -1,55 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#pragma once - -#include "clang/Tooling/Tooling.h" -#include "clang/Frontend/FrontendAction.h" -#include "clang/Tooling/Core/Replacement.h" - -namespace ct = clang::tooling; - -/** - * A FrontendActionFactory that propagates a set of Replacements into the FrontendAction. - * This is necessary boilerplate for using a custom FrontendAction with a RefactoringTool. - * - * @tparam T The FrontendAction to create. - */ -template -class ReplacementsFrontendActionFactory : public ct::FrontendActionFactory { - ct::Replacements *replacements; - -public: - explicit ReplacementsFrontendActionFactory(ct::Replacements *r): - ct::FrontendActionFactory(), - replacements(r) {} - -#if LLVM_VERSION_MAJOR < 10 - clang::FrontendAction *create() override { - return new T(replacements); - } -#else - std::unique_ptr create() override { - return std::unique_ptr(new T(replacements)); - } -#endif -}; diff --git a/hipify-clang/src/Statistics.cpp b/hipify-clang/src/Statistics.cpp deleted file mode 100644 index 1f7713cd88..0000000000 --- a/hipify-clang/src/Statistics.cpp +++ /dev/null @@ -1,368 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include "Statistics.h" -#include -#include -#include -#include "ArgParse.h" - -const char *counterNames[NUM_CONV_TYPES] = { - "error", // CONV_ERROR - "init", // CONV_INIT - "version", // CONV_VERSION - "device", // CONV_DEVICE - "context", // CONV_CONTEXT - "module", // CONV_MODULE - "memory", // CONV_MEMORY - "virtual_memory", // CONV_VIRTUAL_MEMORY - "addressing", // CONV_ADDRESSING - "stream", // CONV_STREAM - "event", // CONV_EVENT - "external_resource_interop", // CONV_EXT_RES - "stream_memory", // CONV_STREAM_MEMORY - "execution", // CONV_EXECUTION - "graph", // CONV_GRAPH - "occupancy", // CONV_OCCUPANCY - "texture", // CONV_TEXTURE - "surface", // CONV_SURFACE - "peer", // CONV_PEER - "graphics", // CONV_GRAPHICS - "profiler", // CONV_PROFILER - "openGL", // CONV_OPENGL - "D3D9", // CONV_D3D9 - "D3D10", // CONV_D3D10 - "D3D11", // CONV_D3D11 - "VDPAU", // CONV_VDPAU - "EGL", // CONV_EGL - "thread", // CONV_THREAD - "complex", // CONV_COMPLEX - "library", // CONV_LIB_FUNC - "device_library", // CONV_LIB_DEVICE_FUNC - "device_function", // CONV_DEVICE_FUNC - "include", // CONV_INCLUDE - "include_cuda_main_header", // CONV_INCLUDE_CUDA_MAIN_H - "type", // CONV_TYPE - "literal", // CONV_LITERAL - "numeric_literal", // CONV_NUMERIC_LITERAL - "define", // CONV_DEFINE - "extern_shared", // CONV_EXTERN_SHARED - "kernel_launch" // CONV_KERNEL_LAUNCH -}; - -const char *counterTypes[NUM_CONV_TYPES] = { - "CONV_ERROR", - "CONV_INIT", - "CONV_VERSION", - "CONV_DEVICE", - "CONV_CONTEXT", - "CONV_MODULE", - "CONV_MEMORY", - "CONV_VIRTUAL_MEMORY", - "CONV_ADDRESSING", - "CONV_STREAM", - "CONV_EVENT", - "CONV_EXT_RES", - "CONV_STREAM_MEMORY", - "CONV_EXECUTION", - "CONV_GRAPH", - "CONV_OCCUPANCY", - 
"CONV_TEXTURE", - "CONV_SURFACE", - "CONV_PEER", - "CONV_GRAPHICS", - "CONV_PROFILER", - "CONV_OPENGL", - "CONV_D3D9", - "CONV_D3D10", - "CONV_D3D11", - "CONV_VDPAU", - "CONV_EGL", - "CONV_THREAD", - "CONV_COMPLEX", - "CONV_LIB_FUNC", - "CONV_LIB_DEVICE_FUNC", - "CONV_INCLUDE", - "CONV_INCLUDE_CUDA_MAIN_H", - "CONV_TYPE", - "CONV_LITERAL", - "CONV_NUMERIC_LITERAL", - "CONV_DEFINE", - "CONV_EXTERN_SHARED", - "CONV_KERNEL_LAUNCH" -}; - -const char *apiNames[NUM_API_TYPES] = { - "CUDA Driver API", - "CUDA RT API", - "cuComplex API", - "cuBLAS API", - "cuRAND API", - "cuDNN API", - "cuFFT API", - "cuSPARSE API", - "CUB API", - "CAFFE2 API" -}; - -const char *apiTypes[NUM_API_TYPES] = { - "API_DRIVER", - "API_RUNTIME", - "API_COMPLEX", - "API_BLAS", - "API_RAND", - "API_DNN", - "API_FFT", - "API_CUB", - "API_SPARSE", - "API_CAFFE2" -}; - -namespace { - -template -void conditionalPrint(ST *stream1, - ST2* stream2, - const std::string& s1, - const std::string& s2) { - if (stream1) { - *stream1 << s1; - } - if (stream2) { - *stream2 << s2; - } -} - -// Print a named stat value to both the terminal and the CSV file. 
-template -void printStat(std::ostream *csv, llvm::raw_ostream* printOut, const std::string &name, T value) { - if (printOut) { - *printOut << " " << name << ": " << value << "\n"; - } - if (csv) { - *csv << name << ";" << value << "\n"; - } -} - -} // Anonymous namespace - -void StatCounter::incrementCounter(const hipCounter &counter, const std::string &name) { - counters[name]++; - apiCounters[(int) counter.apiType]++; - convTypeCounters[(int) counter.type]++; -} - -void StatCounter::add(const StatCounter &other) { - for (const auto &p : other.counters) { - counters[p.first] += p.second; - } - for (int i = 0; i < NUM_API_TYPES; ++i) { - apiCounters[i] += other.apiCounters[i]; - } - for (int i = 0; i < NUM_CONV_TYPES; ++i) { - convTypeCounters[i] += other.convTypeCounters[i]; - } -} - -int StatCounter::getConvSum() { - int acc = 0; - for (const int &i : convTypeCounters) { - acc += i; - } - return acc; -} - -void StatCounter::print(std::ostream* csv, llvm::raw_ostream* printOut, const std::string &prefix) { - for (int i = 0; i < NUM_CONV_TYPES; ++i) { - if (convTypeCounters[i] > 0) { - conditionalPrint(csv, printOut, "\nCUDA ref type;Count\n", "[HIPIFY] info: " + prefix + " refs by type:\n"); - break; - } - } - for (int i = 0; i < NUM_CONV_TYPES; ++i) { - if (convTypeCounters[i] > 0) { - printStat(csv, printOut, counterNames[i], convTypeCounters[i]); - } - } - for (int i = 0; i < NUM_API_TYPES; ++i) { - if (apiCounters[i] > 0) { - conditionalPrint(csv, printOut, "\nCUDA API;Count\n", "[HIPIFY] info: " + prefix + " refs by API:\n"); - break; - } - } - for (int i = 0; i < NUM_API_TYPES; ++i) { - if (apiCounters[i] > 0) { - printStat(csv, printOut, apiNames[i], apiCounters[i]); - } - } - if (counters.size() > 0) { - conditionalPrint(csv, printOut, "\nCUDA ref name;Count\n", "[HIPIFY] info: " + prefix + " refs by names:\n"); - for (const auto &it : counters) { - printStat(csv, printOut, it.first, it.second); - } - } -} - -Statistics::Statistics(const std::string 
&name): fileName(name) { - // Compute the total bytes/lines in the input file. - std::ifstream src_file(name, std::ios::binary | std::ios::ate); - src_file.clear(); - src_file.seekg(0); - totalLines = (unsigned) std::count(std::istreambuf_iterator(src_file), std::istreambuf_iterator(), '\n'); - totalBytes = (int) src_file.tellg(); - if (totalBytes < 0) { - totalBytes = 0; - } - startTime = chr::steady_clock::now(); -} - -///////// Counter update routines ////////// - -void Statistics::incrementCounter(const hipCounter &counter, const std::string &name) { - if (Statistics::isUnsupported(counter)) { - unsupported.incrementCounter(counter, name); - } else { - supported.incrementCounter(counter, name); - } -} - -void Statistics::add(const Statistics &other) { - supported.add(other.supported); - unsupported.add(other.unsupported); - touchedBytes += other.touchedBytes; - totalBytes += other.totalBytes; - touchedLines += other.touchedLines; - totalLines += other.totalLines; - if (other.hasErrors && !hasErrors) { - hasErrors = true; - } - if (startTime > other.startTime) { - startTime = other.startTime; - } -} - -void Statistics::lineTouched(int lineNumber) { - touchedLinesSet.insert(lineNumber); - touchedLines = unsigned(touchedLinesSet.size()); -} - -void Statistics::bytesChanged(int bytes) { - touchedBytes += bytes; -} - -void Statistics::markCompletion() { - completionTime = chr::steady_clock::now(); -} - -///////// Output functions ////////// - -void Statistics::print(std::ostream* csv, llvm::raw_ostream* printOut, bool skipHeader) { - if (!skipHeader) { - std::string str = "file \'" + fileName + "\' statistics:\n"; - conditionalPrint(csv, printOut, "\n" + str, "\n[HIPIFY] info: " + str); - } - if (hasErrors || totalBytes <= 0 || totalLines <= 0) { - std::string str = "\n ERROR: Statistics is invalid due to failed hipification.\n\n"; - conditionalPrint(csv, printOut, str, str); - } - // Total number of (un)supported refs that were converted. 
- int supportedSum = supported.getConvSum(); - int unsupportedSum = unsupported.getConvSum(); - int allSum = supportedSum + unsupportedSum; - printStat(csv, printOut, "CONVERTED refs count", supportedSum); - printStat(csv, printOut, "UNCONVERTED refs count", unsupportedSum); - printStat(csv, printOut, "CONVERSION %", 100 - (0 == allSum ? 100 : std::lround(double(unsupportedSum * 100) / double(allSum)))); - printStat(csv, printOut, "REPLACED bytes", touchedBytes); - printStat(csv, printOut, "TOTAL bytes", totalBytes); - printStat(csv, printOut, "CHANGED lines of code", touchedLines); - printStat(csv, printOut, "TOTAL lines of code", totalLines); - printStat(csv, printOut, "CODE CHANGED (in bytes) %", 0 == totalBytes ? 0 : std::lround(double(touchedBytes * 100) / double(totalBytes))); - printStat(csv, printOut, "CODE CHANGED (in lines) %", 0 == totalLines ? 0 : std::lround(double(touchedLines * 100) / double(totalLines))); - typedef std::chrono::duration duration; - duration elapsed = completionTime - startTime; - std::stringstream stream; - stream << std::fixed << std::setprecision(2) << elapsed.count() / 1000; - printStat(csv, printOut, "TIME ELAPSED s", stream.str()); - supported.print(csv, printOut, "CONVERTED"); - unsupported.print(csv, printOut, "UNCONVERTED"); -} - -void Statistics::printAggregate(std::ostream *csv, llvm::raw_ostream* printOut) { - Statistics globalStats = getAggregate(); - // A file is considered "converted" if we made any changes to it. 
- int convertedFiles = 0; - for (const auto &p : stats) { - if (p.second.touchedLines && p.second.totalBytes && - p.second.totalLines && !p.second.hasErrors) { - convertedFiles++; - } - } - globalStats.markCompletion(); - globalStats.print(csv, printOut); - std::string str = "TOTAL statistics:"; - conditionalPrint(csv, printOut, "\n" + str + "\n", "\n[HIPIFY] info: " + str + "\n"); - printStat(csv, printOut, "CONVERTED files", convertedFiles); - printStat(csv, printOut, "PROCESSED files", stats.size()); -} - -//// Static state management //// - -Statistics Statistics::getAggregate() { - Statistics globalStats("GLOBAL"); - for (const auto &p : stats) { - globalStats.add(p.second); - } - return globalStats; -} - -Statistics &Statistics::current() { - assert(Statistics::currentStatistics); - return *Statistics::currentStatistics; -} - -void Statistics::setActive(const std::string &name) { - stats.emplace(std::make_pair(name, Statistics{name})); - Statistics::currentStatistics = &stats.at(name); -} - -bool Statistics::isToRoc(const hipCounter &counter) { - return TranslateToRoc && counter.apiType == API_BLAS; -} - -bool Statistics::isHipUnsupported(const hipCounter &counter) { - return HIP_UNSUPPORTED == (counter.supportDegree & HIP_UNSUPPORTED); -} - -bool Statistics::isRocUnsupported(const hipCounter &counter) { - return ROC_UNSUPPORTED == (counter.supportDegree & ROC_UNSUPPORTED); -} - -bool Statistics::isUnsupported(const hipCounter &counter) { - if (Statistics::isToRoc(counter)) { - return Statistics::isRocUnsupported(counter); - } else { - return Statistics::isHipUnsupported(counter); - } -} - -std::map Statistics::stats = {}; -Statistics* Statistics::currentStatistics = nullptr; diff --git a/hipify-clang/src/Statistics.h b/hipify-clang/src/Statistics.h deleted file mode 100644 index 6cff9cd9d6..0000000000 --- a/hipify-clang/src/Statistics.h +++ /dev/null @@ -1,250 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace chr = std::chrono; - -enum ConvTypes { - // Driver API: 5.2. Error Handling - // Runtime API: 5.3. Error Handling - CONV_ERROR = 0, - // Driver API : 5.3. Initialization - CONV_INIT, - // Driver API : 5.4. Version Management - // Runtime API: 5.28. Version Management - CONV_VERSION, - // Driver API : 5.5. Device Management, 5.6. Device Management [DEPRECATED] - // Runtime API: 5.1. Device Management - CONV_DEVICE, - // Driver API : 5.7. Primary Context Management, 5.8.Context Management, 5.9. Context Management [DEPRECATED] - CONV_CONTEXT, - // Driver API : 5.10. Module Management - CONV_MODULE, - // Driver API : 5.11. Memory Management - // Runtime API: 5.9. Memory Management, 5.10. Memory Management [DEPRECATED] - CONV_MEMORY, - // Driver API : 5.12. Virtual Memory Management - CONV_VIRTUAL_MEMORY, - // Driver API : 5.13. 
Unified Addressing - // Runtime API: 5.11. Unified Addressing - CONV_ADDRESSING, - // Driver API : 5.14. Stream Management - // Runtime API: 5.4. Stream Management - CONV_STREAM, - // Driver API : 5.15. Event Management - // Runtime API: 5.5. Event Management - CONV_EVENT, - // Driver API : 5.16. External Resource Interoperability - // Runtime API: 5.6.External Resource Interoperability - CONV_EXT_RES, - // Driver API : 5.17. Stream memory operations - CONV_STREAM_MEMORY, - // Driver API : 5.18. Execution Control, 5.19. Execution Control [DEPRECATED] - // Runtime API: 5.7.Execution Control, Former 5.9. Execution Control [DEPRECATED] - CONV_EXECUTION, - // Driver API : 5.20. Graph Management - // Runtime API: 5.29. Graph Management - CONV_GRAPH, - // Driver API : 5.21. Occupancy - // Runtime API: 5.8. Occupancy - CONV_OCCUPANCY, - // Driver API : 5.22. Texture Reference Management [DEPRECATED], 5.24. Texture Object Management - // Runtime API: 5.24. Texture Reference Management [DEPRECATED], 5.26. Texture Object Management - CONV_TEXTURE, - // Driver API : 5.23. Surface Reference Management [DEPRECATED], 5.25. Surface Object Management - // Runtime API: 5.25. Surface Reference Management [DEPRECATED], 5.27. Surface Object Management - CONV_SURFACE, - // Driver API : 5.26. Peer Context Memory Access - // Runtime API: 5.12. Peer Device Memory Access - CONV_PEER, - // Driver API : 5.27. Graphics Interoperability - // Runtime API: 5.23. Graphics Interoperability - CONV_GRAPHICS, - // Driver API : 5.28. Profiler Control - // Runtime API: 5.32. Profiler Control - CONV_PROFILER, - // Driver API : 5.29. OpenGL Interoperability - // Runtime API: 5.13. OpenGL Interoperability, 5.14. OpenGL Interoperability [DEPRECATED] - CONV_OPENGL, - // Driver API : 5.30. Direct3D 9 Interoperability - // Runtime API: 5.15. Direct3D 9 Interoperability, 5.16. Direct3D 9 Interoperability [DEPRECATED] - CONV_D3D9, - // Driver API : 5.31. Direct3D 10 Interoperability - // Runtime API: 5.17. 
Direct3D 10 Interoperability, 5.18. Direct3D 10 Interoperability [DEPRECATED] - CONV_D3D10, - // Driver API : 5.32. Direct3D 11 Interoperability - // Runtime API: 5.19. Direct3D 11 Interoperability, 5.20. Direct3D 11 Interoperability [DEPRECATED] - CONV_D3D11, - // Driver API : 5.33. VDPAU Interoperability - // Runtime API: 5.21. VDPAU Interoperability - CONV_VDPAU, - // Driver API : 5.34. EGL Interoperability - // Runtime API: 5.22. EGL Interoperability - CONV_EGL, - // Runtime API: 5.2. Thread Management [DEPRECATED] - CONV_THREAD, - CONV_COMPLEX, - CONV_LIB_FUNC, - CONV_LIB_DEVICE_FUNC, - CONV_DEVICE_FUNC, - CONV_INCLUDE, - CONV_INCLUDE_CUDA_MAIN_H, - CONV_TYPE, - CONV_LITERAL, - CONV_NUMERIC_LITERAL, - CONV_DEFINE, - CONV_EXTERN_SHARED, - CONV_KERNEL_LAUNCH, - CONV_LAST -}; -constexpr int NUM_CONV_TYPES = (int) ConvTypes::CONV_LAST; - -enum ApiTypes { - API_DRIVER = 0, - API_RUNTIME, - API_COMPLEX, - API_BLAS, - API_RAND, - API_DNN, - API_FFT, - API_SPARSE, - API_CUB, - API_CAFFE2, - API_LAST -}; -constexpr int NUM_API_TYPES = (int) ApiTypes::API_LAST; - -enum SupportDegree { - FULL = 0, - HIP_UNSUPPORTED = 1, - ROC_UNSUPPORTED = 2, - UNSUPPORTED = 3 -}; - -// The names of various fields in in the statistics reports. -extern const char *counterNames[NUM_CONV_TYPES]; -extern const char *counterTypes[NUM_CONV_TYPES]; -extern const char *apiNames[NUM_API_TYPES]; -extern const char *apiTypes[NUM_API_TYPES]; - -struct hipCounter { - llvm::StringRef hipName; - llvm::StringRef rocName; - ConvTypes type; - ApiTypes apiType; - SupportDegree supportDegree; -}; - -/** - * Tracks a set of named counters, as well as counters for each of the type enums defined above. - */ -class StatCounter { -private: - // Each thing we track is either "supported" or "unsupported"... 
- std::map counters; - int apiCounters[NUM_API_TYPES] = {}; - int convTypeCounters[NUM_CONV_TYPES] = {}; - -public: - void incrementCounter(const hipCounter &counter, const std::string &name); - // Add the counters from `other` onto the counters of this object. - void add(const StatCounter &other); - int getConvSum(); - void print(std::ostream* csv, llvm::raw_ostream* printOut, const std::string &prefix); -}; - -/** - * Tracks the statistics for a single input file. - */ -class Statistics { - StatCounter supported; - StatCounter unsupported; - std::string fileName; - std::set touchedLinesSet = {}; - unsigned touchedLines = 0; - unsigned totalLines = 0; - unsigned touchedBytes = 0; - int totalBytes = 0; - chr::steady_clock::time_point startTime; - chr::steady_clock::time_point completionTime; - -public: - Statistics(const std::string &name); - void incrementCounter(const hipCounter &counter, const std::string &name); - // Add the counters from `other` onto the counters of this object. - void add(const Statistics &other); - void lineTouched(int lineNumber); - void bytesChanged(int bytes); - // Set the completion timestamp to now. - void markCompletion(); - -public: - /** - * Pretty-print the statistics stored in this object. - * - * @param csv Pointer to an output stream for the CSV to write. If null, no CSV is written - * @param printOut Pointer to an output stream to print human-readable textual stats to. If null, no - * such stats are produced. - */ - void print(std::ostream* csv, llvm::raw_ostream* printOut, bool skipHeader = false); - // Print aggregated statistics for all registered counters. - static void printAggregate(std::ostream *csv, llvm::raw_ostream* printOut); - // The Statistics for each input file. - static std::map stats; - // The Statistics objects for the currently-being-processed input file. - static Statistics* currentStatistics; - // Aggregate statistics over all entries in `stats` and return the resulting Statistics object. 
- static Statistics getAggregate(); - /** - * Convenient global entry point for updating the "active" Statistics. Since we operate single-threadedly - * processing one file at a time, this allows us to simply expose the stats for the current file globally, - * simplifying things. - */ - static Statistics ¤t(); - /** - * Set the active Statistics object to the named one, creating it if necessary, and write the completion - * timestamp into the currently active one. - */ - static void setActive(const std::string &name); - // Check the counter and option TranslateToRoc whether it should be translated to Roc or not. - static bool isToRoc(const hipCounter &counter); - // Check whether the counter is HIP_UNSUPPORTED or not. - static bool isHipUnsupported(const hipCounter &counter); - // Check whether the counter is ROC_UNSUPPORTED or not. - static bool isRocUnsupported(const hipCounter &counter); - /** - * Check whether the counter is ROC_UNSUPPORTED/HIP_UNSUPPORTED/UNSUPPORTED or not - * based on counter's API_TYPE and option TranslateToRoc. - */ - static bool isUnsupported(const hipCounter &counter); - // Set this flag in case of hipification errors - bool hasErrors = false; -}; diff --git a/hipify-clang/src/StringUtils.cpp b/hipify-clang/src/StringUtils.cpp deleted file mode 100644 index 31ab331174..0000000000 --- a/hipify-clang/src/StringUtils.cpp +++ /dev/null @@ -1,97 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include "StringUtils.h" -#include "LLVMCompat.h" -#include "llvm/ADT/SmallString.h" - -using namespace llvm; - -llvm::StringRef unquoteStr(llvm::StringRef s) { - if (s.size() > 1 && s.front() == '"' && s.back() == '"') { - return s.substr(1, s.size() - 2); - } - return s; -} - -void removePrefixIfPresent(std::string &s, const std::string &prefix) { - if (s.find(prefix) != 0) { - return; - } - s.erase(0, prefix.size()); -} - -std::string getAbsoluteFilePath(const std::string &sFile, std::error_code &EC) { - if (sFile.empty()) { - return sFile; - } - if (!sys::fs::exists(sFile)) { - llvm::errs() << "\n" << sHipify << sError << "source file: " << sFile << " doesn't exist\n"; - EC = std::error_code(static_cast(std::errc::no_such_file_or_directory), std::generic_category()); - return ""; - } - SmallString<256> fileAbsPath; - EC = llcompat::real_path(sFile, fileAbsPath, true); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": source file: " << sFile << "\n"; - return ""; - } - EC = std::error_code(); - return fileAbsPath.c_str(); -} - -std::string getAbsoluteDirectoryPath(const std::string &sDir, std::error_code &EC, - const std::string &sDirType, bool bCreateDir) { - if (sDir.empty()) { - return sDir; - } - EC = std::error_code(); - SmallString<256> dirAbsPath; - if (sys::fs::exists(sDir)) { - if (sys::fs::is_regular_file(sDir)) { - llvm::errs() << "\n" << sHipify << sError << sDir << " is not a directory\n"; - EC = std::error_code(static_cast(std::errc::not_a_directory), std::generic_category()); - return ""; - } - } - else { - if (bCreateDir) { - EC = sys::fs::create_directory(sDir); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": " << sDirType << " directory: " << sDir << "\n"; - return ""; - } - } - else { - llvm::errs() << "\n" << sHipify << sError << sDirType << " directory: " << sDir << " doesn't exist\n"; - EC = std::error_code(static_cast(std::errc::no_such_file_or_directory), 
std::generic_category()); - return ""; - } - } - EC = llcompat::real_path(sDir, dirAbsPath, true); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": " << sDirType << " directory: " << sDir << "\n"; - return ""; - } - return dirAbsPath.c_str(); -} - diff --git a/hipify-clang/src/StringUtils.h b/hipify-clang/src/StringUtils.h deleted file mode 100644 index ecbca5e832..0000000000 --- a/hipify-clang/src/StringUtils.h +++ /dev/null @@ -1,48 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once - -#include -#include "llvm/ADT/StringRef.h" - -/** - * Remove double-quotes from the start/end of a string, if present. - */ -llvm::StringRef unquoteStr(llvm::StringRef s); - -/** - * If `s` starts with `prefix`, remove it. Otherwise, does nothing. 
- */ -void removePrefixIfPresent(std::string &s, const std::string &prefix); - -/** - * Returns Absolute File Path based on filename, otherwise - error. - */ -std::string getAbsoluteFilePath(const std::string &sFile, std::error_code &EC); - -/** - * Returns Absolute Directory Path based on directory name, otherwise - error; - * by default the directory is temporary and created. - */ -std::string getAbsoluteDirectoryPath(const std::string &sDir, std::error_code &EC, - const std::string &sDirType = "temporary", bool bCreateDir = true); diff --git a/hipify-clang/src/main.cpp b/hipify-clang/src/main.cpp deleted file mode 100644 index cb411eba2f..0000000000 --- a/hipify-clang/src/main.cpp +++ /dev/null @@ -1,352 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -#include -#include -#include -#include -#include -#include -#include "CUDA2HIP.h" -#include "CUDA2HIP_Scripting.h" -#include "LLVMCompat.h" -#include "HipifyAction.h" -#include "ArgParse.h" -#include "StringUtils.h" -#include "llvm/Support/Debug.h" -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticIDs.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Driver/Driver.h" -#include "clang/Driver/Compilation.h" -#include "clang/Driver/Tool.h" -#include "clang/Frontend/TextDiagnosticPrinter.h" - -#if LLVM_VERSION_MAJOR < 8 -#include "llvm/Support/Path.h" -#endif - -constexpr auto DEBUG_TYPE = "cuda2hip"; - -namespace ct = clang::tooling; - -void cleanupHipifyOptions(std::vector &args) { - std::vector hipifyOptions = {"-perl", "-python", "-roc", "-inplace", - "-no-backup", "-no-output", "-print-stats", - "-print-stats-csv", "-examine", "-save-temps", - "-skip-excluded-preprocessor-conditional-blocks"}; - for (const auto &a : hipifyOptions) { - args.erase(std::remove(args.begin(), args.end(), a), args.end()); - args.erase(std::remove(args.begin(), args.end(), "-" + a), args.end()); - } - std::vector hipifyDirOptions = {"-o-dir", "-o-hipify-perl-dir", "-o-stats", - "-o-python-map-dir", "-temp-dir"}; - for (const auto &a : hipifyDirOptions) { - // remove all pairs of arguments "-option value" - auto it = args.erase(std::remove(args.begin(), args.end(), a), args.end()); - if (it != args.end()) { - args.erase(it); - } - // remove all pairs of arguments "--option value" - it = args.erase(std::remove(args.begin(), args.end(), "-" + a), args.end()); - if (it != args.end()) { - args.erase(it); - } - // remove all "-option=value" and "--option=value" - args.erase( - std::remove_if(args.begin(), args.end(), - [a](const std::string &s) { return s.find(a + "=") == 0 || s.find("-" + a + "=") == 0; } - ), - args.end() - ); - } -} - -void sortInputFiles(int argc, const char **argv, std::vector &files) { - if (files.size() < 2) return; - 
IntrusiveRefCntPtr diagOpts(new clang::DiagnosticOptions()); - clang::TextDiagnosticPrinter diagClient(llvm::errs(), &*diagOpts); - clang::DiagnosticsEngine Diagnostics(IntrusiveRefCntPtr(new clang::DiagnosticIDs()), &*diagOpts, &diagClient, false); - std::unique_ptr driver(new clang::driver::Driver("", "nvptx64-nvidia-cuda", Diagnostics)); - std::vector Args(argv, argv + argc); - cleanupHipifyOptions(Args); - std::unique_ptr C(driver->BuildCompilation(Args)); - std::vector sortedFiles; - for (const auto &J : C->getJobs()) { - if (std::string(J.getCreator().getName()) != "clang") continue; - const auto &JA = J.getArguments(); - for (size_t i = 0; i < JA.size(); ++i) { - const auto &A = std::string(JA[i]); - if (std::find(files.begin(), files.end(), A) != files.end() && - i > 0 && std::string(JA[i - 1]) == "-main-file-name") { - sortedFiles.push_back(A); - } - } - } - if (sortedFiles.empty()) return; - std::reverse(sortedFiles.begin(), sortedFiles.end()); - files.assign(sortedFiles.begin(), sortedFiles.end()); -} - -void appendArgumentsAdjusters(ct::RefactoringTool &Tool, const std::string &sSourceAbsPath, const char *hipify_exe) { - if (!IncludeDirs.empty()) { - for (std::string s : IncludeDirs) { - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(s.c_str(), ct::ArgumentInsertPosition::BEGIN)); - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-I", ct::ArgumentInsertPosition::BEGIN)); - } - } - if (!MacroNames.empty()) { - for (std::string s : MacroNames) { - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(s.c_str(), ct::ArgumentInsertPosition::BEGIN)); - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-D", ct::ArgumentInsertPosition::BEGIN)); - } - } - // Includes for clang's CUDA wrappers for using by packaged hipify-clang - static int Dummy; - std::string hipify = llvm::sys::fs::getMainExecutable(hipify_exe, (void *)&Dummy); - std::string clang_inc_path = std::string(llvm::sys::path::parent_path(hipify)); - 
clang_inc_path.append("/include"); - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(clang_inc_path.c_str(), ct::ArgumentInsertPosition::BEGIN)); - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-isystem", ct::ArgumentInsertPosition::BEGIN)); - clang_inc_path.append("/cuda_wrappers"); - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(clang_inc_path.c_str(), ct::ArgumentInsertPosition::BEGIN)); - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-isystem", ct::ArgumentInsertPosition::BEGIN)); - // Ensure at least c++11 is used. - std::string stdCpp = "-std=c++11"; -#if defined(_MSC_VER) - stdCpp = "-std=c++14"; -#endif - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(stdCpp.c_str(), ct::ArgumentInsertPosition::BEGIN)); - std::string sInclude = "-I" + sys::path::parent_path(sSourceAbsPath).str(); -#if defined(HIPIFY_CLANG_RES) - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-resource-dir=" HIPIFY_CLANG_RES, ct::ArgumentInsertPosition::BEGIN)); -#endif - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(sInclude.c_str(), ct::ArgumentInsertPosition::BEGIN)); - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-fno-delayed-template-parsing", ct::ArgumentInsertPosition::BEGIN)); - if (llcompat::pragma_once_outside_header()) { - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-Wno-pragma-once-outside-header", ct::ArgumentInsertPosition::BEGIN)); - } - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("--cuda-host-only", ct::ArgumentInsertPosition::BEGIN)); - if (!CudaGpuArch.empty()) { - std::string sCudaGpuArch = "--cuda-gpu-arch=" + CudaGpuArch; - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(sCudaGpuArch.c_str(), ct::ArgumentInsertPosition::BEGIN)); - } - if (!CudaPath.empty()) { - std::string sCudaPath = "--cuda-path=" + CudaPath; - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster(sCudaPath.c_str(), ct::ArgumentInsertPosition::BEGIN)); 
- } - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("cuda", ct::ArgumentInsertPosition::BEGIN)); - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-x", ct::ArgumentInsertPosition::BEGIN)); - if (Verbose) { - Tool.appendArgumentsAdjuster(ct::getInsertArgumentAdjuster("-v", ct::ArgumentInsertPosition::END)); - } - Tool.appendArgumentsAdjuster(ct::getClangSyntaxOnlyAdjuster()); -} - -bool generatePython() { - bool bToRoc = TranslateToRoc; - TranslateToRoc = true; - bool bToPython = python::generate(GeneratePython); - TranslateToRoc = bToRoc; - return bToPython; -} - -int main(int argc, const char **argv) { - std::vector new_argv(argv, argv + argc); - if (std::find(new_argv.begin(), new_argv.end(), std::string("--")) == new_argv.end()) { - new_argv.push_back("--"); - new_argv.push_back(nullptr); - argv = new_argv.data(); - argc++; - } - llcompat::PrintStackTraceOnErrorSignal(); - ct::CommonOptionsParser OptionsParser(argc, argv, ToolTemplateCategory, llvm::cl::ZeroOrMore); - if (!llcompat::CheckCompatibility()) { - return 1; - } - std::vector fileSources = OptionsParser.getSourcePathList(); - if (fileSources.empty() && !GeneratePerl && !GeneratePython) { - llvm::errs() << "\n" << sHipify << sError << "Must specify at least 1 positional argument for source file" << "\n"; - return 1; - } - if (!perl::generate(GeneratePerl)) { - llvm::errs() << "\n" << sHipify << sError << "hipify-perl generating failed" << "\n"; - return 1; - } - if (!generatePython()) { - llvm::errs() << "\n" << sHipify << sError << "hipify-python generating failed" << "\n"; - return 1; - } - if (fileSources.empty()) { - return 0; - } - std::string dst = OutputFilename, dstDir = OutputDir; - std::error_code EC; - std::string sOutputDirAbsPath = getAbsoluteDirectoryPath(OutputDir, EC, "output"); - if (EC) { - return 1; - } - if (!dst.empty()) { - if (fileSources.size() > 1) { - llvm::errs() << sHipify << sConflict << "-o and multiple source files are specified\n"; - return 1; - } 
- if (Inplace) { - llvm::errs() << sHipify << sConflict << "both -o and -inplace options are specified\n"; - return 1; - } - if (NoOutput) { - llvm::errs() << sHipify << sConflict << "both -no-output and -o options are specified\n"; - return 1; - } - if (!dstDir.empty()) { - dst = sOutputDirAbsPath + "/" + dst; - } - } - if (NoOutput && Inplace) { - llvm::errs() << sHipify << sConflict << "both -no-output and -inplace options are specified\n"; - return 1; - } - if (!dstDir.empty() && Inplace) { - llvm::errs() << sHipify << sConflict << "both -o-dir and -inplace options are specified\n"; - return 1; - } - if (Examine) { - NoOutput = PrintStats = true; - } - int Result = 0; - SmallString<128> tmpFile; - StringRef sourceFileName, ext = "hip", csv_ext = "csv"; - std::string sTmpFileName, sSourceAbsPath; - std::string sTmpDirAbsParh = getAbsoluteDirectoryPath(TemporaryDir, EC); - if (EC) { - return 1; - } - // Arguments for the Statistics print routines. - std::unique_ptr csv = nullptr; - llvm::raw_ostream *statPrint = nullptr; - bool create_csv = false; - if (!OutputStatsFilename.empty()) { - PrintStatsCSV = true; - create_csv = true; - } else { - if (PrintStatsCSV && fileSources.size() > 1) { - OutputStatsFilename = "sum_stat.csv"; - create_csv = true; - } - } - if (create_csv) { - if (!OutputDir.empty()) { - OutputStatsFilename = sOutputDirAbsPath + "/" + OutputStatsFilename; - } - csv = std::unique_ptr(new std::ofstream(OutputStatsFilename, std::ios_base::trunc)); - } - if (PrintStats) { - statPrint = &llvm::errs(); - } - sortInputFiles(argc, argv, fileSources); - for (const auto &src : fileSources) { - // Create a copy of the file to work on. When we're done, we'll move this onto the - // output (which may mean overwriting the input, if we're in-place). - // Should we fail for some reason, we'll just leak this file and not corrupt the input. 
- sSourceAbsPath = getAbsoluteFilePath(src, EC); - if (EC) { - continue; - } - sourceFileName = sys::path::filename(sSourceAbsPath); - if (dst.empty()) { - if (Inplace) { - dst = src; - } else { - dst = src + "." + ext.str(); - if (!dstDir.empty()) { - dst = sOutputDirAbsPath + "/" + sourceFileName.str() + "." + ext.str(); - } - } - } - if (TemporaryDir.empty()) { - EC = sys::fs::createTemporaryFile(sourceFileName, ext, tmpFile); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": " << tmpFile << "\n"; - Result = 1; - continue; - } - } else { - sTmpFileName = sTmpDirAbsParh + "/" + sourceFileName.str() + "." + ext.str(); - tmpFile = sTmpFileName; - } - EC = sys::fs::copy_file(src, tmpFile); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": while copying " << src << " to " << tmpFile << "\n"; - Result = 1; - continue; - } - if (PrintStatsCSV) { - if (OutputStatsFilename.empty()) { - OutputStatsFilename = sourceFileName.str() + "." + csv_ext.str(); - if (!OutputDir.empty()) { - OutputStatsFilename = sOutputDirAbsPath + "/" + OutputStatsFilename; - } - } - if (!csv) { - csv = std::unique_ptr(new std::ofstream(OutputStatsFilename, std::ios_base::trunc)); - } - } - // Initialise the statistics counters for this file. - Statistics::setActive(src); - // RefactoringTool operates on the file in-place. Giving it the output path is no good, - // because that'll break relative includes, and we don't want to overwrite the input file. - // So what we do is operate on a copy, which we then move to the output. - ct::RefactoringTool Tool(OptionsParser.getCompilations(), std::string(tmpFile.c_str())); - ct::Replacements &replacementsToUse = llcompat::getReplacements(Tool, tmpFile.c_str()); - ReplacementsFrontendActionFactory actionFactory(&replacementsToUse); - appendArgumentsAdjusters(Tool, sSourceAbsPath, argv[0]); - Statistics ¤tStat = Statistics::current(); - // Hipify _all_ the things! 
- if (Tool.runAndSave(&actionFactory)) { - currentStat.hasErrors = true; - Result = 1; - LLVM_DEBUG(llvm::dbgs() << "Skipped some replacements.\n"); - } - // Copy the tmpfile to the output - if (!NoOutput && !currentStat.hasErrors) { - EC = sys::fs::copy_file(tmpFile, dst); - if (EC) { - llvm::errs() << "\n" << sHipify << sError << EC.message() << ": while copying " << tmpFile << " to " << dst << "\n"; - Result = 1; - continue; - } - } - // Remove the tmp file without error check - if (!SaveTemps) { - sys::fs::remove(tmpFile); - } - Statistics::current().markCompletion(); - Statistics::current().print(csv.get(), statPrint); - dst.clear(); - } - if (fileSources.size() > 1) { - Statistics::printAggregate(csv.get(), statPrint); - } - return Result; -} diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h index e6549dde0d..76ac8710d5 100644 --- a/include/hip/hcc_detail/device_functions.h +++ b/include/hip/hcc_detail/device_functions.h @@ -34,7 +34,7 @@ THE SOFTWARE. 
#include #include -#if __HIP_CLANG_ONLY__ && __HIP_VDI__ +#if __HIP_CLANG_ONLY__ && __HIP_VDI__ && !_WIN32 extern "C" __device__ int printf(const char *fmt, ...); #else #if HC_FEATURE_PRINTF @@ -128,7 +128,7 @@ __device__ static int __mul24(int x, int y); __device__ static long long int __mul64hi(long long int x, long long int y); __device__ static int __mulhi(int x, int y); __device__ static int __rhadd(int x, int y); -__device__ static unsigned int __sad(int x, int y, int z); +__device__ static unsigned int __sad(int x, int y,unsigned int z); __device__ static unsigned int __uhadd(unsigned int x, unsigned int y); __device__ static int __umul24(unsigned int x, unsigned int y); __device__ static unsigned long long int __umul64hi(unsigned long long int x, unsigned long long int y); @@ -199,7 +199,7 @@ __device__ static inline int __rhadd(int x, int y) { int value = z & 0x7FFFFFFF; return ((value) >> 1 || sign); } -__device__ static inline unsigned int __sad(int x, int y, int z) { +__device__ static inline unsigned int __sad(int x, int y, unsigned int z) { return x > y ? 
x - y + z : y - x + z; } __device__ static inline unsigned int __uhadd(unsigned int x, unsigned int y) { @@ -230,7 +230,7 @@ __device__ static inline unsigned int __urhadd(unsigned int x, unsigned int y) { return (x + y + 1) >> 1; } __device__ static inline unsigned int __usad(unsigned int x, unsigned int y, unsigned int z) { - return __ockl_sad_u32(x, y, z); + return __ockl_sadd_u32(x, y, z); } __device__ static inline unsigned int __lane_id() { return __mbcnt_hi(-1, __mbcnt_lo(-1, 0)); } @@ -563,7 +563,7 @@ long __shfl_xor(long var, int lane_mask, int width = warpSize) return tmp1; #else static_assert(sizeof(long) == sizeof(int), ""); - return static_cast(__shfl_down(static_cast(var), lane_mask, width)); + return static_cast(__shfl_xor(static_cast(var), lane_mask, width)); #endif } __device__ @@ -1076,6 +1076,8 @@ void __assert_fail(const char * __assertion, unsigned int __line, const char *__function) { + printf("%s:%u: %s: Device-side assertion `%s' failed.\n", __file, __line, + __function, __assertion); // Ignore all the args for now. 
__builtin_trap(); } diff --git a/include/hip/hcc_detail/device_library_decls.h b/include/hip/hcc_detail/device_library_decls.h index ac35823cd2..2eadb86774 100644 --- a/include/hip/hcc_detail/device_library_decls.h +++ b/include/hip/hcc_detail/device_library_decls.h @@ -44,7 +44,7 @@ extern "C" __device__ __attribute__((const)) uint __ockl_mul24_u32(uint, uint); extern "C" __device__ __attribute__((const)) int __ockl_mul24_i32(int, int); extern "C" __device__ __attribute__((const)) uint __ockl_mul_hi_u32(uint, uint); extern "C" __device__ __attribute__((const)) int __ockl_mul_hi_i32(int, int); -extern "C" __device__ __attribute__((const)) uint __ockl_sad_u32(uint, uint, uint); +extern "C" __device__ __attribute__((const)) uint __ockl_sadd_u32(uint, uint, uint); extern "C" __device__ __attribute__((const)) uchar __ockl_clz_u8(uchar); extern "C" __device__ __attribute__((const)) ushort __ockl_clz_u16(ushort); @@ -72,6 +72,7 @@ extern "C" __device__ __attribute__((const)) uint __ockl_multi_grid_thread_rank( extern "C" __device__ __attribute__((const)) int __ockl_multi_grid_is_valid(void); extern "C" __device__ __attribute__((convergent)) void __ockl_multi_grid_sync(void); +extern "C" __device__ void __ockl_atomic_add_noret_f32(float*, float); // Introduce local address space #define __local __attribute__((address_space(3))) diff --git a/include/hip/hcc_detail/functional_grid_launch.hpp b/include/hip/hcc_detail/functional_grid_launch.hpp index cf4422070f..efe6a60197 100644 --- a/include/hip/hcc_detail/functional_grid_launch.hpp +++ b/include/hip/hcc_detail/functional_grid_launch.hpp @@ -37,14 +37,15 @@ THE SOFTWARE. 
hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags, hip_impl::program_state& ps); -hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDimX, void** kernelParams, - unsigned int sharedMemBytes, hipStream_t stream, hip_impl::program_state& ps); - -hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, - unsigned int flags, hip_impl::program_state& ps); - - +hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, + dim3 blockDim, void** args, + size_t sharedMem, hipStream_t stream, + hip_impl::program_state& ps); +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, + unsigned int flags, + hip_impl::program_state& ps); #pragma GCC visibility push(hidden) @@ -139,10 +140,10 @@ void hipLaunchKernelGGLImpl( } // Namespace hip_impl. -template +template inline -hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* blockSize, - F kernel, size_t dynSharedMemPerBlk, uint32_t blockSizeLimit) { +hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + T kernel, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0) { using namespace hip_impl; @@ -150,22 +151,24 @@ hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* block auto f = get_program_state().kernel_descriptor(reinterpret_cast(kernel), target_agent(0)); - return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, + return hipModuleOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit); } -template +template inline -hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(uint32_t* numBlocks, F kernel, - uint32_t blockSize, size_t dynSharedMemPerBlk) { +hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + T kernel, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0, unsigned int flags = 
0 ) { using namespace hip_impl; hip_impl::hip_init(); + if(flags != hipOccupancyDefault) return hipErrorNotSupported; auto f = get_program_state().kernel_descriptor(reinterpret_cast(kernel), target_agent(0)); - return hipOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f, blockSize, dynSharedMemPerBlk); + return hipModuleOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, + dynSharedMemPerBlk, blockSizeLimit); } template @@ -192,22 +195,24 @@ void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks, template inline __attribute__((visibility("hidden"))) -hipError_t hipLaunchCooperativeKernel(F f, dim3 gridDim, dim3 blockDimX, void** kernelParams, - unsigned int sharedMemBytes, hipStream_t stream) { - +hipError_t hipLaunchCooperativeKernel(F f, dim3 gridDim, dim3 blockDim, + void** args, size_t sharedMem, + hipStream_t stream) { hip_impl::hip_init(); auto& ps = hip_impl::get_program_state(); - return ihipLaunchCooperativeKernel(reinterpret_cast(f), gridDim, blockDimX, kernelParams, sharedMemBytes, stream, ps); + return hipLaunchCooperativeKernel(reinterpret_cast(f), gridDim, + blockDim, args, sharedMem, stream, ps); } inline __attribute__((visibility("hidden"))) -hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, - unsigned int flags) { +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, + unsigned int flags) { hip_impl::hip_init(); auto& ps = hip_impl::get_program_state(); - return ihipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, ps); + return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, ps); } #pragma GCC visibility pop diff --git a/include/hip/hcc_detail/hip_atomic.h b/include/hip/hcc_detail/hip_atomic.h index 263f639e96..d00ebcdabb 100644 --- a/include/hip/hcc_detail/hip_atomic.h +++ b/include/hip/hcc_detail/hip_atomic.h @@ -73,6 +73,14 @@ float atomicAdd(float* address, float 
val) return __uint_as_float(r); } + +__device__ +inline +void atomicAddNoRet(float* address, float val) +{ + __ockl_atomic_add_noret_f32(address, val); +} + __device__ inline double atomicAdd(double* address, double val) diff --git a/include/hip/hcc_detail/hip_fp16.h b/include/hip/hcc_detail/hip_fp16.h index 52abc1a004..6fa86e94b9 100644 --- a/include/hip/hcc_detail/hip_fp16.h +++ b/include/hip/hcc_detail/hip_fp16.h @@ -229,7 +229,7 @@ THE SOFTWARE. __host__ __device__ operator __half_raw() const { return __half_raw{data}; } __host__ __device__ - operator volatile __half_raw() const volatile + operator __half_raw() const volatile { return __half_raw{data}; } @@ -1268,6 +1268,13 @@ THE SOFTWARE. static_cast<__half_raw>(x).data + static_cast<__half_raw>(y).data}; } + inline + __device__ + __half __habs(__half x) + { + return __half_raw{ + __ocml_fabs_f16(static_cast<__half_raw>(x).data)}; + } inline __device__ __half __hsub(__half x, __half y) @@ -1334,6 +1341,13 @@ THE SOFTWARE. static_cast<__half2_raw>(x).data + static_cast<__half2_raw>(y).data}; } + inline + __device__ + __half2 __habs2(__half2 x) + { + return __half2_raw{ + __ocml_fabs_2f16(static_cast<__half2_raw>(x).data)}; + } inline __device__ __half2 __hsub2(__half2 x, __half2 y) diff --git a/include/hip/hcc_detail/hip_fp16_math_fwd.h b/include/hip/hcc_detail/hip_fp16_math_fwd.h index eeb617c40b..95403e6ca8 100644 --- a/include/hip/hcc_detail/hip_fp16_math_fwd.h +++ b/include/hip/hcc_detail/hip_fp16_math_fwd.h @@ -38,6 +38,7 @@ extern "C" __device__ __attribute__((const)) _Float16 __ocml_floor_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_fma_f16(_Float16, _Float16, _Float16); + __device__ __attribute__((const)) _Float16 __ocml_fabs_f16(_Float16); __device__ __attribute__((const)) int __ocml_isinf_f16(_Float16); __device__ __attribute__((const)) int __ocml_isnan_f16(_Float16); __device__ __attribute__((pure)) _Float16 __ocml_log_f16(_Float16); @@ -58,6 +59,7 @@ extern "C" #endif __device__ 
__attribute__((const)) __2f16 __ocml_ceil_2f16(__2f16); + __device__ __attribute__((const)) __2f16 __ocml_fabs_2f16(__2f16); __device__ __2f16 __ocml_cos_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_exp_2f16(__2f16); __device__ __attribute__((pure)) __2f16 __ocml_exp10_2f16(__2f16); diff --git a/include/hip/hcc_detail/hip_memory.h b/include/hip/hcc_detail/hip_memory.h index 866b9e879e..0c006143de 100644 --- a/include/hip/hcc_detail/hip_memory.h +++ b/include/hip/hcc_detail/hip_memory.h @@ -27,7 +27,7 @@ THE SOFTWARE. // HIP heap is implemented as a global array with fixed size. Users may define // __HIP_SIZE_OF_PAGE and __HIP_NUM_PAGES to have a larger heap. -#if __HCC__ || __HIP__ +#if (__HCC__ || __HIP__) && __HIP_ENABLE_DEVICE_MALLOC__ // Size of page in bytes. #ifndef __HIP_SIZE_OF_PAGE diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index a3a0963ba9..28d3ae7051 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -44,6 +44,11 @@ THE SOFTWARE. #include #endif //__cplusplus +// __hip_malloc is not working. Disable it by default. 
+#ifndef __HIP_ENABLE_DEVICE_MALLOC__ +#define __HIP_ENABLE_DEVICE_MALLOC__ 0 +#endif + #if __HCC_OR_HIP_CLANG__ #if __HIP__ @@ -308,11 +313,15 @@ static constexpr Coordinates threadIdx{}; #endif // defined __HCC__ #if __HCC_OR_HIP_CLANG__ +#if __HIP_ENABLE_DEVICE_MALLOC__ extern "C" __device__ void* __hip_malloc(size_t); extern "C" __device__ void* __hip_free(void* ptr); - static inline __device__ void* malloc(size_t size) { return __hip_malloc(size); } static inline __device__ void* free(void* ptr) { return __hip_free(ptr); } +#else +static inline __device__ void* malloc(size_t size) { __builtin_trap(); return nullptr; } +static inline __device__ void* free(void* ptr) { __builtin_trap(); return nullptr; } +#endif #endif //__HCC_OR_HIP_CLANG__ @@ -504,9 +513,14 @@ hc_get_workitem_absolute_id(int dim) #define __CUDA__ #include <__clang_cuda_math_forward_declares.h> #include <__clang_cuda_complex_builtins.h> -#include -#include -#include +// Workaround for using libc++ with HIP-Clang. +// The following headers requires clang include path before standard C++ include path. +// However libc++ include path requires to be before clang include path. +// To workaround this, we pass -isystem with the parent directory of clang include +// path instead of the clang include path itself. +#include +#include +#include #undef __CUDA__ #pragma pop_macro("__CUDA__") #endif // !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__ diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 67bd2486d0..4ab66477fb 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -55,7 +55,7 @@ THE SOFTWARE. 
#define DEPRECATED(msg) __attribute__ ((deprecated(msg))) #endif // !defined(_MSC_VER) -#define DEPRECATED_MSG "This API is marked as deprecated and may not be supported in future releases.For more details please refer https://github.com/ROCm-Developer-Tools/HIP/tree/master/docs/markdown/hip_deprecated_api_list" +#define DEPRECATED_MSG "This API is marked as deprecated and may not be supported in future releases. For more details please refer https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_deprecated_api_list.md" #if defined(__HCC__) && (__hcc_workweek__ < 16155) #error("This version of HIP requires a newer version of HCC."); @@ -97,8 +97,6 @@ typedef int hipDevice_t; typedef struct ihipStream_t* hipStream_t; -// TODO: IPC implementation - #define hipIpcMemLazyEnablePeerAccess 0 #define HIP_IPC_HANDLE_SIZE 64 @@ -107,13 +105,15 @@ typedef struct hipIpcMemHandle_st { char reserved[HIP_IPC_HANDLE_SIZE]; } hipIpcMemHandle_t; +#if __HIP_VDI__ // TODO: IPC event handle currently unsupported struct ihipIpcEventHandle_t; typedef struct ihipIpcEventHandle_t* hipIpcEventHandle_t; - - -// END TODO - +#else +typedef struct hipIpcEventHandle_st { + char reserved[HIP_IPC_HANDLE_SIZE]; +} hipIpcEventHandle_t; +#endif typedef struct ihipModule_t* hipModule_t; typedef struct ihipModuleSymbol_t* hipFunction_t; @@ -266,7 +266,6 @@ typedef enum hipSharedMemConfig { ///< when adjacent threads access data 4 bytes apart. 
} hipSharedMemConfig; - /** * Struct for data in 3D * @@ -3007,9 +3006,51 @@ hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsLi * * @returns hipSuccess, hipInvalidDevice, hipErrorInvalidValue */ -hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* blockSize, + +//TODO - Match CUoccupancyB2DSize +hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, - uint32_t blockSizeLimit); + int blockSizeLimit); + +/** + * @brief determine the grid and block sizes to achieves maximum occupancy for a kernel + * + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calulated + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * @param [in] flags Extra flags for occupancy calculation (only default supported) + * + * @returns hipSuccess, hipInvalidDevice, hipErrorInvalidValue + */ +//TODO - Match CUoccupancyB2DSize +hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit, unsigned int flags); + +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] func Kernel function (hipFunction) for which occupancy is calulated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + */ +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk); + +/** + * @brief Returns occupancy for a device function. 
+ * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function(hipFunction_t) for which occupancy is calulated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] flags Extra flags for occupancy calculation (only default supported) + */ +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags); /** * @brief Returns occupancy for a device function. @@ -3020,18 +3061,7 @@ hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* block * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block */ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( - uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk); - -/** - * @brief Returns occupancy for a device function. - * - * @param [out] numBlocks Returned occupancy - * @param [in] func Kernel function (hipFunction) for which occupancy is calulated - * @param [in] blockSize Block size the kernel is intended to be launched with - * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block - */ -hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessor( - int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk); + int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk); /** * @brief Returns occupancy for a device function. 
@@ -3043,19 +3073,22 @@ hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessor( * @param [in] flags Extra flags for occupancy calculation (currently ignored) */ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault)); + int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault)); /** - * @brief Returns occupancy for a device function. + * @brief determine the grid and block sizes to achieves maximum occupancy for a kernel * - * @param [out] numBlocks Returned occupancy - * @param [in] f Kernel function(hipFunction_t) for which occupancy is calulated - * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [out] gridSize minimum grid size for maximum potential occupancy + * @param [out] blockSize block size for maximum potential occupancy + * @param [in] f kernel function for which occupancy is calulated * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block - * @param [in] flags Extra flags for occupancy calculation (currently ignored) + * @param [in] blockSizeLimit the maximum block size for the kernel, use 0 for no limit + * + * @returns hipSuccess, hipInvalidDevice, hipErrorInvalidValue */ -hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags); +hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + const void* f, size_t dynSharedMemPerBlk, + int blockSizeLimit); /** * @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched @@ -3201,10 +3234,8 @@ hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned hipError_t hipIpcCloseMemHandle(void* devPtr); -// hipError_t 
hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* devPtr); -// hipError_t hipIpcCloseMemHandle(void *devPtr); -// // hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle); -// hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned int flags); +hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event); +hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle); /** @@ -3524,17 +3555,16 @@ hipError_t hipTexObjectGetTextureDesc( #endif #if defined(__cplusplus) && !defined(__HCC__) && defined(__clang__) && defined(__HIP__) -template -static hipError_t __host__ inline hipOccupancyMaxActiveBlocksPerMultiprocessor( - uint32_t* numBlocks, F func, uint32_t blockSize, size_t dynSharedMemPerBlk) { - return ::hipOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, (hipFunction_t)func, blockSize, - dynSharedMemPerBlk); +template +static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0) { + return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast(f),dynSharedMemPerBlk,blockSizeLimit); } -template -static hipError_t __host__ inline hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - uint32_t* numBlocks, F func, uint32_t blockSize, size_t dynSharedMemPerBlk, unsigned int flags) { - return ::hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - numBlocks, (hipFunction_t)func, blockSize, dynSharedMemPerBlk, flags); + +template +static hipError_t __host__ inline hipOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + T f, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0, unsigned int flags = 0 ) { + return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize, reinterpret_cast(f),dynSharedMemPerBlk,blockSizeLimit); } #endif // defined(__cplusplus) && !defined(__HCC__) && defined(__clang__) && defined(__HIP__) @@ -3600,6 +3630,20 @@ 
const char* hipKernelNameRef(const hipFunction_t f); #ifdef __cplusplus +template +inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, T f, int blockSize, size_t dynSharedMemPerBlk) { + return hipOccupancyMaxActiveBlocksPerMultiprocessor( + numBlocks, reinterpret_cast(f), blockSize, dynSharedMemPerBlk); +} + +template +inline hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, T f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags) { + return hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + numBlocks, reinterpret_cast(f), blockSize, dynSharedMemPerBlk, flags); +} + class TlsData; #if !__HIP_VDI__ @@ -3753,7 +3797,7 @@ hipError_t hipBindTextureToMipmappedArray(const texture& tex, #if __HIP_VDI__ && !defined(__HCC__) template -inline hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* blockSize, +inline hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, F kernel, size_t dynSharedMemPerBlk, uint32_t blockSizeLimit) { return hipOccupancyMaxPotentialBlockSize(gridSize, blockSize,(hipFunction_t)kernel, dynSharedMemPerBlk, blockSizeLimit); } @@ -3854,7 +3898,7 @@ static inline hipError_t hipBindTexture( const void *devPtr, size_t size = UINT_MAX) { - return hipBindTexture(offset, tex, devPtr, tex.channelDesc, size); + return hipBindTexture(offset, &tex, devPtr, tex.channelDesc, size); } template @@ -3898,9 +3942,9 @@ static inline hipError_t hipBindTextureToArray( const struct texture &tex, hipArray_const_t array) { - struct cudaChannelFormatDesc desc; + struct hipChannelFormatDesc desc; hipError_t err = hipGetChannelDesc(&desc, array); - return (err == hipSuccess) ? hipBindTextureToArray(tex, array, desc) : err; + return (err == hipSuccess) ? 
hipBindTextureToArray(&tex, array, desc) : err; } template @@ -3924,14 +3968,14 @@ static inline hipError_t hipBindTextureToMipmappedArray( return err; } err = hipGetChannelDesc(&desc, levelArray); - return (err == hipSuccess) ? hipBindTextureToMipmappedArray(tex, mipmappedArray, desc) : err; + return (err == hipSuccess) ? hipBindTextureToMipmappedArray(&tex, mipmappedArray, desc) : err; } template static inline hipError_t hipBindTextureToMipmappedArray( const struct texture &tex, hipMipmappedArray_const_t mipmappedArray, - const struct cudaChannelFormatDesc &desc) + const struct hipChannelFormatDesc &desc) { return hipBindTextureToMipmappedArray(&tex, mipmappedArray, &desc); } diff --git a/include/hip/hcc_detail/hip_texture_types.h b/include/hip/hcc_detail/hip_texture_types.h index e92babfd5a..7c3a0138c1 100644 --- a/include/hip/hcc_detail/hip_texture_types.h +++ b/include/hip/hcc_detail/hip_texture_types.h @@ -65,6 +65,10 @@ struct __HIP_TEXTURE_ATTRIB texture : public textureReference { channelDesc = hipCreateChannelDesc(); sRGB = 0; textureObject = nullptr; + maxAnisotropy = 0; + mipmapLevelBias = 0; + minMipmapLevelClamp = 0; + maxMipmapLevelClamp = 0; } texture(int norm, enum hipTextureFilterMode fMode, enum hipTextureAddressMode aMode, @@ -78,6 +82,10 @@ struct __HIP_TEXTURE_ATTRIB texture : public textureReference { channelDesc = desc; sRGB = 0; textureObject = nullptr; + maxAnisotropy = 0; + mipmapLevelBias = 0; + minMipmapLevelClamp = 0; + maxMipmapLevelClamp = 0; } }; diff --git a/include/hip/hcc_detail/hip_vector_types.h b/include/hip/hcc_detail/hip_vector_types.h index 39457795ae..19259a3657 100644 --- a/include/hip/hcc_detail/hip_vector_types.h +++ b/include/hip/hcc_detail/hip_vector_types.h @@ -34,7 +34,7 @@ THE SOFTWARE. 
#include "hip/hcc_detail/host_defines.h" -#if !defined(_MSC_VER) || __clang__ +#if defined(__has_attribute) #if __has_attribute(ext_vector_type) #define __NATIVE_VECTOR__(n, T) T __attribute__((ext_vector_type(n))) #else @@ -694,7 +694,7 @@ THE SOFTWARE. typename U = T, typename std::enable_if{}>::type* = nullptr> inline __host__ __device__ - HIP_vector_type operator-() noexcept + HIP_vector_type operator-() const noexcept { auto tmp(*this); tmp.data = -tmp.data; @@ -705,7 +705,7 @@ THE SOFTWARE. typename U = T, typename std::enable_if{}>::type* = nullptr> inline __host__ __device__ - HIP_vector_type operator~() noexcept + HIP_vector_type operator~() const noexcept { HIP_vector_type r{*this}; r.data = ~r.data; @@ -1241,7 +1241,9 @@ DECLOP_MAKE_ONE_COMPONENT(signed long long, longlong1); DECLOP_MAKE_TWO_COMPONENT(signed long long, longlong2); DECLOP_MAKE_THREE_COMPONENT(signed long long, longlong3); DECLOP_MAKE_FOUR_COMPONENT(signed long long, longlong4); -#else // defined(_MSC_VER) +#else // !defined(__has_attribute) + +#if defined(_MSC_VER) #include #include #include @@ -1347,5 +1349,92 @@ typedef union { double4 data; } double3; typedef union { __m256d data[2]; } double8; typedef union { __m256d data[4]; } double16; +#else // !defined(_MSC_VER) + +typedef union { char data; } char1; +typedef union { char data[2]; } char2; +typedef union { char data[4]; } char4; +typedef union { char data[8]; } char8; +typedef union { char data[16]; } char16; +typedef union { char4 data; } char3; + +typedef union { unsigned char data; } uchar1; +typedef union { unsigned char data[2]; } uchar2; +typedef union { unsigned char data[4]; } uchar4; +typedef union { unsigned char data[8]; } uchar8; +typedef union { unsigned char data[16]; } uchar16; +typedef union { uchar4 data; } uchar3; + +typedef union { short data; } short1; +typedef union { short data[2]; } short2; +typedef union { short data[4]; } short4; +typedef union { short data[8]; } short8; +typedef union { short data[16]; } 
short16; +typedef union { short4 data; } short3; + +typedef union { unsigned short data; } ushort1; +typedef union { unsigned short data[2]; } ushort2; +typedef union { unsigned short data[4]; } ushort4; +typedef union { unsigned short data[8]; } ushort8; +typedef union { unsigned short data[16]; } ushort16; +typedef union { ushort4 data; } ushort3; + +typedef union { int data; } int1; +typedef union { int data[2]; } int2; +typedef union { int data[4]; } int4; +typedef union { int data[8]; } int8; +typedef union { int data[16]; } int16; +typedef union { int4 data; } int3; + +typedef union { unsigned int data; } uint1; +typedef union { unsigned int data[2]; } uint2; +typedef union { unsigned int data[4]; } uint4; +typedef union { unsigned int data[8]; } uint8; +typedef union { unsigned int data[16]; } uint16; +typedef union { uint4 data; } uint3; + +typedef union { long data; } long1; +typedef union { long data[2]; } long2; +typedef union { long data[4]; } long4; +typedef union { long data[8]; } long8; +typedef union { long data[16]; } long16; +typedef union { long4 data; } long3; + +typedef union { unsigned long data; } ulong1; +typedef union { unsigned long data[2]; } ulong2; +typedef union { unsigned long data[4]; } ulong4; +typedef union { unsigned long data[8]; } ulong8; +typedef union { unsigned long data[16]; } ulong16; +typedef union { ulong4 data; } ulong3; + +typedef union { long long data; } longlong1; +typedef union { long long data[2]; } longlong2; +typedef union { long long data[4]; } longlong4; +typedef union { long long data[8]; } longlong8; +typedef union { long long data[16]; } longlong16; +typedef union { longlong4 data; } longlong3; + +typedef union { unsigned long long data; } ulonglong1; +typedef union { unsigned long long data[2]; } ulonglong2; +typedef union { unsigned long long data[4]; } ulonglong4; +typedef union { unsigned long long data[8]; } ulonglong8; +typedef union { unsigned long long data[16]; } ulonglong16; +typedef union { 
ulonglong4 data; } ulonglong3; + +typedef union { float data; } float1; +typedef union { float data[2]; } float2; +typedef union { float data[4]; } float4; +typedef union { float data[8]; } float8; +typedef union { float data[16]; } float16; +typedef union { float4 data; } float3; + +typedef union { double data; } double1; +typedef union { double data[2]; } double2; +typedef union { double data[4]; } double4; +typedef union { double data[8]; } double8; +typedef union { double data[16]; } double16; +typedef union { double4 data; } double3; + #endif // defined(_MSC_VER) +#endif // defined(__has_attribute) #endif diff --git a/include/hip/hcc_detail/hiprtc.h b/include/hip/hcc_detail/hiprtc.h index 624f1ea157..fecea75340 100644 --- a/include/hip/hcc_detail/hiprtc.h +++ b/include/hip/hcc_detail/hiprtc.h @@ -28,6 +28,10 @@ extern "C" { #include +#if !defined(_WIN32) +#pragma GCC visibility push (default) +#endif + enum hiprtcResult { HIPRTC_SUCCESS = 0, HIPRTC_ERROR_OUT_OF_MEMORY = 1, @@ -79,6 +83,10 @@ hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* code); hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* codeSizeRet); +#if !defined(_WIN32) +#pragma GCC visibility pop +#endif + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/include/hip/hcc_detail/host_defines.h b/include/hip/hcc_detail/host_defines.h index 11bd577f08..ad28cc7626 100644 --- a/include/hip/hcc_detail/host_defines.h +++ b/include/hip/hcc_detail/host_defines.h @@ -60,7 +60,7 @@ THE SOFTWARE. */ // _restrict is supported by the compiler #define __shared__ tile_static -#define __constant__ __attribute__((hc)) +#define __constant__ __attribute__((hc, annotate("__HIP_constant__"))) #elif defined(__clang__) && defined(__HIP__) @@ -72,7 +72,6 @@ THE SOFTWARE. 
#define __noinline__ __attribute__((noinline)) #define __forceinline__ inline __attribute__((always_inline)) -#define __hip_pinned_shadow__ __attribute__((hip_pinned_shadow)) #else diff --git a/include/hip/hcc_detail/math_functions.h b/include/hip/hcc_detail/math_functions.h index 19def9ec7e..11985c3242 100644 --- a/include/hip/hcc_detail/math_functions.h +++ b/include/hip/hcc_detail/math_functions.h @@ -47,6 +47,19 @@ THE SOFTWARE. #include "kalmar_math.h" #endif +#if _LIBCPP_VERSION && __HIP__ +namespace std { +template <> +struct __numeric_type<_Float16> +{ + static _Float16 __test(_Float16); + + typedef _Float16 type; + static const bool value = true; +}; +} +#endif // _LIBCPP_VERSION + #pragma push_macro("__DEVICE__") #pragma push_macro("__RETURN_TYPE") @@ -1397,12 +1410,18 @@ float func(float x, int y) \ } __DEF_FLOAT_FUN2I(scalbn) -#if __HCC__ template __DEVICE__ inline static T min(T arg1, T arg2) { return (arg1 < arg2) ? arg1 : arg2; } +template +__DEVICE__ inline static T max(T arg1, T arg2) { + return (arg1 > arg2) ? arg1 : arg2; +} + +#if __HCC__ + __DEVICE__ inline static uint32_t min(uint32_t arg1, int32_t arg2) { return min(arg1, (uint32_t) arg2); } @@ -1424,11 +1443,6 @@ __DEVICE__ inline static unsigned long long min(long long arg1, unsigned long lo return min((unsigned long long) arg1, arg2); }*/ -template -__DEVICE__ inline static T max(T arg1, T arg2) { - return (arg1 > arg2) ? 
arg1 : arg2; -} - __DEVICE__ inline static uint32_t max(uint32_t arg1, int32_t arg2) { return max(arg1, (uint32_t) arg2); } diff --git a/include/hip/hcc_detail/program_state.hpp b/include/hip/hcc_detail/program_state.hpp index fca88f8094..6128a4c158 100644 --- a/include/hip/hcc_detail/program_state.hpp +++ b/include/hip/hcc_detail/program_state.hpp @@ -73,6 +73,9 @@ public: hsa_executable_t load_executable(const char*, const size_t, hsa_executable_t, hsa_agent_t); + hsa_executable_t load_executable_no_copy(const char*, const size_t, + hsa_executable_t, + hsa_agent_t); void* global_addr_by_name(const char* name); diff --git a/include/hip/hip_bfloat16.h b/include/hip/hip_bfloat16.h new file mode 100644 index 0000000000..ef09cf00d0 --- /dev/null +++ b/include/hip/hip_bfloat16.h @@ -0,0 +1,280 @@ +/** + * MIT License + * + * Copyright 2019-2020 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/*!\file + * \brief hip_bfloat16.h provides struct for hip_bfloat16 typedef + */ + +#ifndef _HIP_BFLOAT16_H_ +#define _HIP_BFLOAT16_H_ + +#if __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__)) + +// If this is a C compiler, C++ compiler below C++11, or a host-only compiler, we only +// include a minimal definition of hip_bfloat16 + +#include +/*! \brief Struct to represent a 16 bit brain floating point number. */ +typedef struct +{ + uint16_t data; +} hip_bfloat16; + +#else // __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__)) + +#include +#include +#include +#include +#include +#include + +struct hip_bfloat16 +{ + uint16_t data; + + enum truncate_t + { + truncate + }; + + __host__ __device__ hip_bfloat16() = default; + + // round upper 16 bits of IEEE float to convert to bfloat16 + explicit __host__ __device__ hip_bfloat16(float f) + : data(float_to_bfloat16(f)) + { + } + + explicit __host__ __device__ hip_bfloat16(float f, truncate_t) + : data(truncate_float_to_bfloat16(f)) + { + } + + // zero extend lower 16 bits of bfloat16 to convert to IEEE float + __host__ __device__ operator float() const + { + union + { + uint32_t int32; + float fp32; + } u = {uint32_t(data) << 16}; + return u.fp32; + } + + static __host__ __device__ hip_bfloat16 round_to_bfloat16(float f) + { + hip_bfloat16 output; + output.data = float_to_bfloat16(f); + return output; + } + + static __host__ __device__ hip_bfloat16 round_to_bfloat16(float f, truncate_t) + { + hip_bfloat16 output; + output.data = truncate_float_to_bfloat16(f); + return output; + } + +private: + static __host__ __device__ uint16_t float_to_bfloat16(float f) + { + union + { + float fp32; + uint32_t int32; + } u = {f}; + if(~u.int32 & 0x7f800000) + { + // When the exponent bits are not all 1s, then the value is zero, normal, + // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus + // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd). 
+ // This causes the bfloat16's mantissa to be incremented by 1 if the 16 + // least significant bits of the float mantissa are greater than 0x8000, + // or if they are equal to 0x8000 and the least significant bit of the + // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when + // the lower 16 bits are exactly 0x8000. If the bfloat16 mantissa already + // has the value 0x7f, then incrementing it causes it to become 0x00 and + // the exponent is incremented by one, which is the next higher FP value + // to the unrounded bfloat16 value. When the bfloat16 value is subnormal + // with an exponent of 0x00 and a mantissa of 0x7F, it may be rounded up + // to a normal value with an exponent of 0x01 and a mantissa of 0x00. + // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F, + // incrementing it causes it to become an exponent of 0xFF and a mantissa + // of 0x00, which is Inf, the next higher value to the unrounded value. + u.int32 += 0x7fff + ((u.int32 >> 16) & 1); // Round to nearest, round to even + } + else if(u.int32 & 0xffff) + { + // When all of the exponent bits are 1, the value is Inf or NaN. + // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero + // mantissa bit. Quiet NaN is indicated by the most significant mantissa + // bit being 1. Signaling NaN is indicated by the most significant + // mantissa bit being 0 but some other bit(s) being 1. If any of the + // lower 16 bits of the mantissa are 1, we set the least significant bit + // of the bfloat16 mantissa, in order to preserve signaling NaN in case + // the bloat16's mantissa bits are all 0. 
+ u.int32 |= 0x10000; // Preserve signaling NaN + } + return uint16_t(u.int32 >> 16); + } + + // Truncate instead of rounding, preserving SNaN + static __host__ __device__ uint16_t truncate_float_to_bfloat16(float f) + { + union + { + float fp32; + uint32_t int32; + } u = {f}; + return uint16_t(u.int32 >> 16) | (!(~u.int32 & 0x7f800000) && (u.int32 & 0xffff)); + } +}; + +typedef struct +{ + uint16_t data; +} hip_bfloat16_public; + +static_assert(std::is_standard_layout{}, + "hip_bfloat16 is not a standard layout type, and thus is " + "incompatible with C."); + +static_assert(std::is_trivial{}, + "hip_bfloat16 is not a trivial type, and thus is " + "incompatible with C."); + +static_assert(sizeof(hip_bfloat16) == sizeof(hip_bfloat16_public) + && offsetof(hip_bfloat16, data) == offsetof(hip_bfloat16_public, data), + "internal hip_bfloat16 does not match public hip_bfloat16"); + +inline std::ostream& operator<<(std::ostream& os, const hip_bfloat16& bf16) +{ + return os << float(bf16); +} +inline __host__ __device__ hip_bfloat16 operator+(hip_bfloat16 a) +{ + return a; +} +inline __host__ __device__ hip_bfloat16 operator-(hip_bfloat16 a) +{ + a.data ^= 0x8000; + return a; +} +inline __host__ __device__ hip_bfloat16 operator+(hip_bfloat16 a, hip_bfloat16 b) +{ + return hip_bfloat16(float(a) + float(b)); +} +inline __host__ __device__ hip_bfloat16 operator-(hip_bfloat16 a, hip_bfloat16 b) +{ + return hip_bfloat16(float(a) - float(b)); +} +inline __host__ __device__ hip_bfloat16 operator*(hip_bfloat16 a, hip_bfloat16 b) +{ + return hip_bfloat16(float(a) * float(b)); +} +inline __host__ __device__ hip_bfloat16 operator/(hip_bfloat16 a, hip_bfloat16 b) +{ + return hip_bfloat16(float(a) / float(b)); +} +inline __host__ __device__ bool operator<(hip_bfloat16 a, hip_bfloat16 b) +{ + return float(a) < float(b); +} +inline __host__ __device__ bool operator==(hip_bfloat16 a, hip_bfloat16 b) +{ + return float(a) == float(b); +} +inline __host__ __device__ bool 
operator>(hip_bfloat16 a, hip_bfloat16 b) +{ + return b < a; +} +inline __host__ __device__ bool operator<=(hip_bfloat16 a, hip_bfloat16 b) +{ + return !(a > b); +} +inline __host__ __device__ bool operator!=(hip_bfloat16 a, hip_bfloat16 b) +{ + return !(a == b); +} +inline __host__ __device__ bool operator>=(hip_bfloat16 a, hip_bfloat16 b) +{ + return !(a < b); +} +inline __host__ __device__ hip_bfloat16& operator+=(hip_bfloat16& a, hip_bfloat16 b) +{ + return a = a + b; +} +inline __host__ __device__ hip_bfloat16& operator-=(hip_bfloat16& a, hip_bfloat16 b) +{ + return a = a - b; +} +inline __host__ __device__ hip_bfloat16& operator*=(hip_bfloat16& a, hip_bfloat16 b) +{ + return a = a * b; +} +inline __host__ __device__ hip_bfloat16& operator/=(hip_bfloat16& a, hip_bfloat16 b) +{ + return a = a / b; +} +inline __host__ __device__ hip_bfloat16& operator++(hip_bfloat16& a) +{ + return a += hip_bfloat16(1.0f); +} +inline __host__ __device__ hip_bfloat16& operator--(hip_bfloat16& a) +{ + return a -= hip_bfloat16(1.0f); +} +inline __host__ __device__ hip_bfloat16 operator++(hip_bfloat16& a, int) +{ + hip_bfloat16 orig = a; + ++a; + return orig; +} +inline __host__ __device__ hip_bfloat16 operator--(hip_bfloat16& a, int) +{ + hip_bfloat16 orig = a; + --a; + return orig; +} + +namespace std +{ + constexpr __host__ __device__ bool isinf(hip_bfloat16 a) + { + return !(~a.data & 0x7f80) && !(a.data & 0x7f); + } + constexpr __host__ __device__ bool isnan(hip_bfloat16 a) + { + return !(~a.data & 0x7f80) && +(a.data & 0x7f); + } + constexpr __host__ __device__ bool iszero(hip_bfloat16 a) + { + return !(a.data & 0x7fff); + } +} + +#endif // __cplusplus < 201103L || (!defined(__HCC__) && !defined(__HIPCC__)) + +#endif // _HIP_BFLOAT16_H_ diff --git a/include/hip/hip_runtime_api.h b/include/hip/hip_runtime_api.h index cf6a64ad65..b0974aeef6 100644 --- a/include/hip/hip_runtime_api.h +++ b/include/hip/hip_runtime_api.h @@ -321,7 +321,6 @@ typedef enum hipDeviceAttribute_t { 
hipDeviceAttributeIntegrated, ///< iGPU hipDeviceAttributeCooperativeLaunch, ///< Support cooperative launch hipDeviceAttributeCooperativeMultiDeviceLaunch, ///< Support cooperative launch on multiple devices - hipDeviceAttributeMaxTexture1DWidth, ///< Maximum number of elements in 1D images hipDeviceAttributeMaxTexture2DWidth, ///< Maximum dimension width of 2D images in image elements hipDeviceAttributeMaxTexture2DHeight, ///< Maximum dimension height of 2D images in image elements diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index 6e0d02d0c0..3890028950 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -186,6 +186,7 @@ typedef struct cudaArray hipArray; typedef struct cudaArray* hipArray_t; typedef struct cudaArray* hipArray_const_t; typedef struct cudaFuncAttributes hipFuncAttributes; +typedef struct cudaLaunchParams hipLaunchParams; #define hipFunction_attribute CUfunction_attribute #define hip_Memcpy2D CUDA_MEMCPY2D #define hipMemcpy3DParms cudaMemcpy3DParms @@ -860,7 +861,7 @@ inline static hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, } -inline hipError_t hipMemcpyWithStream(void* dst, const void* src, +inline static hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind copyKind, hipStream_t stream) { cudaError_t error = cudaMemcpyAsync(dst, src, sizeBytes, @@ -1134,6 +1135,10 @@ inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int dev p_prop->integrated = cdprop.integrated; p_prop->cooperativeLaunch = cdprop.cooperativeLaunch; p_prop->cooperativeMultiDeviceLaunch = cdprop.cooperativeMultiDeviceLaunch; + p_prop->cooperativeMultiDeviceUnmatchedFunc = 0; + p_prop->cooperativeMultiDeviceUnmatchedGridDim = 0; + p_prop->cooperativeMultiDeviceUnmatchedBlockDim = 0; + p_prop->cooperativeMultiDeviceUnmatchedSharedMem = 0; p_prop->maxTexture1D = cdprop.maxTexture1D; 
p_prop->maxTexture2D[0] = cdprop.maxTexture2D[0]; @@ -1271,6 +1276,12 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att case hipDeviceAttributeEccEnabled: cdattr = cudaDevAttrEccEnabled; break; + case hipDeviceAttributeCooperativeLaunch: + cdattr = cudaDevAttrCooperativeLaunch; + break; + case hipDeviceAttributeCooperativeMultiDeviceLaunch: + cdattr = cudaDevAttrCooperativeMultiDeviceLaunch; + break; default: return hipCUDAErrorTohipError(cudaErrorInvalidValue); } @@ -1284,10 +1295,50 @@ inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBl const void* func, int blockSize, size_t dynamicSMemSize) { - cudaError_t cerror; - cerror = - cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func, blockSize, dynamicSMemSize); - return hipCUDAErrorTohipError(cerror); + return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func, + blockSize, dynamicSMemSize)); +} + +inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, + const void* func, + int blockSize, + size_t dynamicSMemSize, + unsigned int flags) { + return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func, + blockSize, dynamicSMemSize, flags)); +} + +inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + hipFunction_t f, + int blockSize, + size_t dynamicSMemSize ){ + return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, f, + blockSize, dynamicSMemSize)); +} + +inline static hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, + hipFunction_t f, + int blockSize, + size_t dynamicSMemSize, + unsigned int flags ) { + return hipCUResultTohipError(cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks,f, + blockSize, dynamicSMemSize, flags)); +} + +//TODO - Match CUoccupancyB2DSize +inline static hipError_t 
hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit){ + return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSize(gridSize, blockSize, f, NULL, + dynSharedMemPerBlk, blockSizeLimit)); +} + +//TODO - Match CUoccupancyB2DSize +inline static hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit, unsigned int flags){ + return hipCUResultTohipError(cuOccupancyMaxPotentialBlockSizeWithFlags(gridSize, blockSize, f, NULL, + dynSharedMemPerBlk, blockSizeLimit, flags)); } inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void* ptr) { @@ -1679,6 +1730,17 @@ inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_ return hipCUDAErrorTohipError(cudaGetChannelDesc(desc,array)); } +inline static hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim, + void** kernelParams, unsigned int sharedMemBytes, + hipStream_t stream) { + return hipCUDAErrorTohipError( + cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream)); +} + +inline static hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, unsigned int flags) { + return hipCUDAErrorTohipError(cudaLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags)); +} #ifdef __cplusplus } @@ -1686,13 +1748,36 @@ inline static hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_ #ifdef __CUDACC__ +template +inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + T func, + int blockSize, + size_t dynamicSMemSize) { + return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, func, + blockSize, dynamicSMemSize)); +} + template inline static hipError_t hipOccupancyMaxPotentialBlockSize(int* 
minGridSize, int* blockSize, T func, size_t dynamicSMemSize = 0, int blockSizeLimit = 0) { - cudaError_t cerror; - cerror = cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func, dynamicSMemSize, blockSizeLimit); - return hipCUDAErrorTohipError(cerror); + return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func, + dynamicSMemSize, blockSizeLimit)); +} + +template +inline static hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(int* minGridSize, int* blockSize, T func, + size_t dynamicSMemSize = 0, + int blockSizeLimit = 0, unsigned int flags = 0) { + return hipCUDAErrorTohipError(cudaOccupancyMaxPotentialBlockSize(minGridSize, blockSize, func, + dynamicSMemSize, blockSizeLimit, flags)); +} + +template +inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, T func, + int blockSize, size_t dynamicSMemSize,unsigned int flags) { + return hipCUDAErrorTohipError(cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func, + blockSize, dynamicSMemSize, flags)); } template @@ -1742,6 +1827,14 @@ template inline static hipChannelFormatDesc hipCreateChannelDesc() { return cudaCreateChannelDesc(); } + +template +inline static hipError_t hipLaunchCooperativeKernel(T f, dim3 gridDim, dim3 blockDim, + void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream) { + return hipCUDAErrorTohipError( + cudaLaunchCooperativeKernel(f, gridDim, blockDim, kernelParams, sharedMemBytes, stream)); +} + #endif //__CUDACC__ #endif // HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_RUNTIME_API_H diff --git a/lpl_ca/CMakeLists.txt b/lpl_ca/CMakeLists.txt index ac01a6a0ab..f626b88d89 100644 --- a/lpl_ca/CMakeLists.txt +++ b/lpl_ca/CMakeLists.txt @@ -14,7 +14,7 @@ install(TARGETS lpl RUNTIME DESTINATION bin) #-------------------------------------LPL--------------------------------------# #-------------------------------------CA---------------------------------------# -add_executable(ca ca.cpp 
${PROJECT_SOURCE_DIR}/src/code_object_bundle.cpp) +add_executable(ca ca.cpp) set_target_properties( ca PROPERTIES CXX_STANDARD 11 diff --git a/lpl_ca/ca.hpp b/lpl_ca/ca.hpp index db63f02498..2d691cd38a 100644 --- a/lpl_ca/ca.hpp +++ b/lpl_ca/ca.hpp @@ -2,7 +2,7 @@ #include "common.hpp" -#include "../include/hip/hcc_detail/code_object_bundle.hpp" +#include "../src/code_object_bundle.inl" #include "clara/clara.hpp" diff --git a/packaging/convert_md_to_html.sh b/packaging/convert_md_to_html.sh index e6442a9ef2..fc222914bb 100755 --- a/packaging/convert_md_to_html.sh +++ b/packaging/convert_md_to_html.sh @@ -21,24 +21,22 @@ trap cleanup EXIT export GRIPURL=$hip_srcdir export GRIPHOME=$workdir echo "CACHE_DIRECTORY = '$html_destdir/asset'" > $workdir/settings.py -mkdir -p $html_destdir $html_destdir/hipify-clang $html_destdir/docs/markdown +mkdir -p $html_destdir $html_destdir/docs/markdown # convert all md files to html pushd $hip_srcdir -for f in *.md hipify-clang/*.md docs/markdown/*.md; do grip --export --no-inline $f $html_destdir/${f%.*}.html; done +for f in *.md docs/markdown/*.md; do grip --export --no-inline $f $html_destdir/${f%.*}.html; done popd # convert absolute links to relative links pushd $html_destdir for f in *.html; do sed -i "s?$GRIPURL/??g" $f; done -for f in hipify-clang/*.html; do sed -i "s?$GRIPURL/?../?g" $f; done for f in docs/markdown/*.html; do sed -i "s?$GRIPURL/?../../?g" $f; done popd # update document titles pushd $html_destdir for f in *.html; do sed -i "s?.md - Grip??g" $f; done -for f in hipify-clang/*.html; do sed -i "s?.md - Grip??g" $f; done for f in docs/markdown/*.html; do sed -i "s?.md - Grip??g" $f; done popd @@ -46,8 +44,6 @@ popd pushd $html_destdir for f in *.html; do sed -i "s?.md\"?.html\"?g" $f; done for f in *.html; do sed -i "s?.md#?.html#?g" $f; done -for f in hipify-clang/*.html; do sed -i "s?.md\"?.html\"?g" $f; done -for f in hipify-clang/*.html; do sed -i "s?.md#?.html#?g" $f; done for f in docs/markdown/*.html; do 
sed -i "s?.md\"?.html\"?g" $f; done for f in docs/markdown/*.html; do sed -i "s?.md#?.html#?g" $f; done popd diff --git a/packaging/hip-base.txt b/packaging/hip-base.txt index 4ff936dba4..fc8becf84f 100644 --- a/packaging/hip-base.txt +++ b/packaging/hip-base.txt @@ -25,16 +25,15 @@ set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR} set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm") -set(CPACK_DEBIAN_PACKAGE_DEPENDS "perl (>= 5.0), llvm-amdgpu") -set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_base") +set(CPACK_DEBIAN_PACKAGE_DEPENDS "perl (>= 5.0)") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip-base") set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_base") -set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_base") set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm") set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") -set(CPACK_RPM_PACKAGE_REQUIRES "perl >= 5.0, llvm-amdgpu") +set(CPACK_RPM_PACKAGE_REQUIRES "perl >= 5.0") set(CPACK_RPM_PACKAGE_OBSOLETES "hip_base") set(CPACK_RPM_PACKAGE_CONFLICTS "hip_base") set(CPACK_BINARY_RPM "ON") diff --git a/packaging/hip-doc.txt b/packaging/hip-doc.txt index d97ddc7d3a..41db246d31 100644 --- a/packaging/hip-doc.txt +++ b/packaging/hip-doc.txt @@ -32,9 +32,8 @@ set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR} set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip-base (= ${CPACK_PACKAGE_VERSION})") -set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_doc") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip-doc") set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_doc") -set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_doc") set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") 
set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") diff --git a/packaging/hip-hcc.txt b/packaging/hip-hcc.txt index 21e138e1ed..d084e8d966 100644 --- a/packaging/hip-hcc.txt +++ b/packaging/hip-hcc.txt @@ -12,7 +12,7 @@ if(NOT @HIP_COMPILER@ STREQUAL "clang") endif() install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip) -install(FILES @hip_SOURCE_DIR@/packaging/hip-targets.cmake @hip_SOURCE_DIR@/packaging/hip-targets-release.cmake DESTINATION lib/cmake/hip) +install(FILES @CONFIG_PACKAGE_INSTALL_DIR@/hip-targets.cmake @CONFIG_PACKAGE_INSTALL_DIR@/hip-targets-release.cmake DESTINATION lib/cmake/hip) ############################# # Packaging steps @@ -37,9 +37,8 @@ set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm") set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip-base (= ${CPACK_PACKAGE_VERSION}), ${HCC_PACKAGE_NAME} (= @HCC_PACKAGE_VERSION@), comgr (>= 1.1)") -set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_hcc") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip-hcc") set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_hcc") -set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_hcc") set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") diff --git a/packaging/hip-nvcc.txt b/packaging/hip-nvcc.txt index dc36b628c7..4b11939609 100644 --- a/packaging/hip-nvcc.txt +++ b/packaging/hip-nvcc.txt @@ -19,9 +19,8 @@ set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm") set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip-base (= ${CPACK_PACKAGE_VERSION}), cuda (>= 7.5)") -set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_nvcc") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip-nvcc") set(CPACK_DEBIAN_PACKAGE_REPLACES 
"hip_nvcc") -set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_nvcc") set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") diff --git a/packaging/hip-samples.txt b/packaging/hip-samples.txt index 737f048d8f..6f643c3865 100644 --- a/packaging/hip-samples.txt +++ b/packaging/hip-samples.txt @@ -19,15 +19,14 @@ set(CPACK_PACKAGE_VERSION_PATCH @HIP_VERSION_PATCH@) set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}) set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") -set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip-vdi (= ${CPACK_PACKAGE_VERSION})") -set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_samples") +set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip-base (= ${CPACK_PACKAGE_VERSION})") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip-samples") set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_samples") -set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_samples") set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") string(REPLACE "-" "_" HIP_BASE_VERSION ${CPACK_PACKAGE_VERSION}) -set(CPACK_RPM_PACKAGE_REQUIRES "hip-vdi = ${HIP_BASE_VERSION}") +set(CPACK_RPM_PACKAGE_REQUIRES "hip-rocclr = ${HIP_BASE_VERSION}") set(CPACK_RPM_PACKAGE_OBSOLETES "hip_samples") set(CPACK_RPM_PACKAGE_CONFLICTS "hip_samples") set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt") diff --git a/packaging/hip-targets-release.cmake b/packaging/hip-targets-release.cmake deleted file mode 100644 index 90fd961184..0000000000 --- a/packaging/hip-targets-release.cmake +++ /dev/null @@ -1,58 +0,0 @@ -#---------------------------------------------------------------- -# Generated CMake target import file for configuration "Release". -#---------------------------------------------------------------- - -# Commands may need to know the format version. 
-set(CMAKE_IMPORT_FILE_VERSION 1) - -#get_filename_component cannot resolve the symlinks if called from /opt/rocm/lib/hip -#and do three level up again -get_filename_component(_DIR "${CMAKE_CURRENT_LIST_DIR}" REALPATH) -get_filename_component(_IMPORT_PREFIX "${_DIR}/../../../" REALPATH) - -if(NOT HIP_RUNTIME MATCHES "VDI") -# Import target "hip::hip_hcc_static" for configuration "Release" - set_property(TARGET hip::hip_hcc_static APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) - if(HIP_COMPILER STREQUAL "clang") - set_target_properties(hip::hip_hcc_static PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" - IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libhip_hcc_static.a" - ) - else() - set_target_properties(hip::hip_hcc_static PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES_RELEASE "CXX" - IMPORTED_LINK_INTERFACE_LIBRARIES_RELEASE "hc_am" - IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libhip_hcc_static.a" - ) - endif() - list(APPEND _IMPORT_CHECK_TARGETS hip::hip_hcc_static ) - list(APPEND _IMPORT_CHECK_FILES_FOR_hip::hip_hcc_static "${_IMPORT_PREFIX}/lib/libhip_hcc_static.a" ) - - # Import target "hip::hip_hcc" for configuration "Release" - set_property(TARGET hip::hip_hcc APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) - if(HIP_COMPILER STREQUAL "clang") - set_target_properties(hip::hip_hcc PROPERTIES - IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libhip_hcc.so" - IMPORTED_SONAME_RELEASE "libhip_hcc.so") - else() - set_target_properties(hip::hip_hcc PROPERTIES - IMPORTED_LINK_INTERFACE_LIBRARIES_RELEASE "hcc::hccrt;hcc::hc_am" - IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libhip_hcc.so" - IMPORTED_SONAME_RELEASE "libhip_hcc.so") - endif() - list(APPEND _IMPORT_CHECK_TARGETS hip::hip_hcc ) - list(APPEND _IMPORT_CHECK_FILES_FOR_hip::hip_hcc "${_IMPORT_PREFIX}/lib/libhip_hcc.so" ) - -else() - - set_property(TARGET hip::amdhip64 APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE) - set_target_properties(hip::amdhip64 PROPERTIES - 
IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib/libamdhip64.so" - IMPORTED_SONAME_RELEASE "libamdhip64.so") - list(APPEND _IMPORT_CHECK_TARGETS hip::amdhip64) - list(APPEND _IMPORT_CHECK_FILES_FOR_hip::amdhip64 "${_IMPORT_PREFIX}/lib/libamdhip64.so" ) - -endif() - -# Commands beyond this point should not need to know the version. -set(CMAKE_IMPORT_FILE_VERSION) diff --git a/packaging/hip-targets.cmake b/packaging/hip-targets.cmake deleted file mode 100644 index ac72419f52..0000000000 --- a/packaging/hip-targets.cmake +++ /dev/null @@ -1,160 +0,0 @@ -# Generated by CMake 3.5.1 - -if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.5) - message(FATAL_ERROR "CMake >= 2.6.0 required") -endif() -cmake_policy(PUSH) -cmake_policy(VERSION 2.6) -#---------------------------------------------------------------- -# Generated CMake target import file. -#---------------------------------------------------------------- - -# Commands may need to know the format version. -set(CMAKE_IMPORT_FILE_VERSION 1) - -# Protect against multiple inclusion, which would fail when already imported targets are added once more. 
-set(_targetsDefined) -set(_targetsNotDefined) -set(_expectedTargets) -if(HIP_RUNTIME MATCHES "VDI") - foreach(_expectedTarget hip::amdhip64 hip::host hip::device) -else() - foreach(_expectedTarget hip:hip_hcc_static hip::hip_hcc hip::host hip::device) -endif() - list(APPEND _expectedTargets ${_expectedTarget}) - if(NOT TARGET ${_expectedTarget}) - list(APPEND _targetsNotDefined ${_expectedTarget}) - endif() - if(TARGET ${_expectedTarget}) - list(APPEND _targetsDefined ${_expectedTarget}) - endif() -endforeach() -if("${_targetsDefined}" STREQUAL "${_expectedTargets}") - set(CMAKE_IMPORT_FILE_VERSION) - cmake_policy(POP) - return() -endif() -if(NOT "${_targetsDefined}" STREQUAL "") - message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_targetsDefined}\nTargets not yet defined: ${_targetsNotDefined}\n") -endif() -unset(_targetsDefined) -unset(_targetsNotDefined) -unset(_expectedTargets) - -#If HIP isnot installed under ROCm, need this to find HSA assuming HSA is under ROCm -if( DEFINED ENV{ROCM_PATH} ) - set(ROCM_PATH "$ENV{ROCM_PATH}") -endif() - -#get_filename_component cannot resolve the symlinks if called from /opt/rocm/lib/hip -#and do three level up again -get_filename_component(_DIR "${CMAKE_CURRENT_LIST_DIR}" REALPATH) -get_filename_component(_IMPORT_PREFIX "${_DIR}/../../../" REALPATH) - -# Create imported target hip::hip_hcc_static -if( NOT HIP_RUNTIME MATCHES "VDI") - add_library(hip::hip_hcc_static STATIC IMPORTED) -endif() - -#if HSA is not under ROCm then provide CMAKE_PREFIX_PATH= -find_path(HSA_HEADER hsa/hsa.h - PATHS - "${ROCM_PATH}/include" - #Assuming HIP is installed under ROCm - "${_IMPORT_PREFIX}/../include" - /opt/rocm/include -) - -if (HSA_HEADER-NOTFOUND) - message (FATAL_ERROR "HSA header not found! 
ROCM_PATH environment not set") -endif() -if(HIP_RUNTIME MATCHES "VDI") - # Create imported target hip::amdhip64 - add_library(hip::amdhip64 SHARED IMPORTED) - - set_target_properties(hip::amdhip64 PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" - ) -else() - set_target_properties(hip::hip_hcc_static PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}") - - # Create imported target hip::hip_hcc - add_library(hip::hip_hcc SHARED IMPORTED) - - set_target_properties(hip::hip_hcc PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include;${HSA_HEADER}" - ) -endif() - -# Create imported target hip::host -add_library(hip::host INTERFACE IMPORTED) - -if(HIP_RUNTIME MATCHES "VDI") - set_target_properties(hip::host PROPERTIES - INTERFACE_LINK_LIBRARIES "hip::amdhip64") -else() - set_target_properties(hip::host PROPERTIES - INTERFACE_LINK_LIBRARIES "hip::hip_hcc") -endif() - - -# Create imported target hip::device -add_library(hip::device INTERFACE IMPORTED) - -if(HIP_COMPILER STREQUAL "hcc") -set_target_properties(hip::device PROPERTIES - INTERFACE_LINK_LIBRARIES "hip::host;hcc::hccrt;hcc::hc_am" - INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/../include" -) -else() -set_target_properties(hip::device PROPERTIES - INTERFACE_LINK_LIBRARIES "hip::host" - INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include" - INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include" -) -endif() - -if(CMAKE_VERSION VERSION_LESS 3.0.0) - message(FATAL_ERROR "This file relies on consumers using CMake 3.0.0 or greater.") -endif() - -# Load information for each installed configuration. 
-get_filename_component(_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) -file(GLOB CONFIG_FILES "${_DIR}/hip-targets-*.cmake") -foreach(f ${CONFIG_FILES}) - include(${f}) -endforeach() - -# Cleanup temporary variables. -set(_IMPORT_PREFIX) - -# Loop over all imported files and verify that they actually exist -foreach(target ${_IMPORT_CHECK_TARGETS} ) - foreach(file ${_IMPORT_CHECK_FILES_FOR_${target}} ) - if(NOT EXISTS "${file}" ) - message(FATAL_ERROR "The imported target \"${target}\" references the file - \"${file}\" -but this file does not exist. Possible reasons include: -* The file was deleted, renamed, or moved to another location. -* An install or uninstall procedure did not complete successfully. -* The installation package was faulty and contained - \"${CMAKE_CURRENT_LIST_FILE}\" -but not all the files it references. -") - endif() - endforeach() - unset(_IMPORT_CHECK_FILES_FOR_${target}) -endforeach() -unset(_IMPORT_CHECK_TARGETS) - -# This file does not depend on other imported targets which have -# been exported from the same project but in a separate export set. - -# Commands beyond this point should not need to know the version. 
-set(CMAKE_IMPORT_FILE_VERSION) -cmake_policy(POP) diff --git a/packaging/hip-vdi.txt b/packaging/hip-vdi.txt index c80e4aed6f..04eb892cc8 100644 --- a/packaging/hip-vdi.txt +++ b/packaging/hip-vdi.txt @@ -1,22 +1,35 @@ cmake_minimum_required(VERSION 2.8.3) -project(hip_vdi) +project(hip_rocclr) install(FILES @PROJECT_BINARY_DIR@/lib/libamdhip64.so DESTINATION lib) +install(FILES @PROJECT_BINARY_DIR@/lib/libamdhip64.so.@HIP_LIB_VERSION_MAJOR@ DESTINATION lib) +install(FILES @PROJECT_BINARY_DIR@/lib/libamdhip64.so.@HIP_LIB_VERSION_STRING@ DESTINATION lib) +install(FILES @PROJECT_BINARY_DIR@/lib/libamdhip64_static.a DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/lib/libhip_hcc.so DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/lib/libhiprtc.so DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/.hipInfo DESTINATION lib) install(FILES @PROJECT_BINARY_DIR@/hip-config.cmake @PROJECT_BINARY_DIR@/hip-config-version.cmake DESTINATION lib/cmake/hip) -install(FILES @hip_SOURCE_DIR@/packaging/hip-targets.cmake @hip_SOURCE_DIR@/packaging/hip-targets-release.cmake DESTINATION lib/cmake/hip) ############################# # Packaging steps ############################# set(CPACK_SET_DESTDIR TRUE) -set(CPACK_INSTALL_PREFIX "/opt/rocm/hip") -set(CPACK_PACKAGE_NAME "hip-vdi") -set(HCC_PACKAGE_NAME "vdi") -set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "HIP: Heterogenous-computing Interface for Portability [VDI]") +set(CPACK_INSTALL_PREFIX @CPACK_INSTALL_PREFIX@) + +## cmake generated target files contain IMPORTED_LOCATION_RELEASE etc. which +## is the installation path when building the project, which may be different from +## the installation path for packaging. These paths have to be replaced by +## the package installation path, otherwise apps using pkg-config will fail.
+file(GLOB _target_files @CONFIG_PACKAGE_INSTALL_DIR@/hip-targets*.cmake) +foreach(_target_file ${_target_files}) + execute_process(COMMAND sed -i s:@CMAKE_INSTALL_PREFIX@:${CPACK_INSTALL_PREFIX}:g ${_target_file}) +endforeach() +install(FILES ${_target_files} DESTINATION lib/cmake/hip) + +set(CPACK_PACKAGE_NAME "hip-rocclr") +set(HCC_PACKAGE_NAME "rocclr") +set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "HIP: Heterogenous-computing Interface for Portability [ROCClr]") set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.") set(CPACK_PACKAGE_CONTACT "Maneesh Gupta ") set(CPACK_PACKAGE_VERSION @HIP_VERSION_MAJOR@.@HIP_VERSION_MINOR@.@HIP_VERSION_PATCH@) @@ -27,28 +40,20 @@ set(CPACK_PACKAGE_FILE_NAME ${CPACK_PACKAGE_NAME}-${CPACK_PACKAGE_VERSION_MAJOR} set(CPACK_GENERATOR "TGZ;DEB;RPM") set(CPACK_BINARY_DEB "ON") set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJECT_BINARY_DIR}/postinst;${PROJECT_BINARY_DIR}/prerm") -if(@COMPILE_HIP_ATP_MARKER@) - set(CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base (= ${CPACK_PACKAGE_VERSION}), rocm-profiler, comgr (>= 1.1)") -else() - set(CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base (= ${CPACK_PACKAGE_VERSION}), comgr (>= 1.1)") -endif() -set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_vdi, hip-hcc (= ${CPACK_PACKAGE_VERSION})") -set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_vdi") -set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_vdi") +set(CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base (= ${CPACK_PACKAGE_VERSION}), comgr (>= 1.1), llvm-amdgpu") +set(CPACK_DEBIAN_PACKAGE_PROVIDES "hip_rocclr, hip-hcc (= ${CPACK_PACKAGE_VERSION})") +set(CPACK_DEBIAN_PACKAGE_REPLACES "hip_rocclr") +set(CPACK_DEBIAN_PACKAGE_CONFLICTS "hip_rocclr") set(CPACK_BINARY_RPM "ON") set(CPACK_RPM_PACKAGE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}") set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/postinst") set(CPACK_RPM_PRE_UNINSTALL_SCRIPT_FILE "${PROJECT_BINARY_DIR}/prerm") 
set(CPACK_RPM_PACKAGE_AUTOREQPROV " no") string(REPLACE "-" "_" HIP_BASE_VERSION ${CPACK_PACKAGE_VERSION}) -if(@COMPILE_HIP_ATP_MARKER@) - set(CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base = ${HIP_BASE_VERSION}, rocm-profiler, comgr >= 1.1") -else() - set(CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base = ${HIP_BASE_VERSION}, comgr >= 1.1") -endif() -set(CPACK_RPM_PACKAGE_PROVIDES "hip_vdi, hip-hcc = ${HIP_BASE_VERSION}") -set(CPACK_RPM_PACKAGE_OBSOLETES "hip_vdi") -set(CPACK_RPM_PACKAGE_CONFLICTS "hip_vdi") +set(CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr-dev, hsa-ext-rocr-dev, rocm-utils, hip-base = ${HIP_BASE_VERSION}, comgr >= 1.1, llvm-amdgpu") +set(CPACK_RPM_PACKAGE_PROVIDES "hip_rocclr, hip-hcc = ${HIP_BASE_VERSION}") +set(CPACK_RPM_PACKAGE_OBSOLETES "hip_rocclr") +set(CPACK_RPM_PACKAGE_CONFLICTS "hip_rocclr") set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "/opt") set(CPACK_SOURCE_GENERATOR "TGZ") include(CPack) diff --git a/samples/0_Intro/bit_extract/Makefile b/samples/0_Intro/bit_extract/Makefile index 08bca6e642..4a3a0bb4fe 100644 --- a/samples/0_Intro/bit_extract/Makefile +++ b/samples/0_Intro/bit_extract/Makefile @@ -13,10 +13,15 @@ ifeq (${HIP_PLATFORM}, nvcc) endif EXE=bit_extract +EXE_STATIC=bit_extract_static $(EXE): bit_extract.cpp $(HIPCC) $(HIPCC_FLAGS) $< -o $@ +$(EXE_STATIC): bit_extract.cpp + $(HIPCC) -use-staticlib $(HIPCC_FLAGS) $< -o $@ + +all: $(EXE) $(EXE_STATIC) clean: - rm -f *.o $(EXE) + rm -f *.o $(EXE) $(EXE_STATIC) diff --git a/samples/0_Intro/module_api/defaultDriver.cpp b/samples/0_Intro/module_api/defaultDriver.cpp index ea36aabcf4..af8b413ac2 100644 --- a/samples/0_Intro/module_api/defaultDriver.cpp +++ b/samples/0_Intro/module_api/defaultDriver.cpp @@ -80,8 +80,8 @@ int main() { hipFree(Ad); hipFree(Bd); - delete A; - delete B; + delete[] A; + delete[] B; hipCtxDestroy(context); return 0; } diff --git a/samples/0_Intro/module_api/launchKernelHcc.cpp 
b/samples/0_Intro/module_api/launchKernelHcc.cpp index 38cf0d414c..90e569c5bc 100644 --- a/samples/0_Intro/module_api/launchKernelHcc.cpp +++ b/samples/0_Intro/module_api/launchKernelHcc.cpp @@ -107,8 +107,8 @@ int main() { hipFree(Ad); hipFree(Bd); - delete A; - delete B; + delete[] A; + delete[] B; hipCtxDestroy(context); return 0; } diff --git a/samples/0_Intro/module_api/runKernel.cpp b/samples/0_Intro/module_api/runKernel.cpp index a011b42666..1093b0dd54 100644 --- a/samples/0_Intro/module_api/runKernel.cpp +++ b/samples/0_Intro/module_api/runKernel.cpp @@ -99,8 +99,8 @@ int main() { hipFree(Ad); hipFree(Bd); - delete A; - delete B; + delete[] A; + delete[] B; hipCtxDestroy(context); return 0; } diff --git a/samples/0_Intro/module_api_global/runKernel.cpp b/samples/0_Intro/module_api_global/runKernel.cpp index 3a2804b7a2..4a2d49144c 100644 --- a/samples/0_Intro/module_api_global/runKernel.cpp +++ b/samples/0_Intro/module_api_global/runKernel.cpp @@ -154,8 +154,8 @@ int main() { hipFree(Ad); hipFree(Bd); - delete A; - delete B; + delete[] A; + delete[] B; hipCtxDestroy(context); return 0; } diff --git a/samples/0_Intro/square/Makefile b/samples/0_Intro/square/Makefile index 9bb0dd8205..aa046eeaaa 100644 --- a/samples/0_Intro/square/Makefile +++ b/samples/0_Intro/square/Makefile @@ -11,7 +11,7 @@ else SOURCES=square.cpp endif -all: square.out +all: square.out square.out.static # Step square.cpp: square.cu @@ -20,5 +20,8 @@ square.cpp: square.cu square.out: $(SOURCES) $(HIPCC) $(CXXFLAGS) $(SOURCES) -o $@ +square.out.static: $(SOURCES) + $(HIPCC) -use-staticlib $(CXXFLAGS) $(SOURCES) -o $@ + clean: - rm -f *.o *.out square.cpp + rm -f *.o *.out *.out.static square.cpp diff --git a/samples/0_Intro/square/README.md b/samples/0_Intro/square/README.md index 7a9e04fc5f..c185903993 100644 --- a/samples/0_Intro/square/README.md +++ b/samples/0_Intro/square/README.md @@ -1,6 +1,6 @@ # Square.md -Simple test which shows how to use hipify to port CUDA code to HIP. 
+Simple test which shows how to use hipify-perl to port CUDA code to HIP. See related [blog](http://gpuopen.com/hip-to-be-squared-an-introductory-hip-tutorial) that explains the example. Now it is even simpler and requires no manual modification to the hipified source code - just hipify and compile: diff --git a/samples/1_Utils/hipInfo/hipInfo.cpp b/samples/1_Utils/hipInfo/hipInfo.cpp index e17f19675a..14faa7671b 100644 --- a/samples/1_Utils/hipInfo/hipInfo.cpp +++ b/samples/1_Utils/hipInfo/hipInfo.cpp @@ -56,6 +56,7 @@ void printCompilerInfo() { #endif } +double bytesToKB(size_t s) { return (double)s / (1024.0); } double bytesToGB(size_t s) { return (double)s / (1024.0 * 1024.0 * 1024.0); } #define printLimit(w1, limit, units) \ @@ -97,7 +98,7 @@ void printDeviceProp(int deviceId) { cout << setw(w1) << "totalGlobalMem: " << fixed << setprecision(2) << bytesToGB(props.totalGlobalMem) << " GB" << endl; cout << setw(w1) << "maxSharedMemoryPerMultiProcessor: " << fixed << setprecision(2) - << bytesToGB(props.maxSharedMemoryPerMultiProcessor) << " GB" << endl; + << bytesToKB(props.maxSharedMemoryPerMultiProcessor) << " KB" << endl; cout << setw(w1) << "totalConstMem: " << props.totalConstMem << endl; cout << setw(w1) << "sharedMemPerBlock: " << (float)props.sharedMemPerBlock / 1024.0 << " KB" << endl; diff --git a/samples/2_Cookbook/0_MatrixTranspose/Readme.md b/samples/2_Cookbook/0_MatrixTranspose/Readme.md index 9e1a342a07..432f9180dc 100644 --- a/samples/2_Cookbook/0_MatrixTranspose/Readme.md +++ b/samples/2_Cookbook/0_MatrixTranspose/Readme.md @@ -96,6 +96,6 @@ Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU 
computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/samples/2_Cookbook/10_inline_asm/Readme.md b/samples/2_Cookbook/10_inline_asm/Readme.md index f65bbdcf20..e86085b648 100644 --- a/samples/2_Cookbook/10_inline_asm/Readme.md +++ b/samples/2_Cookbook/10_inline_asm/Readme.md @@ -55,6 +55,6 @@ Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp b/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp index 6fd49fdb0f..5831da0e9d 100644 --- a/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp +++ b/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp @@ -21,11 +21,7 @@ THE SOFTWARE. 
*/ #include "hip/hip_runtime.h" -#if __HIP__ -__hip_pinned_shadow__ -#else -extern -#endif + texture tex; extern "C" __global__ void tex2dKernel(float* outputData, int width, int height) { diff --git a/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp b/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp old mode 100755 new mode 100644 index b42ac86ad1..01729222ee --- a/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp +++ b/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp @@ -27,7 +27,6 @@ THE SOFTWARE. #define fileName "tex2dKernel.code" -texture tex; bool testResult = true; #define HIP_CHECK(cmd) \ @@ -122,7 +121,7 @@ bool runTest(int argc, char** argv) { } } } - HIP_CHECK(hipUnbindTexture(tex)); + HIP_CHECK(hipUnbindTexture(texref)); HIP_CHECK(hipFree(dData)); HIP_CHECK(hipFreeArray(array)); return testResult; diff --git a/samples/2_Cookbook/12_cmake_hip_add_executable/Readme.md b/samples/2_Cookbook/12_cmake_hip_add_executable/Readme.md index 1430e58ecc..937da30af0 100644 --- a/samples/2_Cookbook/12_cmake_hip_add_executable/Readme.md +++ b/samples/2_Cookbook/12_cmake_hip_add_executable/Readme.md @@ -48,6 +48,6 @@ make - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/samples/2_Cookbook/13_occupancy/occupancy.cpp b/samples/2_Cookbook/13_occupancy/occupancy.cpp 
index b4a90c1eee..e772e82b1d 100644 --- a/samples/2_Cookbook/13_occupancy/occupancy.cpp +++ b/samples/2_Cookbook/13_occupancy/occupancy.cpp @@ -58,9 +58,9 @@ void launchKernel(float* C, float* A, float* B, bool manual){ const unsigned threadsperblock = 32; const unsigned blocks = (NUM/threadsperblock)+1; - uint32_t mingridSize = 0; - uint32_t gridSize = 0; - uint32_t blockSize = 0; + int mingridSize = 0; + int gridSize = 0; + int blockSize = 0; if (manual){ blockSize = threadsperblock; @@ -88,7 +88,7 @@ void launchKernel(float* C, float* A, float* B, bool manual){ printf("kernel Execution time = %6.3fms\n", eventMs); //Calculate Occupancy - uint32_t numBlock = 0; + int numBlock = 0; HIP_CHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, multiply, blockSize, 0)); if(devProp.maxThreadsPerMultiProcessor){ diff --git a/samples/2_Cookbook/1_hipEvent/Readme.md b/samples/2_Cookbook/1_hipEvent/Readme.md index c12c76e701..2bd389e25e 100644 --- a/samples/2_Cookbook/1_hipEvent/Readme.md +++ b/samples/2_Cookbook/1_hipEvent/Readme.md @@ -75,6 +75,6 @@ Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/samples/2_Cookbook/3_shared_memory/Readme.md b/samples/2_Cookbook/3_shared_memory/Readme.md index 
ad23d58f73..756cb6e7f2 100644 --- a/samples/2_Cookbook/3_shared_memory/Readme.md +++ b/samples/2_Cookbook/3_shared_memory/Readme.md @@ -37,6 +37,6 @@ Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/samples/2_Cookbook/4_shfl/Readme.md b/samples/2_Cookbook/4_shfl/Readme.md index 6adc98fb4e..ac5dff9292 100644 --- a/samples/2_Cookbook/4_shfl/Readme.md +++ b/samples/2_Cookbook/4_shfl/Readme.md @@ -48,6 +48,6 @@ please make sure you have a 3.0 or higher compute capable device in order to use - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release 
Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/samples/2_Cookbook/5_2dshfl/Readme.md b/samples/2_Cookbook/5_2dshfl/Readme.md index cc9484377b..fa10c71d6c 100644 --- a/samples/2_Cookbook/5_2dshfl/Readme.md +++ b/samples/2_Cookbook/5_2dshfl/Readme.md @@ -50,6 +50,6 @@ please make sure you have a 3.0 or higher compute capable device in order to use - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/samples/2_Cookbook/6_dynamic_shared/Readme.md b/samples/2_Cookbook/6_dynamic_shared/Readme.md index 047cc94278..68782807bf 100644 --- a/samples/2_Cookbook/6_dynamic_shared/Readme.md +++ b/samples/2_Cookbook/6_dynamic_shared/Readme.md @@ -44,6 +44,6 @@ Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- 
[HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/samples/2_Cookbook/7_streams/Readme.md b/samples/2_Cookbook/7_streams/Readme.md index 1c9186791c..14b6a9762a 100644 --- a/samples/2_Cookbook/7_streams/Readme.md +++ b/samples/2_Cookbook/7_streams/Readme.md @@ -58,6 +58,6 @@ Use hipcc to build the application, which is using hcc on AMD and nvcc on nvidia - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/samples/2_Cookbook/9_unroll/Readme.md b/samples/2_Cookbook/9_unroll/Readme.md index c6b8a8cf35..6fad55e3c9 100644 --- a/samples/2_Cookbook/9_unroll/Readme.md +++ b/samples/2_Cookbook/9_unroll/Readme.md @@ -43,6 +43,6 @@ please make sure you have a 3.0 or higher compute capable device in order to use - [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_porting_guide.md) - [HIP Terminology](https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across 
CUDA/HIP/HC/AMP/OpenL) -- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIP/blob/master/hipify-clang/README.md) +- [HIPIFY](https://github.com/ROCm-Developer-Tools/HIPIFY/blob/master/README.md) - [Developer/CONTRIBUTING Info](https://github.com/ROCm-Developer-Tools/HIP/blob/master/CONTRIBUTING.md) - [Release Notes](https://github.com/ROCm-Developer-Tools/HIP/blob/master/RELEASE.md) diff --git a/src/code_object_bundle.cpp b/src/code_object_bundle.cpp deleted file mode 100644 index feef90a61a..0000000000 --- a/src/code_object_bundle.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include "../include/hip/hcc_detail/code_object_bundle.hpp" - -#include - -#include -#include -#include -#include -#include - -using namespace std; - -// CREATORS -hip_impl::Bundled_code_header::Bundled_code_header(const vector& x) - : Bundled_code_header{x.cbegin(), x.cend()} {} - -hip_impl::Bundled_code_header::Bundled_code_header( - const void* p) { // This is a pretty terrible interface, useful only because - // hipLoadModuleData is so poorly specified (for no fault of its own). 
- if (!p) return; - - if (!valid(*static_cast(p))) return; - auto ph = static_cast(p); - - size_t sz = sizeof(Header_) + ph->bundle_cnt_ * sizeof(Bundled_code::Header); - auto pb = static_cast(p) + sizeof(Header_); - auto n = ph->bundle_cnt_; - while (n--) { - sz += reinterpret_cast(pb)->bundle_sz; - pb += sizeof(Bundled_code::Header); - } - - read(static_cast(p), static_cast(p) + sz, *this); -} diff --git a/include/hip/hcc_detail/code_object_bundle.hpp b/src/code_object_bundle.inl similarity index 86% rename from include/hip/hcc_detail/code_object_bundle.hpp rename to src/code_object_bundle.inl index 77e0d706d6..596ac60661 100644 --- a/include/hip/hcc_detail/code_object_bundle.hpp +++ b/src/code_object_bundle.inl @@ -92,10 +92,6 @@ struct Bundled_code { #define magic_string_ "__CLANG_OFFLOAD_BUNDLE__" -#ifdef __GNUC__ -#pragma GCC visibility push (default) -#endif - class Bundled_code_header { // DATA - STATICS static constexpr auto magic_string_sz_ = sizeof(magic_string_) - 1; @@ -167,8 +163,26 @@ class Bundled_code_header { Bundled_code_header() = default; template Bundled_code_header(RandomAccessIterator f, RandomAccessIterator l); - explicit Bundled_code_header(const std::vector& blob); - explicit Bundled_code_header(const void* maybe_blob); + explicit Bundled_code_header(const std::vector& blob) + : Bundled_code_header{blob.cbegin(), blob.cend()} {} + explicit Bundled_code_header(const void* maybe_blob) { + // This is a pretty terrible interface, useful only because + // hipLoadModuleData is so poorly specified (for no fault of its own). 
+ if (!maybe_blob) return; + + if (!valid(*static_cast(maybe_blob))) return; + auto ph = static_cast(maybe_blob); + + size_t sz = sizeof(Header_) + ph->bundle_cnt_ * sizeof(Bundled_code::Header); + auto pb = static_cast(maybe_blob) + sizeof(Header_); + auto n = ph->bundle_cnt_; + while (n--) { + sz += reinterpret_cast(pb)->bundle_sz; + pb += sizeof(Bundled_code::Header); + } + + read(static_cast(maybe_blob), static_cast(maybe_blob) + sz, *this); + } Bundled_code_header(const Bundled_code_header&) = default; Bundled_code_header(Bundled_code_header&&) = default; ~Bundled_code_header() = default; @@ -180,10 +194,6 @@ class Bundled_code_header { size_t bundled_code_size = 0; }; -#ifdef __GNUC__ -#pragma GCC visibility pop -#endif - // CREATORS template Bundled_code_header::Bundled_code_header(RandomAccessIterator f, RandomAccessIterator l) diff --git a/src/hip_clang.cpp b/src/hip_clang.cpp index e8f3e86881..85aa0ad810 100644 --- a/src/hip_clang.cpp +++ b/src/hip_clang.cpp @@ -51,7 +51,7 @@ __hipRegisterFatBinary(const void* data) return nullptr; } - auto modules = new std::vector{g_deviceCnt}; + auto modules = new std::vector(g_deviceCnt); if (!modules) { return nullptr; } @@ -90,9 +90,9 @@ __hipRegisterFatBinary(const void* data) reinterpret_cast(header) + desc->offset), desc->size}; if (HIP_DUMP_CODE_OBJECT) __hipDumpCodeObject(image); - module->executable = hip_impl::get_program_state().load_executable(image.data(), image.size(), - module->executable, - agent); + module->executable = hip_impl::get_program_state().load_executable_no_copy( + reinterpret_cast(header) + desc->offset, desc->size, + module->executable, agent); if (module->executable.handle) { hip_impl::program_state_impl::read_kernarg_metadata(image, module->kernargs); @@ -136,7 +136,7 @@ extern "C" void __hipRegisterFunction( int* wSize) { HIP_INIT_API(NONE, modules, hostFunction, deviceFunction, deviceName); - std::vector functions{g_deviceCnt}; + std::vector functions(g_deviceCnt); assert(modules && 
modules->size() >= g_deviceCnt); for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) { diff --git a/src/hip_device.cpp b/src/hip_device.cpp index 1bbdb10bbc..f7d6b3ac79 100644 --- a/src/hip_device.cpp +++ b/src/hip_device.cpp @@ -96,12 +96,13 @@ hipError_t hipDeviceGetLimit(size_t* pValue, hipLimit_t limit) { if (pValue == nullptr) { return ihipLogStatus(hipErrorInvalidValue); } +#if __HIP_ENABLE_DEVICE_MALLOC__ if (limit == hipLimitMallocHeapSize) { *pValue = (size_t)__HIP_SIZE_OF_HEAP; return ihipLogStatus(hipSuccess); - } else { - return ihipLogStatus(hipErrorUnsupportedLimit); } +#endif + return ihipLogStatus(hipErrorUnsupportedLimit); } hipError_t hipFuncSetCacheConfig(const void* func, hipFuncCache_t cacheConfig) { @@ -310,6 +311,18 @@ hipError_t ihipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device case hipDeviceAttributeCooperativeMultiDeviceLaunch: *pi = prop->cooperativeMultiDeviceLaunch; break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedFunc: + *pi = prop->cooperativeMultiDeviceUnmatchedFunc; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedGridDim: + *pi = prop->cooperativeMultiDeviceUnmatchedGridDim; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedBlockDim: + *pi = prop->cooperativeMultiDeviceUnmatchedBlockDim; + break; + case hipDeviceAttributeCooperativeMultiDeviceUnmatchedSharedMem: + *pi = prop->cooperativeMultiDeviceUnmatchedSharedMem; + break; case hipDeviceAttributeMaxPitch: *pi = prop->memPitch; break; diff --git a/src/hip_event.cpp b/src/hip_event.cpp index 733f0d9db3..c626f7956d 100644 --- a/src/hip_event.cpp +++ b/src/hip_event.cpp @@ -24,13 +24,86 @@ THE SOFTWARE. 
#include "hip_hcc_internal.h"
 #include "trace_helper.h"
+#include <errno.h> // errno, ENOENT
+#include <fcntl.h> // O_RDWR, O_CREAT
+#include <sys/mman.h> // shm_open, shm_unlink, mmap, munmap, PROT_READ, PROT_WRITE, MAP_SHARED, MAP_FAILED
+#include <unistd.h> // ftruncate, close
+
+namespace {
+
+    inline  // returns the HSA runtime's text for err, or "Unknown." when hsa_status_string fails
+    const char* hsa_to_string(hsa_status_t err) noexcept
+    {
+        const char* r{};
+
+        if (hsa_status_string(err, &r) == HSA_STATUS_SUCCESS) return r;
+
+        return "Unknown.";
+    }
+
+    template<std::size_t m, std::size_t n>  // m, n: array lengths of file / function
+    inline  // throws std::runtime_error (file, function, line, HSA text) unless res is HSA_STATUS_SUCCESS
+    void throwing_result_check(hsa_status_t res, const char (&file)[m],
+                               const char (&function)[n], int line) {
+        if (res == HSA_STATUS_SUCCESS) return;
+
+        throw std::runtime_error{"Failed in file " + (file +
+                                 (", in function \"" + (function +
+                                 ("\", on line " + std::to_string(line))))) +
+                                 ", with error: " + hsa_to_string(res)};
+    }
+
+    template<std::size_t m, std::size_t n>  // m, n: array lengths of file / function
+    inline  // throws unless retval == good; retval itself is passed to strerror, so it must be an errno-style code
+    void throwing_retval_check(int good, int retval, const char (&file)[m],
+                               const char (&function)[n], int line) {
+        if (retval == good) return;
+
+        throw std::runtime_error{"Failed in file " + (file +
+                                 (", in function \"" + (function +
+                                 ("\", on line " + std::to_string(line))))) +
+                                 ", with error: " + strerror(retval)};
+    }
+
+    template<std::size_t m, std::size_t n, std::size_t o>  // o: array length of msg
+    inline  // throws with the caller-supplied msg when bad is true
+    void throwing_msg_check(bool bad, const char (&msg)[o],
+                            const char (&file)[m],
+                            const char (&function)[n], int line) {
+        if (!bad) return;
+
+        throw std::runtime_error{"Failed in file " + (file +
+                                 (", in function \"" + (function +
+                                 ("\", on line " + std::to_string(line))))) +
+                                 ", with error: " + msg};
+    }
+
+    template<std::size_t m, std::size_t n>  // m, n: array lengths of file / function
+    inline  // throws with strerror(errno) when bad is true; call right after the failing libc call
+    void throwing_errno_check(bool bad, const char (&file)[m],
+                              const char (&function)[n], int line) {
+        if (!bad) return;
+
+        throw std::runtime_error{"Failed in file " + (file +
+                                 (", in function \"" + (function +
+                                 ("\", on line " + std::to_string(line))))) +
+                                 ", with error: " + strerror(errno)};
+    }
+
+} // Unnamed namespace.
+
 //-------------------------------------------------------------------------------------------------
 //-------------------------------------------------------------------------------------------------
 // Events
 //---
-ihipEvent_t::ihipEvent_t(unsigned flags) : _criticalData(this) { _flags = flags; };
+ihipEvent_t::ihipEvent_t(unsigned flags) : _criticalData(this) {
+    _flags = flags;
+    GET_TLS();
+    auto ctx = ihipGetTlsDefaultCtx();
+    _deviceId = ctx == nullptr ? -1 : ctx->getDevice()->_deviceId; // -1 when the thread has no default context
+};
 
 
 // Attach to an existing completion future:
@@ -44,6 +117,43 @@ void ihipEvent_t::attachToCompletionFuture(const hc::completion_future* cf, hipS
 }
 
 
+static void createIpcEventShmemIfNeeded(ihipEventData_t &ecd) { // creates, sizes, maps and initializes the event's shared-memory block once
+    if (!ecd._ipc_name.empty()) return;
+
+    // create random shmem name (mkstemp is used only to obtain a unique suffix)
+    char name_template[] = "/tmp/eventXXXXXX";
+    int temp_fd = mkstemp(name_template);
+    throwing_errno_check(-1 == temp_fd, __FILE__, __func__, __LINE__);
+
+    // copy shmem name into event data, reformat to use a single slash
+    ecd._ipc_name = name_template;
+    ecd._ipc_name.replace(0, 5, "/hip_");
+
+    // open shmem
+    ecd._ipc_fd = shm_open(ecd._ipc_name.c_str(), O_RDWR | O_CREAT, 0777);
+    throwing_errno_check(ecd._ipc_fd < 0, __FILE__, __func__, __LINE__);
+
+    // size it (ftruncate reports failure as -1 with the cause in errno, so report errno rather than strerror(-1))
+    throwing_errno_check(-1 == ftruncate(ecd._ipc_fd, sizeof(ihipIpcEventShmem_t)), __FILE__, __func__, __LINE__);
+
+    // mmap it (failure is signalled by MAP_FAILED, never by NULL)
+    ecd._ipc_shmem = (ihipIpcEventShmem_t*)mmap(0, sizeof(ihipIpcEventShmem_t), PROT_READ | PROT_WRITE, MAP_SHARED, ecd._ipc_fd, 0);
+    throwing_errno_check(MAP_FAILED == ecd._ipc_shmem, __FILE__, __func__, __LINE__);
+
+    // initialize shared state: one owner, nothing recorded yet (read_index -1), every signal slot unlocked
+    ecd._ipc_shmem->owners = 1;
+    ecd._ipc_shmem->read_index = -1;
+    ecd._ipc_shmem->write_index = 0;
+    for (int i=0; i < IPC_SIGNALS_PER_EVENT; i++) {
+        ecd._ipc_shmem->signal[i] = 0;
+    }
+
+    // remove temp file
+    throwing_errno_check(-1 == close(temp_fd), __FILE__, __func__, __LINE__);
+    throwing_errno_check(-1 == unlink(name_template), __FILE__, __func__, __LINE__);
+} + + static std::pair refreshEventStatus(ihipEventData_t &ecd) { if (ecd._state == hipEventStatusRecording && ecd.marker().is_ready()) { if ((ecd._type == hipEventTypeIndependent) || @@ -70,9 +180,9 @@ static std::pair refreshEventStatus(ihipEventData_t hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags) { hipError_t e = hipSuccess; - // TODO-IPC - support hipEventInterprocess. unsigned supportedFlags = hipEventDefault | hipEventBlockingSync | hipEventDisableTiming | - hipEventReleaseToDevice | hipEventReleaseToSystem; + hipEventReleaseToDevice | hipEventReleaseToSystem | + hipEventInterprocess; const unsigned releaseFlags = (hipEventReleaseToDevice | hipEventReleaseToSystem); const bool illegalFlags = @@ -100,29 +210,66 @@ hipError_t hipEventCreate(hipEvent_t* event) { return ihipLogStatus(ihipEventCreate(event, 0)); } - hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { HIP_INIT_SPECIAL_API(hipEventRecord, TRACE_SYNC, event, stream); if (!event) return ihipLogStatus(hipErrorInvalidHandle); stream = ihipSyncAndResolveStream(stream); LockedAccessor_EventCrit_t eCrit(event->criticalData()); - if (eCrit->_eventData._state == hipEventStatusUnitialized) return ihipLogStatus(hipErrorInvalidHandle); + auto &ecd{eCrit->_eventData}; + if (ecd._state == hipEventStatusUnitialized) return ihipLogStatus(hipErrorInvalidHandle); if (HIP_SYNC_NULL_STREAM && stream->isDefaultStream()) { // TODO-HIP_SYNC_NULL_STREAM : can remove this code when HIP_SYNC_NULL_STREAM = 0 // If default stream , then wait on all queues. 
ihipCtx_t* ctx = ihipGetTlsDefaultCtx(); ctx->locked_syncDefaultStream(true, true); - eCrit->_eventData.marker(hc::completion_future()); // reset event - eCrit->_eventData._stream = stream; - eCrit->_eventData._timestamp = hc::get_system_ticks(); - eCrit->_eventData._state = hipEventStatusComplete; + ecd.marker(hc::completion_future()); // reset event + ecd._stream = stream; + ecd._timestamp = hc::get_system_ticks(); + ecd._state = hipEventStatusComplete; + // TODO handle IPC case? } else { // Record the event in the stream: - eCrit->_eventData.marker(stream->locked_recordEvent(event)); - eCrit->_eventData._stream = stream; - eCrit->_eventData._timestamp = 0; - eCrit->_eventData._state = hipEventStatusRecording; + ecd.marker(stream->locked_recordEvent(event)); + ecd._stream = stream; + ecd._timestamp = 0; + ecd._state = hipEventStatusRecording; + if (event->_flags & hipEventInterprocess) { + createIpcEventShmemIfNeeded(ecd); + int write_index = ecd._ipc_shmem->write_index++; // fetch add + int offset = write_index % IPC_SIGNALS_PER_EVENT; + // While event still valid and still locked, spin. + while (ecd._ipc_shmem->signal[offset] != 0) { + // TODO backoff + } + // Lock signal. 
+ ecd._ipc_shmem->signal[offset] = 1; + // forward signal state from local signal to IPC signal via host callback + // create callback that can be passed to hsa_amd_signal_async_handler + // this function decrements the IPC signal by 1 to indicate completion + std::atomic *signal = &ecd._ipc_shmem->signal[offset]; + auto t{new std::function{[=]() { + signal->store(0); + }}}; + // register above callback with HSA runtime to be called when local signal + // is decremented from 1 to 0 by CP + auto local_signal = *reinterpret_cast(eCrit->_eventData.marker().get_native_handle()); + hsa_amd_signal_async_handler(local_signal, HSA_SIGNAL_CONDITION_LT, 1, + [](hsa_signal_value_t x, void* p) { + (*static_cast(p))(); + delete static_cast(p); + return false; + }, t); + // Update read index to indicate new signal. + int expected = write_index-1; + while (!ecd._ipc_shmem->read_index.compare_exchange_weak(expected, write_index)) { + throwing_msg_check( + expected >= write_index, + "IPC event record update read index failure", + __FILE__, __func__, __LINE__); + expected = write_index-1; + } + } } return ihipLogStatus(hipSuccess); } @@ -132,8 +279,18 @@ hipError_t hipEventDestroy(hipEvent_t event) { HIP_INIT_API(hipEventDestroy, event); if (event) { + { + LockedAccessor_EventCrit_t crit(event->criticalData()); + auto &ecd{crit->_eventData}; + if (ecd._ipc_shmem) { + int owners = --ecd._ipc_shmem->owners; + throwing_errno_check(-1 == munmap(ecd._ipc_shmem, sizeof(ihipIpcEventShmem_t)), __FILE__, __func__, __LINE__); + throwing_errno_check(-1 == close(ecd._ipc_fd), __FILE__, __func__, __LINE__); + if (0 == owners) + throwing_errno_check(-1 == shm_unlink(ecd._ipc_name.c_str()), __FILE__, __func__, __LINE__); + } + } delete event; - return ihipLogStatus(hipSuccess); } else { return ihipLogStatus(hipErrorInvalidHandle); @@ -143,31 +300,44 @@ hipError_t hipEventDestroy(hipEvent_t event) { hipError_t hipEventSynchronize(hipEvent_t event) { HIP_INIT_SPECIAL_API(hipEventSynchronize, 
TRACE_SYNC, event);
 
-    if (event){
-        if (!(event->_flags & hipEventReleaseToSystem)) {
-            tprintf(DB_WARN,
-                    "hipEventSynchronize on event without system-scope fence ; consider creating with "
-                    "hipEventReleaseToSystem\n");
-        }
-        auto ecd = event->locked_copyCrit();
+    if (!event) return ihipLogStatus(hipErrorInvalidHandle);
 
-        if (ecd._state == hipEventStatusUnitialized) {
-            return ihipLogStatus(hipErrorInvalidHandle);
-        } else if (ecd._state == hipEventStatusCreated) {
-            // Created but not actually recorded on any device:
-            return ihipLogStatus(hipSuccess);
-        } else if (HIP_SYNC_NULL_STREAM && (ecd._stream->isDefaultStream())) {
-            auto* ctx = ihipGetTlsDefaultCtx();
-            // TODO-HIP_SYNC_NULL_STREAM - can remove this code
-            ctx->locked_syncDefaultStream(true, true);
-            return ihipLogStatus(hipSuccess);
-        } else {
-            ecd.marker().wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked
-                                                                     : hc::hcWaitModeActive);
-            return ihipLogStatus(hipSuccess);
+    if (!(event->_flags & hipEventReleaseToSystem)) {
+        tprintf(DB_WARN,
+                "hipEventSynchronize on event without system-scope fence ; consider creating with "
+                "hipEventReleaseToSystem\n");
+    }
+
+    auto ecd = event->locked_copyCrit();
+
+    if ((event->_flags & hipEventInterprocess) && ecd._ipc_shmem) {
+        // this is an IPC event whose shared memory is mapped; without a mapping fall through to the normal path
+        int previous_read_index = ecd._ipc_shmem->read_index;
+        if (previous_read_index >= 0) {
+            // we have at least one recorded event, so proceed
+            int offset = previous_read_index % IPC_SIGNALS_PER_EVENT;
+            // While event still valid and still locked, spin.
+            while (ecd._ipc_shmem->read_index < previous_read_index+IPC_SIGNALS_PER_EVENT && ecd._ipc_shmem->signal[offset] != 0) {
+                // TODO backoff
+            }
         }
-    } else {
+        return ihipLogStatus(hipSuccess);
+    }
+
+    if (ecd._state == hipEventStatusUnitialized) {
         return ihipLogStatus(hipErrorInvalidHandle);
+    } else if (ecd._state == hipEventStatusCreated) {
+        // Created but not actually recorded on any device:
+        return ihipLogStatus(hipSuccess);
+    } else if (HIP_SYNC_NULL_STREAM && (ecd._stream->isDefaultStream())) {
+        auto* ctx = ihipGetTlsDefaultCtx();
+        // TODO-HIP_SYNC_NULL_STREAM - can remove this code
+        ctx->locked_syncDefaultStream(true, true);
+        return ihipLogStatus(hipSuccess);
+    } else {
+        ecd.marker().wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked
+                                                                 : hc::hcWaitModeActive);
+        return ihipLogStatus(hipSuccess);
     }
 }
 
@@ -175,7 +345,9 @@ hipError_t hipEventElapsedTime(float* ms, hipEvent_t start, hipEvent_t stop) {
     HIP_INIT_API(hipEventElapsedTime, ms, start, stop);
 
     if (ms == nullptr) return ihipLogStatus(hipErrorInvalidValue);
-    if ((start == nullptr) || (stop == nullptr)) return ihipLogStatus(hipErrorInvalidHandle);
+    if ((start == nullptr) || (stop == nullptr) ||
+        (start->_deviceId != stop->_deviceId))
+        return ihipLogStatus(hipErrorInvalidHandle);
 
     *ms = 0.0f;
     auto startEcd = start->locked_copyCrit();
@@ -232,9 +404,80 @@ hipError_t hipEventQuery(hipEvent_t event) {
 
     auto ecd = event->locked_copyCrit();
 
-    if (ecd._state == hipEventStatusRecording && !ecd.marker().is_ready()) {
-        return ihipLogStatus(hipErrorNotReady);
+    // this event is either from an ipc handle, or the owner of a local ipc event
+    if (event->_flags & hipEventInterprocess) {
+        if (ecd._ipc_shmem) {
+            int previous_read_index = ecd._ipc_shmem->read_index;
+            int offset = previous_read_index % IPC_SIGNALS_PER_EVENT; // negative while nothing has been recorded (read_index == -1)
+            if (previous_read_index >= 0 && ecd._ipc_shmem->read_index < previous_read_index+IPC_SIGNALS_PER_EVENT && ecd._ipc_shmem->signal[offset] != 0) {
+                return ihipLogStatus(hipErrorNotReady);
+            }
+            else {
+                return ihipLogStatus(hipSuccess);
+            }
+        }
+    }
+    // normal event
+    else {
+        if (ecd._state == hipEventStatusRecording && !ecd.marker().is_ready()) {
+            return ihipLogStatus(hipErrorNotReady);
+        }
     }
 
     return ihipLogStatus(hipSuccess);
 }
+
+hipError_t hipIpcGetEventHandle(hipIpcEventHandle_t* handle, hipEvent_t event)
+{
+    HIP_INIT_API(hipIpcGetEventHandle, handle, event);
+
+#if USE_IPC && ATOMIC_INT_LOCK_FREE == 2
+    if (!handle) return ihipLogStatus(hipErrorInvalidHandle);
+    if (!event) return ihipLogStatus(hipErrorInvalidHandle);
+    if (!(event->_flags & hipEventInterprocess)) return ihipLogStatus(hipErrorInvalidHandle);
+    if (!(event->_flags & hipEventDisableTiming)) return ihipLogStatus(hipErrorInvalidHandle);
+
+    LockedAccessor_EventCrit_t crit(event->criticalData());
+
+    auto &ecd{crit->_eventData};
+    createIpcEventShmemIfNeeded(ecd);
+    // copy name into handle; the copy is bounded so the zero-filled buffer always keeps its terminating NUL
+    ihipIpcEventHandle_t* iHandle = (ihipIpcEventHandle_t*)handle;
+    memset(iHandle->shmem_name, 0, HIP_IPC_HANDLE_SIZE);
+    ecd._ipc_name.copy(iHandle->shmem_name, HIP_IPC_HANDLE_SIZE - 1);
+
+    return ihipLogStatus(hipSuccess);
+#else
+    return ihipLogStatus(hipErrorNotSupported);
+#endif
+}
+
+hipError_t hipIpcOpenEventHandle(hipEvent_t* event, hipIpcEventHandle_t handle)
+{
+    HIP_INIT_API(hipIpcOpenEventHandle, event, &handle);
+
+#if USE_IPC && ATOMIC_INT_LOCK_FREE == 2
+    if (!event) return ihipLogStatus(hipErrorInvalidHandle);
+
+    // create a new event with timing disabled, per spec
+    auto hip_status = ihipEventCreate(event, hipEventDisableTiming | hipEventInterprocess);
+    if (hip_status != hipSuccess) return ihipLogStatus(hip_status);
+
+    LockedAccessor_EventCrit_t crit((*event)->criticalData());
+    auto &ecd{crit->_eventData};
+    ihipIpcEventHandle_t* iHandle = (ihipIpcEventHandle_t*)&handle;
+    ecd._ipc_name = iHandle->shmem_name;
+    // open shmem
+    ecd._ipc_fd = shm_open(ecd._ipc_name.c_str(), O_RDWR, 0777);
+    throwing_errno_check(ecd._ipc_fd < 0, __FILE__, __func__, __LINE__);
+    // mmap it (failure is signalled by MAP_FAILED, never by NULL)
+    ecd._ipc_shmem = (ihipIpcEventShmem_t*)mmap(0, sizeof(ihipIpcEventShmem_t), PROT_READ | PROT_WRITE, MAP_SHARED, ecd._ipc_fd, 0);
+    throwing_errno_check(MAP_FAILED == ecd._ipc_shmem, __FILE__, __func__, __LINE__);
+    // update shared state
+    ecd._ipc_shmem->owners += 1;
+
+    return ihipLogStatus(hipSuccess);
+#else
+    return ihipLogStatus(hipErrorNotSupported);
+#endif
+}
diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp
index c9688408c8..5159254d57 100644
--- a/src/hip_hcc.cpp
+++ b/src/hip_hcc.cpp
@@ -263,7 +263,13 @@ ihipStream_t::ihipStream_t(ihipCtx_t* ctx, hc::accelerator_view av, unsigned int
 
 
 //---
-ihipStream_t::~ihipStream_t() {}
+ihipStream_t::~ihipStream_t() {
+    GET_TLS();
+    for (auto mem : coopMemsTracker) {
+        hip_internal::ihipHostFree(tls, mem->mgs);
+        hip_internal::ihipHostFree(tls, mem);
+    }
+}
 
 
 hc::hcWaitMode ihipStream_t::waitMode() const {
@@ -326,12 +332,55 @@ void ihipStream_t::locked_wait() {
     locked_wait(waited);
 };
 
+typedef struct {
+    int previous_read_index;
+    ihipIpcEventShmem_t *shmem;
+    hsa_signal_t signal;
+} callback_data_t;
+
+static void WaitThenDecrementSignal(callback_data_t *data) {
+    int offset = data->previous_read_index % IPC_SIGNALS_PER_EVENT;
+    // While event valid and locked, spin (skipped when previous_read_index is negative: nothing was recorded, and offset would index before signal[0]).
+    while (data->previous_read_index >= 0 && data->shmem->read_index < data->previous_read_index+IPC_SIGNALS_PER_EVENT && data->shmem->signal[offset] != 0) {
+    }
+    hsa_signal_store_relaxed(data->signal, 0);
+    delete data;
+}
+
 // Causes current stream to wait for specified event to complete:
 // Note this does not provide any kind of host serialization.
void ihipStream_t::locked_streamWaitEvent(ihipEventData_t& ecd) { LockedAccessor_StreamCrit_t crit(_criticalData); - crit->_av.create_blocking_marker(ecd.marker(), hc::accelerator_scope); + // if event is an IPC event, it doesn't have a marker + // we use a host callback to block stream with a signal wait + if (ecd._ipc_shmem) { + // create first marker + auto cf = crit->_av.create_marker(hc::no_scope); + // get its signal + auto signal = *reinterpret_cast(cf.get_native_handle()); + // increment its signal value + hsa_signal_add_relaxed(signal, 1); + + // create callback that can be passed to hsa_amd_signal_async_handler + // this function will host wait on IPC signal, then sets first packet's signal to 0 to indicate completion + auto t{new callback_data_t{ecd._ipc_shmem->read_index, ecd._ipc_shmem, signal}}; + + // register above callback with HSA runtime to be called when first packet's signal + // is decremented from 2 to 1 by CP (or it is already at 1) + // the HSA async handler is single threaded, so we can't block, therefore use a detached thread + hsa_amd_signal_async_handler(signal, HSA_SIGNAL_CONDITION_EQ, 1, + [](hsa_signal_value_t x, void* p) { + std::thread(WaitThenDecrementSignal, static_cast(p)).detach(); + return false; + }, t); + + // create additional marker that blocks on the first one + crit->_av.create_blocking_marker(cf, hc::accelerator_scope); + } + else { + crit->_av.create_blocking_marker(ecd.marker(), hc::accelerator_scope); + } } @@ -677,7 +726,7 @@ hsa_status_t get_pool_info(hsa_amd_memory_pool_t pool, void* data) { break; case HSA_REGION_SEGMENT_GROUP: err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, - &(p_prop->sharedMemPerBlock)); + &(p_prop->maxSharedMemoryPerMultiProcessor)); break; default: break; @@ -835,10 +884,8 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) { hsa_region_t* am_region = static_cast(_acc.get_hsa_am_region()); err = hsa_region_get_info(*am_region, HSA_REGION_INFO_SIZE, 
&prop->totalGlobalMem); DeviceErrorCheck(err); - // maxSharedMemoryPerMultiProcessor should be as the same as group memory size. - // Group memory will not be paged out, so, the physical memory size is the total shared memory - // size, and also equal to the group pool size. - prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem; + // Current GPUs allow a workgroup to use all of LDS in a CU, so these two are equal. + prop->sharedMemPerBlock = prop->maxSharedMemoryPerMultiProcessor; // Get Max memory clock frequency err = @@ -897,9 +944,16 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) { prop->integrated = 1; } - // Enable the cooperative group for gfx9+ - prop->cooperativeLaunch = (prop->gcnArch < 900) ? 0 : 1; - prop->cooperativeMultiDeviceLaunch = (prop->gcnArch < 900) ? 0 : 1; + // Enable the cooperative group for GPUs that support all the required features + err = hsa_agent_get_info(_hsaAgent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COOPERATIVE_QUEUES, + &prop->cooperativeLaunch); + DeviceErrorCheck(err); + prop->cooperativeMultiDeviceLaunch = prop->cooperativeLaunch; + + prop->cooperativeMultiDeviceUnmatchedFunc = prop->cooperativeMultiDeviceLaunch; + prop->cooperativeMultiDeviceUnmatchedGridDim = prop->cooperativeMultiDeviceLaunch; + prop->cooperativeMultiDeviceUnmatchedBlockDim = prop->cooperativeMultiDeviceLaunch; + prop->cooperativeMultiDeviceUnmatchedSharedMem = prop->cooperativeMultiDeviceLaunch; err = hsa_agent_get_info(_hsaAgent, (hsa_agent_info_t)HSA_EXT_AGENT_INFO_IMAGE_1D_MAX_ELEMENTS, &prop->maxTexture1D); @@ -1515,20 +1569,6 @@ hipError_t ihipStreamSynchronize(TlsData *tls, hipStream_t stream) { return e; } -void ihipStreamCallbackHandler(ihipStreamCallback_t* cb) { - hipError_t e = hipSuccess; - - // Synchronize stream - tprintf(DB_SYNC, "ihipStreamCallbackHandler wait on stream %s\n", - ToString(cb->_stream).c_str()); - GET_TLS(); - e = ihipStreamSynchronize(tls, cb->_stream); - - // Call registered callback function - 
cb->_callback(cb->_stream, e, cb->_userData); - delete cb; -} - //--- // Get the stream to use for a command submission. // @@ -1619,7 +1659,9 @@ void ihipPrintKernelLaunch(const char* kernelName, const grid_launch_parm* lp, // Allows runtime to track some information about the stream. hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm* lp, const char* kernelNameStr, bool lockAcquired) { - stream = ihipSyncAndResolveStream(stream, lockAcquired); + if (stream == nullptr || stream != stream->getCtx()->_defaultStream) { + stream = ihipSyncAndResolveStream(stream, lockAcquired); + } lp->grid_dim.x = grid.x; lp->grid_dim.y = grid.y; lp->grid_dim.z = grid.z; diff --git a/src/hip_hcc_internal.h b/src/hip_hcc_internal.h index 104fd910a8..803abe28e2 100644 --- a/src/hip_hcc_internal.h +++ b/src/hip_hcc_internal.h @@ -389,18 +389,28 @@ const hipStream_t hipStreamNull = 0x0; /** - * HIP IPC Handle Size + * HIP IPC Mem Handle Size */ -#define HIP_IPC_RESERVED_SIZE 24 +#define HIP_IPC_MEM_RESERVED_SIZE 24 class ihipIpcMemHandle_t { public: #if USE_IPC hsa_amd_ipc_memory_t ipc_handle; ///< ipc memory handle on ROCr #endif size_t psize; - char reserved[HIP_IPC_RESERVED_SIZE]; + char reserved[HIP_IPC_MEM_RESERVED_SIZE]; }; +/** + * HIP IPC Event Handle Size + */ +#define HIP_IPC_EVENT_RESERVED_SIZE 32 +class ihipIpcEventHandle_t { + public: +#if USE_IPC + char shmem_name[HIP_IPC_HANDLE_SIZE]; +#endif +}; struct ihipModule_t { std::string fileName; @@ -551,6 +561,20 @@ public: typedef ihipStreamCriticalBase_t ihipStreamCritical_t; typedef LockedAccessor LockedAccessor_StreamCrit_t; +// do not change these two structs without changing the device library +struct mg_sync { + uint w0; + uint w1; +}; + +struct mg_info { + struct mg_sync *mgs; + uint grid_id; + uint num_grids; + ulong prev_sum; + ulong all_sum; +}; + //--- // Internal stream structure. 
class ihipStream_t { @@ -619,6 +643,8 @@ class ihipStream_t { // Before calling this function, stream must be resolved from "0" to the actual stream: bool isDefaultStream() const { return _id == 0; }; + std::vector coopMemsTracker; + public: //--- // Public member vars - these are set at initialization and never change: @@ -654,19 +680,6 @@ class ihipStream_t { }; -//---- -// Internal structure for stream callback handler -class ihipStreamCallback_t { - public: - ihipStreamCallback_t(hipStream_t stream, hipStreamCallback_t callback, void* userData) - : _stream(stream), _callback(callback), _userData(userData) { - }; - hipStream_t _stream; - hipStreamCallback_t _callback; - void* _userData; -}; - - //---- // Internal event structure: enum hipEventStatus_t { @@ -683,6 +696,14 @@ enum ihipEventType_t { hipEventTypeStopCommand, }; +#define IPC_SIGNALS_PER_EVENT 32 +typedef struct ihipIpcEventShmem_s { + std::atomic owners; + std::atomic read_index; + std::atomic write_index; + std::atomic signal[IPC_SIGNALS_PER_EVENT]; +} ihipIpcEventShmem_t; + struct ihipEventData_t { ihipEventData_t() { @@ -690,18 +711,24 @@ struct ihipEventData_t { _stream = NULL; _timestamp = 0; _type = hipEventTypeIndependent; + _ipc_name = ""; + _ipc_fd = 0; + _ipc_shmem = NULL; }; - void marker(const hc::completion_future& marker) { _marker = marker; }; + void marker(const hc::completion_future& marker) { _marker = marker; } hc::completion_future& marker() { return _marker; } - uint64_t timestamp() const { return _timestamp; }; - ihipEventType_t type() const { return _type; }; + uint64_t timestamp() const { return _timestamp; } + ihipEventType_t type() const { return _type; } ihipEventType_t _type; hipEventStatus_t _state; hipStream_t _stream; // Stream where the event is recorded. Null stream is resolved to actual // stream when recorded uint64_t _timestamp; // store timestamp, may be set on host or by marker. 
+ std::string _ipc_name; + int _ipc_fd; + ihipIpcEventShmem_t *_ipc_shmem; private: hc::completion_future _marker; }; @@ -713,7 +740,7 @@ template class ihipEventCriticalBase_t : LockedBase { public: explicit ihipEventCriticalBase_t(const ihipEvent_t* parentEvent) : _parent(parentEvent) {} - ~ihipEventCriticalBase_t(){}; + ~ihipEventCriticalBase_t() {} // Keep data in structure so it can be easily copied into snapshots // (used to reduce lock contention and preserve correct lock order) @@ -745,6 +772,7 @@ class ihipEvent_t { public: unsigned _flags; + int _deviceId; private: ihipEventCritical_t _criticalData; @@ -980,7 +1008,6 @@ hipError_t hipModuleGetFunctionEx(hipFunction_t* hfunc, hipModule_t hmod, hipStream_t ihipSyncAndResolveStream(hipStream_t, bool lockAcquired = 0); hipError_t ihipStreamSynchronize(TlsData *tls, hipStream_t stream); -void ihipStreamCallbackHandler(ihipStreamCallback_t* cb); /** * @brief Copies the memory address and size of symbol @p symbolName @@ -1043,7 +1070,7 @@ namespace hip_internal { hipError_t memcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream); -hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned int flags); +hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned int flags, bool noSync = 0); hipError_t ihipHostFree(TlsData *tls, void* ptr); @@ -1051,20 +1078,6 @@ hipError_t ihipHostFree(TlsData *tls, void* ptr); #define MAX_COOPERATIVE_GPUs 255 -// do not change these two structs without changing the device library -struct mg_sync { - uint w0; - uint w1; -}; - -struct mg_info { - struct mg_sync *mgs; - uint grid_id; - uint num_grids; - ulong prev_sum; - ulong all_sum; -}; - //--- // TODO - review the context creation strategy here. Really should be: // - first "non-device" runtime call creates the context for this thread. 
Allowed to call diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 8159f22a97..82ecaea82a 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -19,6 +19,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + #include #include "hsa/hsa.h" #include "hsa/hsa_ext_amd.h" @@ -30,8 +31,10 @@ THE SOFTWARE. #include #include +#if __HIP_ENABLE_DEVICE_MALLOC__ __device__ char __hip_device_heap[__HIP_SIZE_OF_HEAP]; __device__ uint32_t __hip_device_page_flag[__HIP_NUM_PAGES]; +#endif // Internal HIP APIS: namespace hip_internal { @@ -309,31 +312,52 @@ void generic_copy(void* __restrict dst, const void* __restrict src, size_t n, if (di.size == is_cpu_owned) return d2h_copy(dst, src, n, si); if (si.size == is_cpu_owned) return h2d_copy(dst, src, n, di); - throwing_result_check(hsa_amd_agents_allow_access(1u, &si.agentOwner, - nullptr, - di.agentBaseAddress), - __FILE__, __func__, __LINE__); - - return do_copy(dst, src, n, di.agentOwner, si.agentOwner); + hsa_status_t res = hsa_amd_agents_allow_access(1u, &si.agentOwner, + nullptr, di.agentBaseAddress); + if (res == HSA_STATUS_SUCCESS){ + return do_copy(dst, src, n, di.agentOwner, si.agentOwner); + } + // If devices do not have access then fallback mechanism will be used + // copy will be slower + throwing_result_check(hsa_memory_copy(dst,src,n), __FILE__, __func__, __LINE__); } inline void memcpy_impl(void* __restrict dst, const void* __restrict src, size_t n, hipMemcpyKind k) { + auto si{info(src)}; + auto di{info(dst)}; + + if (!is_large_BAR){ + // Pointer info takes presidence over hipMemcpyKind + // if there is mismatch b/w Memcpy kind and dst/src pointer + // E.g. 
dst(host pointer),src(device pointer) and hipMemcpyKind set as hipMemcpyHostToDevice + if (di.size == is_cpu_owned && si.size == is_cpu_owned) + k = hipMemcpyHostToHost; + else if (si.size == is_cpu_owned && di.size != is_cpu_owned) + k = hipMemcpyHostToDevice; + else if (di.size == is_cpu_owned && si.size != is_cpu_owned) + k = hipMemcpyDeviceToHost; + else + k = hipMemcpyDeviceToDevice; + } switch (k) { case hipMemcpyHostToHost: std::memcpy(dst, src, n); break; - case hipMemcpyHostToDevice: return h2d_copy(dst, src, n, info(dst)); - case hipMemcpyDeviceToHost: return d2h_copy(dst, src, n, info(src)); + case hipMemcpyHostToDevice: return h2d_copy(dst, src, n, di); + case hipMemcpyDeviceToHost: return d2h_copy(dst, src, n, si); case hipMemcpyDeviceToDevice: { - const auto di{info(dst)}; - const auto si{info(src)}; - throwing_result_check(hsa_amd_agents_allow_access(1u, &si.agentOwner, - nullptr, - di.agentBaseAddress), - __FILE__, __func__, __LINE__); - return do_copy(dst, src, n, di.agentOwner, si.agentOwner); + hsa_status_t res = hsa_amd_agents_allow_access(1u, &si.agentOwner, + nullptr, di.agentBaseAddress); + if (res == HSA_STATUS_SUCCESS){ + return do_copy(dst, src, n, di.agentOwner, si.agentOwner); + } + + // If devices do not have access then fallback mechanism will be used + // copy will be slower + throwing_result_check(hsa_memory_copy(dst,src,n), __FILE__, __func__, __LINE__); + break; } - default: return generic_copy(dst, src, n, info(dst), info(src)); + default: return generic_copy(dst, src, n, di, si); } } @@ -475,20 +499,20 @@ void* allocAndSharePtr(const char* msg, size_t sizeBytes, ihipCtx_t* ctx, bool s return ptr; } -hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned int flags) { +hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned int flags, bool noSync) { hipError_t hip_status = hipSuccess; - if (HIP_SYNC_HOST_ALLOC) { + if (sizeBytes == 0) { + return hipSuccess; + } + + if 
(HIP_SYNC_HOST_ALLOC && !noSync) { hipDeviceSynchronize(); } auto ctx = ihipGetTlsDefaultCtx(); if ((ctx == nullptr) || (ptr == nullptr)) { hip_status = hipErrorInvalidValue; - } - else if (sizeBytes == 0) { - hip_status = hipSuccess; - // TODO - should size of 0 return err or be siliently ignored? } else { unsigned trueFlags = flags; if (flags == hipHostMallocDefault) { @@ -537,7 +561,7 @@ hipError_t ihipHostMalloc(TlsData *tls, void** ptr, size_t sizeBytes, unsigned i } } - if (HIP_SYNC_HOST_ALLOC) { + if (HIP_SYNC_HOST_ALLOC && !noSync) { hipDeviceSynchronize(); } return hip_status; @@ -673,14 +697,15 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) { HIP_SET_DEVICE(); hipError_t hip_status = hipSuccess; + if (sizeBytes == 0) { + if (ptr) *ptr = NULL; + return ihipLogStatus(hipSuccess); + } + auto ctx = ihipGetTlsDefaultCtx(); // return NULL pointer when malloc size is 0 if ( nullptr == ctx || nullptr == ptr) { hip_status = hipErrorInvalidValue; - } - else if (sizeBytes == 0) { - *ptr = NULL; - hip_status = hipSuccess; } else { auto device = ctx->getWriteableDevice(); *ptr = hip_internal::allocAndSharePtr("device_mem", sizeBytes, ctx, false /*shareWithAll*/, @@ -700,14 +725,15 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag HIP_SET_DEVICE(); #if (__hcc_workweek__ >= 19115) + if (sizeBytes == 0) { + if (ptr) *ptr = NULL; + return ihipLogStatus(hipSuccess); + } + hipError_t hip_status = hipSuccess; auto ctx = ihipGetTlsDefaultCtx(); - // return NULL pointer when malloc size is 0 - if (sizeBytes == 0) { - *ptr = NULL; - hip_status = hipSuccess; - } else if ((ctx == nullptr) || (ptr == nullptr)) { + if ((ctx == nullptr) || (ptr == nullptr)) { hip_status = hipErrorInvalidValue; } else { unsigned amFlags = 0; @@ -736,6 +762,9 @@ hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flag hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { HIP_INIT_SPECIAL_API(hipHostMalloc, 
(TRACE_MEM), ptr, sizeBytes, flags); HIP_SET_DEVICE(); + if (sizeBytes == 0) { + return ihipLogStatus(hipSuccess); + } hipError_t hip_status = hipSuccess; hip_status = hip_internal::ihipHostMalloc(tls, ptr, sizeBytes, flags); return ihipLogStatus(hip_status); @@ -744,6 +773,9 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { hipError_t hipMallocManaged(void** devPtr, size_t size, unsigned int flags) { HIP_INIT_SPECIAL_API(hipMallocManaged, (TRACE_MEM), devPtr, size, flags); HIP_SET_DEVICE(); + if (size == 0) { + return ihipLogStatus(hipSuccess); + } hipError_t hip_status = hipSuccess; if(flags != hipMemAttachGlobal) hip_status = hipErrorInvalidValue; @@ -1224,6 +1256,7 @@ hipError_t hipMemcpyToSymbol(void* dst, const void* src, size_t count, tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst); + if (count == 0) return ihipLogStatus(hipSuccess); if (dst == nullptr) { return ihipLogStatus(hipErrorInvalidSymbol); } @@ -1246,6 +1279,7 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* src, size_t count, tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst); + if (count == 0) return ihipLogStatus(hipSuccess); if (src == nullptr || dst == nullptr) { return ihipLogStatus(hipErrorInvalidSymbol); } @@ -1269,6 +1303,7 @@ hipError_t hipMemcpyToSymbolAsync(void* dst, const void* src, size_t count, tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, dst); + if (count == 0) return ihipLogStatus(hipSuccess); if (dst == nullptr) { return ihipLogStatus(hipErrorInvalidSymbol); } @@ -1301,6 +1336,7 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* src, size_t count, tprintf(DB_MEM, " symbol '%s' resolved to address:%p\n", symbol_name, src); + if (count == 0) return ihipLogStatus(hipSuccess); if (src == nullptr || dst == nullptr) { return ihipLogStatus(hipErrorInvalidSymbol); } @@ -1592,6 +1628,7 @@ hipError_t ihipMemcpy3D(const struct hipMemcpy3DParms* p, hipStream_t stream, 
bo srcXoffset = p->srcPos.x; srcYoffset = p->srcPos.y; srcZoffset = p->srcPos.z; + if (copyWidth == 0) return hipSuccess; if (p->dstArray != nullptr) { if ((p->dstArray->isDrv == true) ||( p->dstPtr.ptr!= nullptr)){ return hipErrorInvalidValue; @@ -1933,6 +1970,7 @@ hipError_t getLockedPointer(void *hostPtr, size_t dataLen, void **devicePtrPtr) // TODO - review and optimize hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind) { + if (height == 0 || width == 0) return hipSuccess; if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return hipErrorInvalidValue; hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); @@ -1989,6 +2027,7 @@ hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, hipError_t ihipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { + if (height == 0 || width == 0) return hipSuccess; if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return hipErrorInvalidValue; hipError_t e = hipSuccess; int isLockedOrD2D = 0; @@ -2043,6 +2082,7 @@ hipError_t ihip2dOffsetMemcpy(void* dst, size_t dpitch, const void* src, size_t size_t height, size_t srcXOffsetInBytes, size_t srcYOffset, size_t dstXOffsetInBytes, size_t dstYOffset,hipMemcpyKind kind, hipStream_t stream, bool isAsync) { + if (height == 0 || width == 0) return hipSuccess; if((spitch < width + srcXOffsetInBytes) || (srcYOffset >= height)){ return hipErrorInvalidValue; } else if((dpitch < width + dstXOffsetInBytes) || (dstYOffset >= height)){ @@ -2061,6 +2101,7 @@ hipError_t ihipMemcpyParam2D(const hip_Memcpy2D* pCopy, hipStream_t stream, bool if (pCopy == nullptr) { return hipErrorInvalidValue; } + if (pCopy->Height == 0 || pCopy->WidthInBytes == 0) return hipSuccess; void* dst; const void* src; size_t spitch = pCopy->srcPitch; size_t dpitch = 
pCopy->dstPitch; @@ -2140,6 +2181,7 @@ hipError_t hipMemcpy2DFromArray( void* dst, size_t dpitch, hipArray_const_t src, hipError_t hipMemcpy2DFromArrayAsync( void* dst, size_t dpitch, hipArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream ){ HIP_INIT_SPECIAL_API(hipMemcpy2DFromArrayAsync, (TRACE_MCMD), dst, dpitch, src, wOffset, hOffset, width, height, kind, stream); size_t byteSize; + if (height == 0 || width == 0) return ihipLogStatus(hipSuccess); if(src) { switch (src->desc.f) { case hipChannelFormatKindSigned: @@ -2180,16 +2222,40 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { return ihipLogStatus(ihipMemsetSync(dst, value, sizeBytes, nullptr, ihipMemsetDataTypeChar)); } +hipError_t ihipMemsetND(void* dst, size_t pitch, int value, size_t width, size_t height, size_t setHeight,size_t depth, + hipStream_t stream, enum ihipMemsetDataType copyDataType, bool async) { + size_t sizeBytes =0; + hipError_t hipStatus = hipSuccess; + if ((pitch == width) && (height == setHeight)) { + sizeBytes = pitch * setHeight * depth; + if(async) + return ihipMemsetAsync(dst, value, sizeBytes, stream, copyDataType); + else + return ihipMemsetSync(dst, value, sizeBytes, nullptr, copyDataType); + } else { + for(size_t i = 0; i < depth; ++i) { + for(size_t j = 0; j < setHeight; ++j) { + void* dstPtr = ((unsigned char*) dst + i * height * pitch + j * pitch); + if(async) + hipStatus = ihipMemsetAsync(dstPtr, value, width, stream, copyDataType); + else + hipStatus = ihipMemsetSync(dstPtr, value, width, nullptr, copyDataType); + if (hipStatus != hipSuccess) + return hipStatus; + } + } + } + return hipStatus; +} + hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) { HIP_INIT_SPECIAL_API(hipMemset2D, (TRACE_MCMD), dst, pitch, value, width, height); - size_t sizeBytes = pitch * height; - return ihipLogStatus(ihipMemsetSync(dst, value, sizeBytes, nullptr, 
ihipMemsetDataTypeChar)); + return ihipLogStatus(ihipMemsetND(dst, pitch, value, width, height, height, 1, hipStreamNull, ihipMemsetDataTypeChar, false)); } hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height, hipStream_t stream ) { HIP_INIT_SPECIAL_API(hipMemset2DAsync, (TRACE_MCMD), dst, pitch, value, width, height, stream); - size_t sizeBytes = pitch * height; - return ihipLogStatus(ihipMemsetAsync(dst, value, sizeBytes, stream, ihipMemsetDataTypeChar)); + return ihipLogStatus(ihipMemsetND(dst, pitch, value, width, height, height, 1, stream, ihipMemsetDataTypeChar, true)); } hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t count) { @@ -2219,14 +2285,12 @@ hipError_t hipMemsetD32(hipDeviceptr_t dst, int value, size_t count) { hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent) { HIP_INIT_SPECIAL_API(hipMemset3D, (TRACE_MCMD), &pitchedDevPtr, value, &extent); - size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth; - return ihipLogStatus(ihipMemsetSync(pitchedDevPtr.ptr, value, sizeBytes, nullptr, ihipMemsetDataTypeChar)); + return ihipLogStatus(ihipMemsetND(pitchedDevPtr.ptr, pitchedDevPtr.pitch ,value, extent.width, pitchedDevPtr.ysize, extent.height, extent.depth, hipStreamNull, ihipMemsetDataTypeChar, false)); } hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent ,hipStream_t stream ) { HIP_INIT_SPECIAL_API(hipMemset3DAsync, (TRACE_MCMD), &pitchedDevPtr, value, &extent); - size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth; - return ihipLogStatus(ihipMemsetAsync(pitchedDevPtr.ptr, value, sizeBytes, stream, ihipMemsetDataTypeChar)); + return ihipLogStatus(ihipMemsetND(pitchedDevPtr.ptr,pitchedDevPtr.pitch, value, extent.width, pitchedDevPtr.ysize, extent.height, extent.depth, stream, ihipMemsetDataTypeChar, true)); } hipError_t hipMemGetInfo(size_t* free, size_t* total) { @@ -2239,8 +2303,6 @@ hipError_t 
hipMemGetInfo(size_t* free, size_t* total) { auto device = ctx->getWriteableDevice(); if (total) { *total = device->_props.totalGlobalMem; - } else { - e = hipErrorInvalidValue; } if (free) { @@ -2263,8 +2325,6 @@ hipError_t hipMemGetInfo(size_t* free, size_t* total) { } else { return ihipLogStatus(hipErrorInvalidValue); } - } else { - e = hipErrorInvalidValue; } } else { @@ -2456,7 +2516,7 @@ hipError_t hipIpcOpenMemHandle(void** devPtr, hipIpcMemHandle_t handle, unsigned crit->peerAgents(), devPtr) != HSA_STATUS_SUCCESS) return ihipLogStatus(hipErrorRuntimeOther); - hc::AmPointerInfo ampi(NULL, *devPtr, *devPtr, sizeof(*devPtr), acc, true, true); + hc::AmPointerInfo ampi(NULL, *devPtr, *devPtr, iHandle->psize, acc, true, true); am_status_t am_status = hc::am_memtracker_add(*devPtr,ampi); if (am_status != AM_SUCCESS) return ihipLogStatus(hipErrorMapFailed); diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 0a7348a3a2..0f608d9843 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -50,7 +50,7 @@ THE SOFTWARE. #include #include #include -#include "../include/hip/hcc_detail/code_object_bundle.hpp" +#include "code_object_bundle.inl" #include "hip_fatbin.h" // TODO Use Pool APIs from HCC to get memory regions. 
@@ -140,7 +140,7 @@ hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t global uint32_t localWorkSizeZ, size_t sharedMemBytes, hipStream_t hStream, void** kernelParams, void** extra, hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags, bool isStreamLocked = 0, - void** impCoopParams = 0) { + void** impCoopParams = 0, hc::accelerator_view* coopAV = 0) { using namespace hip_impl; auto ctx = ihipGetTlsDefaultCtx(); @@ -192,8 +192,8 @@ hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t global if (impCoopParams) { const auto p{static_cast(*impCoopParams)}; // The sixth index is for multi-grid synchronization - kernargs.insert((kernargs.cend() - padSize - HIP_IMPLICIT_KERNARG_SIZE) + 6 * HIP_IMPLICIT_KERNARG_ALIGNMENT, - p, p + HIP_IMPLICIT_KERNARG_ALIGNMENT); + copy(p, p + HIP_IMPLICIT_KERNARG_ALIGNMENT, + (kernargs.end() - HIP_IMPLICIT_KERNARG_SIZE) + 6 * HIP_IMPLICIT_KERNARG_ALIGNMENT); } /* @@ -245,6 +245,10 @@ hipError_t ihipModuleLaunchKernel(TlsData *tls, hipFunction_t f, uint32_t global hc::completion_future cf; + if (coopAV) { + lp.av = coopAV; + } + lp.av->dispatch_hsa_kernel(&aql, kernargs.data(), kernargs.size(), (startEvent || stopEvent) ? 
&cf : nullptr #if (__hcc_workweek__ > 17312) @@ -326,22 +330,18 @@ hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList return hipErrorInvalidValue; } - hipFunction_t* kds = reinterpret_cast(malloc(sizeof(hipFunction_t) * numDevices)); - if (kds == nullptr) { - return hipErrorNotInitialized; - } + std::vector kds(numDevices,0); // prepare all kernel descriptors for each device as all streams will be locked in the next loop for (int i = 0; i < numDevices; ++i) { const hipLaunchParams& lp = launchParamsList[i]; if (lp.stream == nullptr) { - free(kds); return hipErrorNotInitialized; } kds[i] = ps.kernel_descriptor(reinterpret_cast(lp.func), hip_impl::target_agent(lp.stream)); + if (kds[i] == nullptr) { - free(kds); return hipErrorInvalidValue; } if (!kds[i]->_kernarg_layout.empty()) continue; @@ -396,8 +396,6 @@ hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList #endif } - free(kds); - return result; } @@ -409,6 +407,90 @@ hipError_t hipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, return ihipExtLaunchMultiKernelMultiDevice(launchParamsList, numDevices, flags, ps); } +void getGprsLdsUsage(hipFunction_t f, size_t* usedVGPRS, size_t* usedSGPRS, size_t* usedLDS) +{ + if (f->_is_code_object_v3) { + const auto header = reinterpret_cast(f->_header); + // GRANULATED_WAVEFRONT_VGPR_COUNT is specified in 0:5 bits of COMPUTE_PGM_RSRC1 + // the granularity for gfx6-gfx9 is max(0, ceil(vgprs_used / 4) - 1) + *usedVGPRS = ((header->compute_pgm_rsrc1 & 0x3F) + 1) << 2; + // GRANULATED_WAVEFRONT_SGPR_COUNT is specified in 6:9 bits of COMPUTE_PGM_RSRC1 + // the granularity for gfx9+ is 2 * max(0, ceil(sgprs_used / 16) - 1) + *usedSGPRS = ((((header->compute_pgm_rsrc1 & 0x3C0) >> 6) >> 1) + 1) << 4; + *usedLDS = header->group_segment_fixed_size; + } + else { + const auto header = f->_header; + // VGPRs granularity is 4 + *usedVGPRS = ((header->workitem_vgpr_count + 3) >> 2) << 2; + // adding 2 to take into 
account the 2 VCC registers & handle the granularity of 16 + *usedSGPRS = header->wavefront_sgpr_count + 2; + *usedSGPRS = ((*usedSGPRS + 15) >> 4) << 4; + *usedLDS = header->workgroup_group_segment_byte_size; + } +} + +static hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( + TlsData *tls, int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk) +{ + using namespace hip_impl; + + auto ctx = ihipGetTlsDefaultCtx(); + if (ctx == nullptr) { + return hipErrorInvalidDevice; + } + if (numBlocks == nullptr) { + return hipErrorInvalidValue; + } + + hipDeviceProp_t prop{}; + ihipGetDeviceProperties(&prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId); + + if (blockSize > prop.maxThreadsPerBlock) { + *numBlocks = 0; + return hipSuccess; + } + + prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024; + + size_t usedVGPRS = 0; + size_t usedSGPRS = 0; + size_t usedLDS = 0; + getGprsLdsUsage(f, &usedVGPRS, &usedSGPRS, &usedLDS); + + // Due to SPI and private memory limitations, the max of wavefronts per CU in 32 + size_t wavefrontSize = prop.warpSize; + size_t maxWavefrontsPerCU = min(prop.maxThreadsPerMultiProcessor / wavefrontSize, 32); + + const size_t simdPerCU = 4; + const size_t maxWavesPerSimd = maxWavefrontsPerCU / simdPerCU; + + size_t numWavefronts = (blockSize + wavefrontSize - 1) / wavefrontSize; + + size_t availableVGPRs = (prop.regsPerBlock / wavefrontSize / simdPerCU); + size_t vgprs_alu_occupancy = simdPerCU * (usedVGPRS == 0 ? maxWavesPerSimd + : std::min(maxWavesPerSimd, availableVGPRs / usedVGPRS)); + + // Calculate blocks occupancy per CU based on VGPR usage + *numBlocks = vgprs_alu_occupancy / numWavefronts; + + const size_t availableSGPRs = (prop.gcnArch < 800) ? 512 : 800; + size_t sgprs_alu_occupancy = simdPerCU * (usedSGPRS == 0 ? 
maxWavesPerSimd + : std::min(maxWavesPerSimd, availableSGPRs / usedSGPRS)); + + // Calculate blocks occupancy per CU based on SGPR usage + *numBlocks = std::min(*numBlocks, (int) (sgprs_alu_occupancy / numWavefronts)); + + size_t total_used_lds = usedLDS + dynSharedMemPerBlk; + if (total_used_lds != 0) { + // Calculate LDS occupancy per CU. lds_per_cu / (static_lds + dynamic_lds) + size_t lds_occupancy = prop.maxSharedMemoryPerMultiProcessor / total_used_lds; + *numBlocks = std::min(*numBlocks, (int) lds_occupancy); + } + + return hipSuccess; +} + namespace { // kernel for initializing GWS // nwm1 is the total number of work groups minus 1 @@ -417,27 +499,29 @@ __global__ void init_gws(uint nwm1) { } } -__attribute__((visibility("default"))) hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, - dim3 blockDimX, void** kernelParams, unsigned int sharedMemBytes, + dim3 blockDim, void** kernelParams, unsigned int sharedMemBytes, hipStream_t stream, hip_impl::program_state& ps) { +#if (__hcc_workweek__ >= 20115) hipError_t result; - if ((f == nullptr) || (stream == nullptr) || (kernelParams == nullptr)) { + if (f == nullptr || kernelParams == nullptr) { return hipErrorNotInitialized; } - if (!stream->getDevice()->_props.cooperativeLaunch) { + stream = ihipSyncAndResolveStream(stream); + + if (!stream->getDevice()->_props.cooperativeLaunch || + blockDim.x * blockDim.y * blockDim.z > stream->getDevice()->_props.maxThreadsPerBlock) { return hipErrorInvalidConfiguration; } - size_t globalWorkSizeX = (size_t)gridDim.x * (size_t)blockDimX.x; - size_t globalWorkSizeY = (size_t)gridDim.y * (size_t)blockDimX.y; - size_t globalWorkSizeZ = (size_t)gridDim.z * (size_t)blockDimX.z; - if(globalWorkSizeX > UINT32_MAX || globalWorkSizeY > UINT32_MAX || globalWorkSizeZ > UINT32_MAX) - { + size_t globalWorkSizeX = (size_t)gridDim.x * (size_t)blockDim.x; + size_t globalWorkSizeY = (size_t)gridDim.y * (size_t)blockDim.y; + size_t globalWorkSizeZ = (size_t)gridDim.z * 
(size_t)blockDim.z; + if(globalWorkSizeX > UINT32_MAX || globalWorkSizeY > UINT32_MAX || globalWorkSizeZ > UINT32_MAX) { return hipErrorInvalidConfiguration; } @@ -469,28 +553,49 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, kd->_kernarg_layout = *reinterpret_cast>*>(kargs.getHandle()); + GET_TLS(); + int numBlocksPerSm = 0; + result = ihipOccupancyMaxActiveBlocksPerMultiprocessor(tls, &numBlocksPerSm, kd, + blockDim.x * blockDim.y * blockDim.z, sharedMemBytes); + if (result != hipSuccess) { + return hipErrorLaunchFailure; + } + int maxActiveBlocks = numBlocksPerSm * stream->getDevice()->_props.multiProcessorCount; + + //check to see if the workload fits on the GPU + if (gridDim.x * gridDim.y * gridDim.z > maxActiveBlocks) { + return hipErrorCooperativeLaunchTooLarge; + } void *gwsKernelParam[1]; // calculate total number of work groups minus 1 for the main kernel uint nwm1 = (gridDim.x * gridDim.y * gridDim.z) - 1; gwsKernelParam[0] = &nwm1; - LockedAccessor_StreamCrit_t streamCrit(stream->criticalData(), false); -#if (__hcc_workweek__ >= 19213) - streamCrit->_av.acquire_locked_hsa_queue(); -#endif + hc::accelerator acc = stream->getDevice()->_acc; + // create a cooperative accelerated view for launching gws and main kernels + hc::accelerator_view coopAV = acc.create_cooperative_view(); - GET_TLS(); - // launch the init_gws kernel to initialize the GWS + LockedAccessor_StreamCrit_t streamCrit(stream->criticalData(), false); + + // the cooperative queue will wait until this stream completes its operations + hc::completion_future streamCF; + if (!streamCrit->_av.get_is_empty()) { + streamCF = streamCrit->_av.create_marker(hc::accelerator_scope); + coopAV.create_blocking_marker(streamCF, hc::accelerator_scope); + } + + streamCrit->_av.acquire_locked_hsa_queue(); + coopAV.acquire_locked_hsa_queue(); + + // launch the init_gws kernel to initialize the GWS in the dedicated cooperative queue result = ihipModuleLaunchKernel(tls, gwsKD, 1, 1, 1, 1, 
1, 1, - 0, stream, gwsKernelParam, nullptr, nullptr, nullptr, 0, true); + 0, stream, gwsKernelParam, nullptr, nullptr, nullptr, 0, true, nullptr , &coopAV); if (result != hipSuccess) { stream->criticalData().unlock(); -#if (__hcc_workweek__ >= 19213) stream->criticalData()._av.release_locked_hsa_queue(); -#endif - + coopAV.release_locked_hsa_queue(); return hipErrorLaunchFailure; } @@ -498,60 +603,106 @@ hipError_t ihipLaunchCooperativeKernel(const void* f, dim3 gridDim, void* impCoopParams[1]; impCoopParams[0] = &impCoopArg; - // launch the main kernel + // launch the main kernel in the cooperative queue result = ihipModuleLaunchKernel(tls, kd, - gridDim.x * blockDimX.x, - gridDim.y * blockDimX.y, - gridDim.z * blockDimX.z, - blockDimX.x, blockDimX.y, blockDimX.z, + gridDim.x * blockDim.x, + gridDim.y * blockDim.y, + gridDim.z * blockDim.z, + blockDim.x, blockDim.y, blockDim.z, sharedMemBytes, stream, kernelParams, nullptr, nullptr, - nullptr, 0, true, impCoopParams); + nullptr, 0, true, impCoopParams, &coopAV); + + + coopAV.release_locked_hsa_queue(); + stream->criticalData()._av.release_locked_hsa_queue(); + + // this stream will wait until the cooperative queue completes its operations + hc::completion_future cooperativeCF; + if (!coopAV.get_is_empty()) { + cooperativeCF = coopAV.create_marker(hc::accelerator_scope); + streamCrit->_av.create_blocking_marker(cooperativeCF, hc::accelerator_scope); + } stream->criticalData().unlock(); -#if (__hcc_workweek__ >= 19213) - stream->criticalData()._av.release_locked_hsa_queue(); -#endif return result; +#else + return hipErrorInvalidConfiguration; +#endif + } __attribute__((visibility("default"))) +hipError_t hipLaunchCooperativeKernel(const void* func, dim3 gridDim, + dim3 blockDim, void** args, + size_t sharedMem, hipStream_t stream, + hip_impl::program_state& ps) { + + // Skipping passing in ps, because the logging function does not like it + HIP_INIT_API(hipLaunchCooperativeKernel, func, gridDim, blockDim, args, + 
sharedMem, stream); + + return ihipLogStatus(ihipLaunchCooperativeKernel(func, gridDim, blockDim, + args, sharedMem, stream, ps)); +} + + hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags, hip_impl::program_state& ps) { +#if (__hcc_workweek__ >= 20115) hipError_t result; if (numDevices > g_deviceCnt || launchParamsList == nullptr || numDevices > MAX_COOPERATIVE_GPUs) { return hipErrorInvalidValue; } + vector streams; + vector deviceIDs; + // check to see if we have valid distinct streams/devices, if cooperative multi device + // launch is supported and if grid/block dimensions are valid for (int i = 0; i < numDevices; ++i) { - if (!launchParamsList[i].stream->getDevice()->_props.cooperativeMultiDeviceLaunch) { + const hipLaunchParams& lp = launchParamsList[i]; + + if (lp.stream == nullptr){ + return hipErrorInvalidResourceHandle; + } + + if (find(streams.begin(), streams.end(), lp.stream) == streams.end()) { + streams.push_back(lp.stream); + } else { + return hipErrorInvalidDevice; + } + + const ihipDevice_t* currentDevice = lp.stream->getDevice(); + if (find(deviceIDs.begin(), deviceIDs.end(), currentDevice->_deviceId) == deviceIDs.end()) { + deviceIDs.push_back(currentDevice->_deviceId); + } else { + return hipErrorInvalidDevice; + } + + if (!currentDevice->_props.cooperativeMultiDeviceLaunch) { + return hipErrorInvalidConfiguration; + } + + if (lp.gridDim.x == 0 || lp.gridDim.y == 0 || lp.gridDim.z == 0 || + lp.blockDim.x == 0 || lp.blockDim.y == 0 || lp.blockDim.z == 0 || + lp.blockDim.x * lp.blockDim.y * lp.blockDim.z > currentDevice->_props.maxThreadsPerBlock){ return hipErrorInvalidConfiguration; } } - hipFunction_t* gwsKds = reinterpret_cast(malloc(sizeof(hipFunction_t) * numDevices)); - hipFunction_t* kds = reinterpret_cast(malloc(sizeof(hipFunction_t) * numDevices)); - - if (kds == nullptr || gwsKds == nullptr) { - return hipErrorNotInitialized; - } + vector gwsKds; + vector kds; + 
GET_TLS(); // prepare all kernel descriptors for initializing the GWS and the main kernels per device for (int i = 0; i < numDevices; ++i) { const hipLaunchParams& lp = launchParamsList[i]; - if (lp.stream == nullptr) { - free(gwsKds); - free(kds); - return hipErrorNotInitialized; - } - gwsKds[i] = ps.kernel_descriptor(reinterpret_cast(&init_gws), - hip_impl::target_agent(lp.stream)); + gwsKds.push_back(ps.kernel_descriptor(reinterpret_cast(&init_gws), + hip_impl::target_agent(lp.stream))); if (gwsKds[i] == nullptr) { - free(gwsKds); - free(kds); return hipErrorInvalidValue; } hip_impl::kernargs_size_align gwsKargs = ps.get_kernargs_size_align( @@ -560,24 +711,42 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL gwsKargs.getHandle()); - kds[i] = ps.kernel_descriptor(reinterpret_cast(lp.func), - hip_impl::target_agent(lp.stream)); + kds.push_back(ps.kernel_descriptor(reinterpret_cast(lp.func), + hip_impl::target_agent(lp.stream))); if (kds[i] == nullptr) { - free(gwsKds); - free(kds); return hipErrorInvalidValue; } hip_impl::kernargs_size_align kargs = ps.get_kernargs_size_align( reinterpret_cast(lp.func)); kds[i]->_kernarg_layout = *reinterpret_cast>*>( kargs.getHandle()); + + int numBlocksPerSm = 0; + result = ihipOccupancyMaxActiveBlocksPerMultiprocessor(tls, &numBlocksPerSm, kds[i], + lp.blockDim.x * lp.blockDim.y * lp.blockDim.z, lp.sharedMem); + if (result != hipSuccess) { + return hipErrorLaunchFailure; + } + int maxActiveBlocks = numBlocksPerSm * lp.stream->getDevice()->_props.multiProcessorCount; + + //check to see if the workload fits on the GPU + if (lp.gridDim.x * lp.gridDim.y * lp.gridDim.z > maxActiveBlocks) { + return hipErrorCooperativeLaunchTooLarge; + } + } + + vector coopAVs; + + // create cooperative accelerated views for launching gws and main kernels on each device + for (int i = 0; i < numDevices; ++i) { + hc::accelerator acc = launchParamsList[i].stream->getDevice()->_acc; + 
coopAVs.push_back(acc.create_cooperative_view()); } mg_sync *mg_sync_ptr = 0; - mg_info *mg_info_ptr[MAX_COOPERATIVE_GPUs] = {0}; + vector mg_info_ptr; - GET_TLS(); - result = hip_internal::ihipHostMalloc(tls, (void **)&mg_sync_ptr, sizeof(mg_sync), hipHostMallocDefault); + result = hip_internal::ihipHostMalloc(tls, (void **)&mg_sync_ptr, sizeof(mg_sync), hipHostMallocDefault, true); if (result != hipSuccess) { return hipErrorInvalidValue; } @@ -586,7 +755,8 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL uint all_sum = 0; for (int i = 0; i < numDevices; ++i) { - result = hip_internal::ihipHostMalloc(tls, (void **)&mg_info_ptr[i], sizeof(mg_info), hipHostMallocDefault); + mg_info *mg_info_temp = nullptr; + result = hip_internal::ihipHostMalloc(tls, (void **)&mg_info_temp, sizeof(mg_info), hipHostMallocDefault, true); if (result != hipSuccess) { hip_internal::ihipHostFree(tls, mg_sync_ptr); for (int j = 0; j < i; ++j) { @@ -594,6 +764,7 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL } return hipErrorInvalidValue; } + mg_info_ptr.push_back(mg_info_temp); // calculate the sum of sizes of all grids const hipLaunchParams& lp = launchParamsList[i]; all_sum += lp.blockDim.x * lp.blockDim.y * lp.blockDim.z * @@ -603,9 +774,26 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL // lock all streams before launching the blit kernels for initializing the GWS and main kernels to each device for (int i = 0; i < numDevices; ++i) { LockedAccessor_StreamCrit_t streamCrit(launchParamsList[i].stream->criticalData(), false); -#if (__hcc_workweek__ >= 19213) - streamCrit->_av.acquire_locked_hsa_queue(); -#endif + + hc::completion_future streamCF; + if (!streamCrit->_av.get_is_empty()) { + streamCF = streamCrit->_av.create_marker(hc::accelerator_scope); + if (flags & hipCooperativeLaunchMultiDeviceNoPreSync) { + coopAVs[i].create_blocking_marker(streamCF, hc::accelerator_scope); + 
streamCrit->_av.acquire_locked_hsa_queue(); + coopAVs[i].acquire_locked_hsa_queue(); + } else { + for (int j = 0; j < numDevices; ++j) { + coopAVs[j].create_blocking_marker(streamCF, hc::accelerator_scope); + } + } + } + } + if ((flags & hipCooperativeLaunchMultiDeviceNoPreSync) == 0) { + for (int i = 0; i < numDevices; ++i) { + launchParamsList[i].stream->criticalData()._av.acquire_locked_hsa_queue(); + coopAVs[i].acquire_locked_hsa_queue(); + } } // launch the init_gws kernel to initialize the GWS for each device @@ -617,14 +805,13 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL gwsKernelParam[0] = &nwm1; result = ihipModuleLaunchKernel(tls, gwsKds[i], 1, 1, 1, 1, 1, 1, - 0, lp.stream, gwsKernelParam, nullptr, nullptr, nullptr, 0, true); + 0, lp.stream, gwsKernelParam, nullptr, nullptr, nullptr, 0, true, nullptr, &coopAVs[i]); if (result != hipSuccess) { for (int j = 0; j < numDevices; ++j) { launchParamsList[j].stream->criticalData().unlock(); -#if (__hcc_workweek__ >= 19213) launchParamsList[j].stream->criticalData()._av.release_locked_hsa_queue(); -#endif + coopAVs[i].release_locked_hsa_queue(); } hip_internal::ihipHostFree(tls, mg_sync_ptr); for (int j = 0; j < numDevices; ++j) { @@ -652,14 +839,14 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL prev_sum += lp.blockDim.x * lp.blockDim.y * lp.blockDim.z * lp.gridDim.x * lp.gridDim.y * lp.gridDim.z; + lp.stream->coopMemsTracker.push_back(mg_info_ptr[i]); impCoopParams[0] = &mg_info_ptr[i]; globalWorkSizeX = (size_t)lp.gridDim.x * (size_t)lp.blockDim.x; globalWorkSizeY = (size_t)lp.gridDim.y * (size_t)lp.blockDim.y; globalWorkSizeZ = (size_t)lp.gridDim.z * (size_t)lp.blockDim.z; - if(globalWorkSizeX > UINT32_MAX || globalWorkSizeY > UINT32_MAX || globalWorkSizeZ > UINT32_MAX) - { + if(globalWorkSizeX > UINT32_MAX || globalWorkSizeY > UINT32_MAX || globalWorkSizeZ > UINT32_MAX) { return hipErrorInvalidConfiguration; } @@ -670,18 +857,18 @@ 
hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL lp.blockDim.x, lp.blockDim.y, lp.blockDim.z, lp.sharedMem, lp.stream, lp.args, nullptr, nullptr, nullptr, 0, - true, impCoopParams); + true, impCoopParams, &coopAVs[i]); if (result != hipSuccess) { for (int j = 0; j < numDevices; ++j) { launchParamsList[j].stream->criticalData().unlock(); -#if (__hcc_workweek__ >= 19213) launchParamsList[j].stream->criticalData()._av.release_locked_hsa_queue(); -#endif + coopAVs[i].release_locked_hsa_queue(); } hip_internal::ihipHostFree(tls, mg_sync_ptr); for (int j = 0; j < numDevices; ++j) { hip_internal::ihipHostFree(tls, mg_info_ptr[j]); + launchParamsList[j].stream->coopMemsTracker.pop_back(); } return hipErrorLaunchFailure; @@ -689,23 +876,55 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL } - // unlock all streams + // unlock streams and create blocking markers on them based on the workload + // on cooperative queues on each device for (int i = 0; i < numDevices; ++i) { - launchParamsList[i].stream->criticalData().unlock(); -#if (__hcc_workweek__ >= 19213) + coopAVs[i].release_locked_hsa_queue(); launchParamsList[i].stream->criticalData()._av.release_locked_hsa_queue(); -#endif } - free(gwsKds); - free(kds); + for (int i = 0; i < numDevices; ++i) { + hc::completion_future cooperativeCF; + if (!coopAVs[i].get_is_empty()) { + cooperativeCF = coopAVs[i].create_marker(hc::accelerator_scope); + if (flags & hipCooperativeLaunchMultiDeviceNoPostSync) { + launchParamsList[i].stream->criticalData()._av.create_blocking_marker( + cooperativeCF, hc::accelerator_scope); + launchParamsList[i].stream->criticalData().unlock(); + } else { + for (int j = 0; j < numDevices; ++j) { + launchParamsList[j].stream->criticalData()._av.create_blocking_marker( + cooperativeCF, hc::accelerator_scope); + } + } + } + } - hip_internal::ihipHostFree(tls, mg_sync_ptr); - for (int j = 0; j < numDevices; ++j) { - 
hip_internal::ihipHostFree(tls, mg_info_ptr[j]); + if ((flags & hipCooperativeLaunchMultiDeviceNoPostSync) == 0) { + for (int i = 0; i < numDevices; ++i) { + launchParamsList[i].stream->criticalData().unlock(); + } } return result; +#else + return hipErrorInvalidConfiguration; +#endif +} + +__attribute__((visibility("default"))) +hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, + int numDevices, + unsigned int flags, + hip_impl::program_state& ps) { + + // Skipping passing in ps, because the logging function does not like it + HIP_INIT_API(hipLaunchCooperativeKernelMultiDevice, launchParamsList, + numDevices, flags); + + return ihipLogStatus(ihipLaunchCooperativeKernelMultiDevice(launchParamsList, + numDevices, + flags, ps)); } namespace hip_impl { @@ -959,26 +1178,11 @@ inline hsa_status_t remove_agent_global_variables(hsa_executable_t, hsa_agent_t hsa_executable_symbol_t find_kernel_by_name(hsa_executable_t executable, const char* kname, hsa_agent_t* agent = nullptr) { using namespace hip_impl; - - pair r{kname, {}}; - - hsa_executable_iterate_agent_symbols( - executable, agent ? *agent : this_agent(), - [](hsa_executable_t, hsa_agent_t, hsa_executable_symbol_t x, void* s) { - auto p = static_cast*>(s); - - if (type(x) != HSA_SYMBOL_KIND_KERNEL) { - return HSA_STATUS_SUCCESS; - } - if (name(x) != p->first) return HSA_STATUS_SUCCESS; - - p->second = x; - - return HSA_STATUS_INFO_BREAK; - }, - &r); - - return r.second; + hsa_executable_symbol_t symbol = { 0 }; + hsa_agent_t thisagent = agent ? *agent : this_agent(); + hsa_status_t err = hsa_executable_get_symbol_by_name(executable, kname, &thisagent ,&symbol); + //TODO check err ? + return symbol; } @@ -995,8 +1199,7 @@ string read_elf_file_as_string(const void* file) { auto h = static_cast(file); auto s = static_cast(file); // This assumes the common case of SHT being the last part of the ELF. 
- auto sz = - sizeof(ELFIO::Elf64_Ehdr) + h->e_shoff + h->e_shentsize * h->e_shnum; + auto sz = h->e_shoff + h->e_shentsize * h->e_shnum; return string{s, s + sz}; } @@ -1120,7 +1323,7 @@ const amd_kernel_code_v3_t *header_v3(const ihipModuleSymbol_t& kd) { return reinterpret_cast(kd._header); } -hipFuncAttributes make_function_attributes(TlsData *tls, const ihipModuleSymbol_t& kd) { +hipFuncAttributes make_function_attributes(TlsData *tls, ihipModuleSymbol_t& kd) { hipFuncAttributes r{}; hipDeviceProp_t prop{}; @@ -1130,23 +1333,57 @@ hipFuncAttributes make_function_attributes(TlsData *tls, const ihipModuleSymbol_ prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024; if (kd._is_code_object_v3) { - r.localSizeBytes = header_v3(kd)->private_segment_fixed_size; - r.sharedSizeBytes = header_v3(kd)->group_segment_fixed_size; - r.numRegs = ((header_v3(kd)->compute_pgm_rsrc1 & 0x3F) + 1) << 2; r.binaryVersion = 0; // FIXME: should it be the ISA version or code // object format version? + r.localSizeBytes = header_v3(kd)->private_segment_fixed_size; + r.sharedSizeBytes = header_v3(kd)->group_segment_fixed_size; } else { r.localSizeBytes = kd._header->workitem_private_segment_byte_size; r.sharedSizeBytes = kd._header->workgroup_group_segment_byte_size; - r.numRegs = kd._header->workitem_vgpr_count; r.binaryVersion = kd._header->amd_machine_version_major * 10 + kd._header->amd_machine_version_minor; } r.maxDynamicSharedSizeBytes = prop.sharedMemPerBlock - r.sharedSizeBytes; - r.maxThreadsPerBlock = r.numRegs ? 
- std::min(prop.maxThreadsPerBlock, prop.regsPerBlock / r.numRegs) : - prop.maxThreadsPerBlock; + + size_t usedVGPRS = 0; + size_t usedSGPRS = 0; + size_t usedLDS = 0; + getGprsLdsUsage(&kd, &usedVGPRS, &usedSGPRS, &usedLDS); + + r.numRegs = usedVGPRS; + + size_t wavefrontSize = prop.warpSize; + size_t maxWavefrontsPerBlock = prop.maxThreadsPerBlock / wavefrontSize; + size_t maxWavefrontsPerCU = min(prop.maxThreadsPerMultiProcessor / wavefrontSize, 32); + const size_t numSIMD = 4; + const size_t maxWavesPerSimd = maxWavefrontsPerCU / numSIMD; + size_t maxWaves = 0; + for (int i = 0; i < maxWavefrontsPerBlock; i++) { + size_t wavefronts = i + 1; + + if (usedVGPRS > 0) { + size_t availableVGPRs = (prop.regsPerBlock / wavefrontSize / numSIMD); + size_t vgprs_alu_occupancy = numSIMD * std::min(maxWavesPerSimd, availableVGPRs / usedVGPRS); + + // Calculate blocks occupancy per CU based on VGPR usage + if (vgprs_alu_occupancy < wavefronts) + break; + } + + if (usedSGPRS > 0) { + const size_t availableSGPRs = (prop.gcnArch < 800) ? 512 : 800; + size_t sgprs_alu_occupancy = numSIMD * ((usedSGPRS == 0) ? maxWavesPerSimd + : std::min(maxWavesPerSimd, availableSGPRs / usedSGPRS)); + + // Calculate blocks occupancy per CU based on SGPR usage + if (sgprs_alu_occupancy < wavefronts) + break; + } + maxWaves = wavefronts; + } + + r.maxThreadsPerBlock = maxWaves * wavefrontSize; r.ptxVersion = prop.major * 10 + prop.minor; // HIP currently presents itself as PTX 3.0. 
return r; @@ -1294,32 +1531,9 @@ hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const return ihipLogStatus(hipSuccess); } -void getGprsLdsUsage(hipFunction_t f, size_t* usedVGPRS, size_t* usedSGPRS, size_t* usedLDS) -{ - if (f->_is_code_object_v3) { - const auto header = reinterpret_cast(f->_header); - // GRANULATED_WAVEFRONT_VGPR_COUNT is specified in 0:5 bits of COMPUTE_PGM_RSRC1 - // the granularity for gfx6-gfx9 is max(0, ceil(vgprs_used / 4) - 1) - *usedVGPRS = ((header->compute_pgm_rsrc1 & 0x3F) + 1) << 2; - // GRANULATED_WAVEFRONT_SGPR_COUNT is specified in 6:9 bits of COMPUTE_PGM_RSRC1 - // the granularity for gfx9+ is 2 * max(0, ceil(sgprs_used / 16) - 1) - *usedSGPRS = ((((header->compute_pgm_rsrc1 & 0x3C0) >> 6) >> 1) + 1) << 4; - *usedLDS = header->group_segment_fixed_size; - } - else { - const auto header = f->_header; - // VGPRs granularity is 4 - *usedVGPRS = ((header->workitem_vgpr_count + 3) >> 2) << 2; - // adding 2 to take into account the 2 VCC registers & handle the granularity of 16 - *usedSGPRS = header->wavefront_sgpr_count + 2; - *usedSGPRS = ((*usedSGPRS + 15) >> 4) << 4; - *usedLDS = header->workgroup_group_segment_byte_size; - } -} - -hipError_t ihipOccupancyMaxPotentialBlockSize(TlsData *tls, uint32_t* gridSize, uint32_t* blockSize, +hipError_t ihipOccupancyMaxPotentialBlockSize(TlsData *tls, int* gridSize, int* blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, - uint32_t blockSizeLimit) + int blockSizeLimit) { using namespace hip_impl; @@ -1429,104 +1643,66 @@ hipError_t ihipOccupancyMaxPotentialBlockSize(TlsData *tls, uint32_t* gridSize, return hipSuccess; } -hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* blockSize, +hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, hipFunction_t f, size_t dynSharedMemPerBlk, - uint32_t blockSizeLimit) + int blockSizeLimit) { - HIP_INIT_API(hipOccupancyMaxPotentialBlockSize, gridSize, blockSize, f, 
dynSharedMemPerBlk, blockSizeLimit); + HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSize, gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit); + return ihipLogStatus(ihipOccupancyMaxPotentialBlockSize(tls, + gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit)); +} + +hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit, unsigned int flags) +{ + HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSizeWithFlags, gridSize, blockSize, f, dynSharedMemPerBlk, + blockSizeLimit, flags); + if(flags != hipOccupancyDefault) return ihipLogStatus(hipErrorNotSupported); return ihipLogStatus(ihipOccupancyMaxPotentialBlockSize(tls, gridSize, blockSize, f, dynSharedMemPerBlk, blockSizeLimit)); } -hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( - TlsData *tls, uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk) -{ - using namespace hip_impl; - - auto ctx = ihipGetTlsDefaultCtx(); - if (ctx == nullptr) { - return hipErrorInvalidDevice; - } - - hipDeviceProp_t prop{}; - ihipGetDeviceProperties(&prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId); - - prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024; - - size_t usedVGPRS = 0; - size_t usedSGPRS = 0; - size_t usedLDS = 0; - getGprsLdsUsage(f, &usedVGPRS, &usedSGPRS, &usedLDS); - - // Due to SPI and private memory limitations, the max of wavefronts per CU in 32 - size_t wavefrontSize = prop.warpSize; - size_t maxWavefrontsPerCU = min(prop.maxThreadsPerMultiProcessor / wavefrontSize, 32); - - const size_t simdPerCU = 4; - const size_t maxWavesPerSimd = maxWavefrontsPerCU / simdPerCU; - - size_t numWavefronts = (blockSize + wavefrontSize - 1) / wavefrontSize; - - size_t availableVGPRs = (prop.regsPerBlock / wavefrontSize / simdPerCU); - size_t vgprs_alu_occupancy = simdPerCU * (usedVGPRS == 0 ? 
maxWavesPerSimd - : std::min(maxWavesPerSimd, availableVGPRs / usedVGPRS)); - - // Calculate blocks occupancy per CU based on VGPR usage - *numBlocks = vgprs_alu_occupancy / numWavefronts; - - const size_t availableSGPRs = (prop.gcnArch < 800) ? 512 : 800; - size_t sgprs_alu_occupancy = simdPerCU * (usedSGPRS == 0 ? maxWavesPerSimd - : std::min(maxWavesPerSimd, availableSGPRs / usedSGPRS)); - - // Calculate blocks occupancy per CU based on SGPR usage - *numBlocks = std::min(*numBlocks, (uint32_t) (sgprs_alu_occupancy / numWavefronts)); - - size_t total_used_lds = usedLDS + dynSharedMemPerBlk; - if (total_used_lds != 0) { - // Calculate LDS occupacy per CU. lds_per_cu / (static_lsd + dynamic_lds) - size_t lds_occupancy = prop.maxSharedMemoryPerMultiProcessor / total_used_lds; - *numBlocks = std::min(*numBlocks, (uint32_t) lds_occupancy); - } - - return hipSuccess; -} - hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( - uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk) + int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk) { HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessor, numBlocks, f, blockSize, dynSharedMemPerBlk); + auto F = hip_impl::get_program_state().kernel_descriptor((std::uintptr_t)(f), + hip_impl::target_agent(0)); + return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor( + tls, numBlocks, F, blockSize, dynSharedMemPerBlk)); +} + +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk) +{ + HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, numBlocks, f, blockSize, dynSharedMemPerBlk); return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor( tls, numBlocks, f, blockSize, dynSharedMemPerBlk)); } -hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessor( - int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk) -{ - 
HIP_INIT_API(hipDrvOccupancyMaxActiveBlocksPerMultiprocessor, numBlocks, f, blockSize, dynSharedMemPerBlk); - - return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor( - tls, (uint32_t*) numBlocks, f, blockSize, dynSharedMemPerBlk)); -} - hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk, + int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags) { HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, numBlocks, f, blockSize, dynSharedMemPerBlk, flags); - + if(flags != hipOccupancyDefault) return ihipLogStatus(hipErrorNotSupported); + auto F = hip_impl::get_program_state().kernel_descriptor((std::uintptr_t)(f), + hip_impl::target_agent(0)); return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor( - tls, numBlocks, f, blockSize, dynSharedMemPerBlk)); + tls, numBlocks, F, blockSize, dynSharedMemPerBlk)); } -hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags) { - HIP_INIT_API(hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, numBlocks, f, blockSize, dynSharedMemPerBlk, flags); + HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, numBlocks, f, blockSize, dynSharedMemPerBlk, flags); + if(flags != hipOccupancyDefault) return ihipLogStatus(hipErrorNotSupported); return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor( - tls, (uint32_t*) numBlocks, f, blockSize, dynSharedMemPerBlk)); + tls, numBlocks, f, blockSize, dynSharedMemPerBlk)); } hipError_t hipLaunchKernel( diff --git a/src/hip_stream.cpp b/src/hip_stream.cpp index 2add6a77c4..5b56b71cd8 100644 --- a/src/hip_stream.cpp +++ b/src/hip_stream.cpp @@ -63,11 +63,11 @@ hipError_t ihipStreamCreate(TlsData *tls, hipStream_t* 
stream, unsigned int flag // TODO - se try-catch loop to detect memory exception? // - // Note this is an execute_any_order queue, + // Note this is an execute_any_order queue, // CUDA stream behavior is that all kernels submitted will automatically - // wait for prev to complete, this behaviour will be mainatined by - // hipModuleLaunchKernel. execute_any_order will help - // hipExtModuleLaunchKernel , which uses a special flag + // wait for prev to complete, this behaviour will be mainatined by + // hipModuleLaunchKernel. execute_any_order will help + // hipExtModuleLaunchKernel , which uses a special flag { // Obtain mutex access to the device critical data, release by destructor @@ -130,18 +130,19 @@ hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPrio hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { HIP_INIT_SPECIAL_API(hipStreamWaitEvent, TRACE_SYNC, stream, event, flags); - hipError_t e = hipSuccess; + if (!event) return ihipLogStatus(hipErrorInvalidHandle); - if (event == nullptr) { - e = hipErrorInvalidHandle; - - } else { - auto ecd = event->locked_copyCrit(); + auto ecd = event->locked_copyCrit(); + if (event->_flags & hipEventInterprocess) { + // this is an IPC event + if (ecd._ipc_shmem->read_index >= 0) { + // we have at least one recorded event, so proceed + stream->locked_streamWaitEvent(ecd); + } + } + else { if ((ecd._state != hipEventStatusUnitialized) && (ecd._state != hipEventStatusCreated)) { if (HIP_SYNC_STREAM_WAIT || (HIP_SYNC_NULL_STREAM && (stream == 0))) { - // conservative wait on host for the specified event to complete: - // return _stream->locked_eventWaitComplete(this, waitMode); - // ecd.marker().wait((event->_flags & hipEventBlockingSync) ? 
hc::hcWaitModeBlocked : hc::hcWaitModeActive); } else { @@ -150,9 +151,9 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int stream->locked_streamWaitEvent(ecd); } } - } // else event not recorded, return immediately and don't create marker. + } - return ihipLogStatus(e); + return ihipLogStatus(hipSuccess); }; @@ -257,11 +258,39 @@ hipError_t hipStreamGetPriority(hipStream_t stream, int* priority) { hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback, void* userData, unsigned int flags) { HIP_INIT_API(hipStreamAddCallback, stream, callback, userData, flags); - hipError_t e = hipSuccess; - // Create a thread in detached mode to handle callback - ihipStreamCallback_t* cb = new ihipStreamCallback_t(stream, callback, userData); - std::thread(ihipStreamCallbackHandler, cb).detach(); + auto stream_original{stream}; + stream = ihipSyncAndResolveStream(stream); - return ihipLogStatus(e); + if (!stream) return hipErrorInvalidValue; + + LockedAccessor_StreamCrit_t cs{stream->criticalData()}; + + // create first marker + auto cf = cs->_av.create_marker(hc::no_scope); + // get its signal + auto signal = *reinterpret_cast(cf.get_native_handle()); + // increment its signal value + hsa_signal_add_relaxed(signal, 1); + + // create callback that can be passed to hsa_amd_signal_async_handler + // this function will call the user's callback, then sets first packet's signal to 0 to indicate completion + auto t{new std::function{[=]() { + callback(stream_original, hipSuccess, userData); + hsa_signal_store_relaxed(signal, 0); + }}}; + + // register above callback with HSA runtime to be called when first packet's signal + // is decremented from 2 to 1 by CP (or it is already at 1) + hsa_amd_signal_async_handler(signal, HSA_SIGNAL_CONDITION_EQ, 1, + [](hsa_signal_value_t x, void* p) { + (*static_cast(p))(); + delete static_cast(p); + return false; + }, t); + + // create additional marker that blocks on the first one + 
cs->_av.create_blocking_marker(cf, hc::no_scope); + + return ihipLogStatus(hipSuccess); } diff --git a/src/hip_texture.cpp b/src/hip_texture.cpp index 27cf321fbc..29f0465dc1 100644 --- a/src/hip_texture.cpp +++ b/src/hip_texture.cpp @@ -301,7 +301,12 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou hsa_ext_sampler_descriptor_t samplerDescriptor; fillSamplerDescriptor(samplerDescriptor, pTexDesc->addressMode[0], pTexDesc->filterMode, pTexDesc->normalizedCoords); - + if(hipResourceTypeLinear == pResDesc->resType) { + samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_NEAREST; + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER; + } else if(!pTexDesc->normalizedCoords) { + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; + } hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; if(hipResourceTypePitch2D != pResDesc->resType) @@ -312,6 +317,7 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, pitch, 0, &(pTexture->image)) || HSA_STATUS_SUCCESS != hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) { + free(pTexture); return ihipLogStatus(hipErrorRuntimeOther); } @@ -438,7 +444,13 @@ hipError_t ihipBindTextureImpl(TlsData *tls_, int dim, enum hipTextureReadMode r imageDescriptor.format.channel_type = channelType; hsa_ext_sampler_descriptor_t samplerDescriptor; - fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords); + samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_NEAREST; + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER; + if (normalizedCoords) { + samplerDescriptor.coordinate_mode = HSA_EXT_SAMPLER_COORDINATE_MODE_NORMALIZED; + } else { + samplerDescriptor.coordinate_mode = HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED; + } hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; @@ 
-449,6 +461,7 @@ hipError_t ihipBindTextureImpl(TlsData *tls_, int dim, enum hipTextureReadMode r HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, rowPitch, 0, &(pTexture->image)) || HSA_STATUS_SUCCESS != hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) { + free(pTexture); return hipErrorRuntimeOther; } getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler); @@ -514,7 +527,9 @@ hipError_t ihipBindTexture2DImpl(TlsData *tls, int dim, enum hipTextureReadMode hsa_ext_sampler_descriptor_t samplerDescriptor; fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords); - + if(!normalizedCoords) { + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; + } hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; if( 0 == pitch) @@ -525,6 +540,7 @@ hipError_t ihipBindTexture2DImpl(TlsData *tls, int dim, enum hipTextureReadMode HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR, pitch, 0, &(pTexture->image)) || HSA_STATUS_SUCCESS != hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) { + free(pTexture); return hipErrorRuntimeOther; } getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler); @@ -620,7 +636,9 @@ hipError_t ihipBindTextureToArrayImpl(TlsData *tls_, int dim, enum hipTextureRea hsa_ext_sampler_descriptor_t samplerDescriptor; fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords); - + if(!normalizedCoords) { + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; + } hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; size_t rowPitch = getElementSize(channelOrder, channelType) * alignUp(imageDescriptor.width, IMAGE_PITCH_ALIGNMENT); diff --git a/src/hiprtc.cpp b/src/hiprtc.cpp index e9a516c339..a11207f337 100644 --- a/src/hiprtc.cpp +++ b/src/hiprtc.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. 
*/ #include "../include/hip/hiprtc.h" -#include "../include/hip/hcc_detail/code_object_bundle.hpp" +#include "code_object_bundle.inl" #include "../include/hip/hcc_detail/elfio/elfio.hpp" #include "../include/hip/hcc_detail/program_state.hpp" diff --git a/src/program_state.cpp b/src/program_state.cpp index 5e9f9976be..975dcda321 100644 --- a/src/program_state.cpp +++ b/src/program_state.cpp @@ -68,7 +68,14 @@ namespace hip_impl { const size_t data_size, hsa_executable_t executable, hsa_agent_t agent) { - return impl->load_executable(data, data_size, executable, agent); + return impl->load_executable(data, data_size, true, executable, agent); + } + + hsa_executable_t program_state::load_executable_no_copy(const char* data, + const size_t data_size, + hsa_executable_t executable, + hsa_agent_t agent) { + return impl->load_executable(data, data_size, false, executable, agent); } hipFunction_t program_state::kernel_descriptor(std::uintptr_t function_address, diff --git a/src/program_state.inl b/src/program_state.inl index 548a56795f..0314c7d4ed 100644 --- a/src/program_state.inl +++ b/src/program_state.inl @@ -1,6 +1,6 @@ #include "../include/hip/hcc_detail/program_state.hpp" -#include "../include/hip/hcc_detail/code_object_bundle.hpp" +#include "code_object_bundle.inl" #include "../include/hip/hcc_detail/hsa_helpers.hpp" #if !defined(__cpp_exceptions) @@ -357,8 +357,11 @@ public: const auto it1 = get_symbol_addresses().find(x); if (it1 == get_symbol_addresses().cend()) { - hip_throw(std::runtime_error{ - "Global symbol: " + x + " is undefined."}); + // For a unknown symbol, initialize it with a magic poison + hsa_executable_agent_global_variable_define( + executable, agent, x.c_str(), + reinterpret_cast(0xDEADBEEFDEADBEEFull)); + continue; } hsa_status_t status; @@ -405,11 +408,13 @@ public: } void load_code_object_and_freeze_executable( - const std::string& file, hsa_agent_t agent, hsa_executable_t executable) { + const char* data, + const size_t data_size, bool 
make_copy, + hsa_agent_t agent, hsa_executable_t executable) { // TODO: the following sequence is inefficient, should be refactored // into a single load of the file and subsequent ELFIO // processing. - if (file.empty()) return; + if (!data_size) return; static const auto cor_deleter = [] (hsa_code_object_reader_t* p) { if (!p) return; @@ -422,8 +427,16 @@ public: decltype(code_readers.second)::iterator it; { std::lock_guard lck{code_readers.first}; + + std::string file; + if (make_copy) + file = std::string(data, data_size); + code_readers.second.emplace_back(move(file), move(tmp)); it = std::prev(code_readers.second.end()); + + if (make_copy) + data = it->first.data(); } auto check_hsa_error = [](hsa_status_t s) { @@ -437,7 +450,7 @@ public: }; check_hsa_error(hsa_code_object_reader_create_from_memory( - it->first.data(), it->first.size(), it->second.get())); + data, data_size, it->second.get())); check_hsa_error(hsa_executable_load_agent_code_object( executable, agent, *it->second, nullptr, nullptr)); @@ -484,7 +497,7 @@ public: // TODO: this is massively inefficient and only meant for // illustration. 
- tmp = impl.load_executable(blob.data(), blob.size(), tmp, a); + tmp = impl.load_executable(blob.data(), blob.size(), true, tmp, a); if (tmp.handle) current_exes.push_back(tmp); } @@ -502,6 +515,7 @@ public: hsa_executable_t load_executable(const char* data, const size_t data_size, + bool make_copy, hsa_executable_t executable, hsa_agent_t agent) { ELFIO::elfio reader; @@ -518,7 +532,7 @@ public: code_object_dynsym, agent, executable); - load_code_object_and_freeze_executable(move(ts), agent, executable); + load_code_object_and_freeze_executable(data, data_size, make_copy, agent, executable); return executable; } diff --git a/src/trace_helper.h b/src/trace_helper.h index 84f218a438..202a302f70 100644 --- a/src/trace_helper.h +++ b/src/trace_helper.h @@ -71,6 +71,11 @@ inline std::string ToString(hipEvent_t v) { ss << v; return ss.str(); }; +// hipIpcEventHandle_t specialization. TODO +template <> +inline std::string ToString(hipIpcEventHandle_t v) { + return std::string{}; +}; // hipStream_t template <> inline std::string ToString(hipStream_t v) { diff --git a/tests/hipify-clang/lit.cfg b/tests/hipify-clang/lit.cfg deleted file mode 100644 index ea496e6a69..0000000000 --- a/tests/hipify-clang/lit.cfg +++ /dev/null @@ -1,151 +0,0 @@ -# -*- Python -*- -import os -import platform -import re -import subprocess -import struct - -import lit.formats -import lit.util - -# Configuration file for the 'lit' test runner. 
-site_cfg = lit_config.params.get('site_config', None) -lit_config.load_config(config, site_cfg) - -config.excludes = ['cmdparser.hpp'] -config.excludes.append('spatial_batch_norm_op.h') -config.excludes.append('common_cudnn.h') - -delimiter = "==============================================================="; -print(delimiter) -print("CUDA " + config.cuda_version + " - will be used for testing") -print("LLVM " + config.llvm_version + " - will be used for testing") -print(platform.machine() + " - Platform architecture") -print(platform.system() + " " + platform.release() + " - Platform OS") -print(str(config.pointer_size * 8) + " - hipify-clang binary bitness") -print(str(struct.calcsize("P") * 8) + " - python " + str(platform.python_version()) + " binary bitness") -print(delimiter) -warns = None -if not config.cuda_dnn_root: - config.excludes.append('cudnn_convolution_forward.cu') - config.excludes.append('cudnn_softmax.cu') - print("WARN: cuDNN tests are excluded due to unset CUDA_DNN_ROOT_DIR") - warns = True -if not config.cuda_cub_root: - config.excludes.append('cub_01.cu') - config.excludes.append('cub_02.cu') - config.excludes.append('cub_03.cu') - print("WARN: CUB tests are excluded due to unset CUDA_CUB_ROOT_DIR") - warns = True -if warns: - print(delimiter) - -if config.cuda_version_major == 7 and config.cuda_version_minor == 0: - config.excludes.append('headers_test_09.cu') - config.excludes.append('cudnn_convolution_forward.cu') -if config.cuda_version_major < 8: - config.excludes.append('cuSPARSE_02.cu') -if config.cuda_version_major < 9: - config.excludes.append('cuSPARSE_04.cu') - config.excludes.append('cuSPARSE_05.cu') - config.excludes.append('cuSPARSE_06.cu') - config.excludes.append('cuSPARSE_07.cu') - config.excludes.append('benchmark_curand_kernel.cpp') -if config.cuda_version_major < 10: - config.excludes.append('cuSPARSE_08.cu') - config.excludes.append('cuSPARSE_09.cu') - config.excludes.append('cuSPARSE_10.cu') - 
config.excludes.append('cuSPARSE_11.cu') - -if config.llvm_version_major < 10: - config.excludes.append('pp_if_else_conditionals_LLVM_10.cu') - config.excludes.append('pp_if_else_conditionals_01_LLVM_10.cu') - -# name: The name of this test suite. -config.name = 'hipify' - -# suffixes: CUDA source is only supported -config.suffixes = ['.cu','.cuh','.cpp','.c','.hpp','.h'] - -# testFormat: The test format to use to interpret tests. -config.test_format = lit.formats.ShTest() - -# test_source_root: The root path where tests are located. -config.test_source_root = os.path.dirname(__file__) - -# test_exec_root: The path where tests are located (default is the test suite root). -#config.test_exec_root = config.test_source_root - -# target_triple: Used by ShTest and TclTest formats for XFAIL checks. -config.target_triple = '(unused)' - -# available_features: Used by ShTest and TclTest formats for REQUIRES checks. -config.available_features = [] - -obj_root = getattr(config, 'obj_root', None) -if obj_root is not None: - config.test_exec_root = obj_root - -if obj_root is not None: - llvm_tools_dir = getattr(config, 'llvm_tools_dir', None) - if not llvm_tools_dir: - lit_config.fatal('No LLVM tools dir set!') - path = os.path.pathsep.join((llvm_tools_dir, config.environment['PATH'])) - config.environment['PATH'] = path - -hipify_path = obj_root - -clang_arguments = "-v" -if sys.platform in ['win32']: - run_test_ext = ".bat" - hipify_path += "/" + config.build_type - # CUDA SDK ROOT - clang_arguments += " -isystem'%s'/common/inc" -else: - run_test_ext = ".sh" - # CUDA SDK ROOT - clang_arguments += " -isystem'%s'/samples/common/inc" -if config.pointer_size == 8: - clang_arguments += " -D__LP64__" - -# Set max clang's CudaArch for corresponding CUDA version -# to support maximum CUDA features in offline tests -if config.cuda_version_major == 7: - if config.cuda_version_minor == 5: - clang_arguments += " --cuda-gpu-arch=sm_53" - else: - clang_arguments += " --cuda-gpu-arch=sm_52" 
-elif config.cuda_version_major == 8: - clang_arguments += " --cuda-gpu-arch=sm_62" -elif config.cuda_version_major == 9: - if config.cuda_version_minor == 2: - clang_arguments += " --cuda-gpu-arch=sm_72" - else: - clang_arguments += " --cuda-gpu-arch=sm_70" -elif config.cuda_version_major == 10: - clang_arguments += " --cuda-gpu-arch=sm_75" - -# cuDNN ROOT -if config.cuda_dnn_root: - clang_arguments += " -I'%s'/include" -# CUB ROOT -if config.cuda_cub_root: - clang_arguments += " -I'%s'" - -if config.cuda_dnn_root and config.cuda_cub_root: - config.substitutions.append(("%clang_args", clang_arguments % (config.cuda_sdk_root, config.cuda_dnn_root, config.cuda_cub_root))) -elif config.cuda_dnn_root: - config.substitutions.append(("%clang_args", clang_arguments % (config.cuda_sdk_root, config.cuda_dnn_root))) -elif config.cuda_cub_root: - config.substitutions.append(("%clang_args", clang_arguments % (config.cuda_sdk_root, config.cuda_cub_root))) -else: - config.substitutions.append(("%clang_args", clang_arguments % config.cuda_sdk_root)) - -if config.llvm_version_major < 4: - hipify_arguments = "-I'%s'/include" -else: - hipify_arguments = "--cuda-path='%s'" - -config.substitutions.append(("%hipify_args", hipify_arguments % config.cuda_root)) -config.substitutions.append(("hipify", '"' + hipify_path + "/hipify-clang" + '"')) -config.substitutions.append(("%run_test", '"' + config.test_source_root + "/run_test" + run_test_ext + '"')) diff --git a/tests/hipify-clang/lit.site.cfg.in b/tests/hipify-clang/lit.site.cfg.in deleted file mode 100644 index 3c17567903..0000000000 --- a/tests/hipify-clang/lit.site.cfg.in +++ /dev/null @@ -1,38 +0,0 @@ -import sys -import os - -config.pointer_size = @CMAKE_SIZEOF_VOID_P@ -config.llvm_version = "@LLVM_PACKAGE_VERSION@" -config.llvm_version_major = int("@LLVM_VERSION_MAJOR@") -config.llvm_tools_dir = "@LLVM_TOOLS_BINARY_DIR@" -config.obj_root = "@CMAKE_CURRENT_BINARY_DIR@" -config.cuda_root = "@CUDA_TOOLKIT_ROOT_DIR@" 
-config.cuda_dnn_root = "@CUDA_DNN_ROOT_DIR@" -config.cuda_cub_root = "@CUDA_CUB_ROOT_DIR@" -config.cuda_version_major = int("@CUDA_VERSION_MAJOR@") -config.cuda_version_minor = int("@CUDA_VERSION_MINOR@") -config.cuda_version = "@CUDA_VERSION@" -if sys.platform in ['win32']: - config.cuda_sdk_root = "@CUDA_SDK_ROOT_DIR@" - if not config.cuda_sdk_root or config.cuda_sdk_root == "CUDA_SDK_ROOT_DIR-NOTFOUND": - cuda_version = config.cuda_version - cuda_version = cuda_version.replace('.','_') - config.cuda_samples_root = os.environ.get('NVCUDASAMPLES' + cuda_version + '_ROOT') - if not config.cuda_samples_root: - lit_config.fatal('No CUDA Samples dir set! Please set CUDA_SDK_ROOT_DIR.') - config.cuda_sdk_root = config.cuda_samples_root - config.build_type = "@CMAKE_BUILD_TYPE@" - if not config.build_type: - config.build_type = "Debug" -else: - config.cuda_sdk_root = config.cuda_root - -# Support substitution of the tools and libs dirs with user parameters. This is -# used when we can't determine the tool dir at configuration time. -try: - config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params - config.obj_root = config.obj_root % lit_config.params -except KeyError: - e = sys.exc_info()[1] - key, = e.args - lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) diff --git a/tests/hipify-clang/run_test.bat b/tests/hipify-clang/run_test.bat deleted file mode 100644 index 8b10bc8c2e..0000000000 --- a/tests/hipify-clang/run_test.bat +++ /dev/null @@ -1,21 +0,0 @@ -@echo off -setlocal - -for %%i in (FileCheck.exe) do set FILE_CHECK=%%~$PATH:i -if not defined FILE_CHECK (echo Error: FileCheck.exe not found in PATH. 
&& exit /b 1) - -set HIPIFY=%1 -set IN_FILE=%2 -set TMP_FILE=%3 -set CUDA_ROOT=%4 -set ROC=%5 - -set all_args=%* -call set clang_args=%%all_args:*%6=%% -set clang_args=%6%clang_args% - -%HIPIFY% -o=%TMP_FILE% %IN_FILE% %CUDA_ROOT% %ROC% -- %clang_args% -if errorlevel 1 (echo Error: hipify-clang.exe failed with exit code: %errorlevel% && exit /b %errorlevel%) - -findstr /v /r /c:"[ ]*//[ ]*[CHECK*|RUN]" %TMP_FILE% | %FILE_CHECK% %IN_FILE% -if errorlevel 1 (echo Error: FileCheck.exe failed with exit code: %errorlevel% && exit /b %errorlevel%) diff --git a/tests/hipify-clang/run_test.sh b/tests/hipify-clang/run_test.sh deleted file mode 100755 index 357976558e..0000000000 --- a/tests/hipify-clang/run_test.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash - -set -o errexit - -# Run a single LIT test file in a magical way that preserves colour output, to work around -# a known flaw in lit. - -# Capture lit substitutions -HIPIFY=$1 -IN_FILE=$2 -TMP_FILE=$3 -CUDA_ROOT=$4 -ROC=$5 -shift 5 - -# Remaining args are the ones to forward to clang proper. - -$HIPIFY -o=$TMP_FILE $IN_FILE $CUDA_ROOT $ROC -- $@ && cat $TMP_FILE | sed -Ee 's|//.+|// |g' | FileCheck $IN_FILE diff --git a/tests/hipify-clang/unit_tests/casts/reinterpret_cast.cu b/tests/hipify-clang/unit_tests/casts/reinterpret_cast.cu deleted file mode 100644 index 6b6f4f5dde..0000000000 --- a/tests/hipify-clang/unit_tests/casts/reinterpret_cast.cu +++ /dev/null @@ -1,52 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -// CHECK: #include -#include - -__global__ -void fn(float* px, float* py) { - bool a[42]; - __shared__ double b[69]; - for (auto&& x : b) x = *py++; - for (auto&& x : a) x = *px++ > 0.0; - for (auto&& x : a) if (x) *--py = *--px; -} - -int main() { - // CHECK: hipFuncCache_t cacheConfig; - cudaFuncCache cacheConfig; - void* func; - // CHECK: hipFuncSetCacheConfig(reinterpret_cast(func), cacheConfig); - cudaFuncSetCacheConfig(func, cacheConfig); - // CHECK: hipFuncAttributes attr{}; - cudaFuncAttributes attr{}; - // CHECK: auto r = hipFuncGetAttributes(&attr, reinterpret_cast(&fn)); - auto r = cudaFuncGetAttributes(&attr, &fn); - // CHECK: if (r != hipSuccess || attr.maxThreadsPerBlock == 0) { - if (r != cudaSuccess || attr.maxThreadsPerBlock == 0) { - return 1; - } - return 0; -} diff --git a/tests/hipify-clang/unit_tests/device/atomics.cu b/tests/hipify-clang/unit_tests/device/atomics.cu deleted file mode 100644 index 3089efe1b8..0000000000 --- a/tests/hipify-clang/unit_tests/device/atomics.cu +++ /dev/null @@ -1,286 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args "-Xclang" "-fcuda-allow-variadic-functions" - -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -// CHECK: #include "hip/hip_runtime.h" -#include "cuda_runtime.h" - -#include -#include -#include -#include -#include -#include - -#define EXIT_WAIVED 2 - -const char* sampleName = "hipSimpleAtomicsTest"; - -using namespace std; -// Auto-Verification Code -bool testResult = true; - -bool computeGoldBitwise(...) { - return true; -} - -template{}>::type* = nullptr> -bool computeGoldBitwise(T* gpuData, int len) { - T val = 0xff; - for (int i = 0; i < len; ++i) { - // 9th element should be 1 - val &= (2 * i + 7); - } - if (val != gpuData[8]) { - printf("atomicAnd failed\n"); - return false; - } - - val = 0; - for (int i = 0; i < len; ++i) { - // 10th element should be 0xff - val |= (1 << i); - } - if (val != gpuData[9]) { - printf("atomicOr failed\n"); - return false; - } - - val = 0xff; - for (int i = 0; i < len; ++i) { - // 11th element should be 0xff - val ^= i; - } - if (val != gpuData[10]) { - printf("atomicXor failed\n"); - return false; - } - - return true; -} - -template -bool computeGold(T* gpuData, int len) { - T val = 0; - for (int i = 0; i < len; ++i) { - val += 10; - } - if (val != gpuData[0]) { - printf("atomicAdd failed\n"); - return false; - } - - val = 0; - for (int i = 0; i < len; ++i) { - val -= 10; - } - if (val != gpuData[1]) { - printf("atomicSub failed\n"); - return false; - } - - bool found = false; - for (T i = 0; i < len; ++i) { - // third element should be a member of [0, len) - if (i == gpuData[2]) { - found = true; - break; - } - } - if (!found) { - printf("atomicExch 
failed\n"); - return false; - } - - val = -(1 << 8); - for (T i = 0; i < len; ++i) { - // fourth element should be len-1 - val = max(val, i); - } - if (val != gpuData[3]) { - printf("atomicMax failed\n"); - return false; - } - - val = 1 << 8; - for (T i = 0; i < len; ++i) { - val = min(val, i); - } - if (val != gpuData[4]) { - printf("atomicMin failed\n"); - return false; - } - - int limit = 17; - val = 0; - for (int i = 0; i < len; ++i) { - val = (val >= limit) ? 0 : val + 1; - } - if (val != gpuData[5]) { - printf("atomicInc failed\n"); - return false; - } - - limit = 137; - val = 0; - for (int i = 0; i < len; ++i) { - val = ((val == 0) || (val > limit)) ? limit : val - 1; - } - if (val != gpuData[6]) { - printf("atomicDec failed\n"); - return false; - } - - found = false; - for (T i = 0; i < len; ++i) { - // eighth element should be a member of [0, len) - if (i == gpuData[7]) { - found = true; - break; - } - } - if (!found) { - printf("atomicCAS failed\n"); - return false; - } - - return computeGoldBitwise(gpuData, len); -} - -__device__ -void testKernelExch(...) {} - -template{}>::type* = nullptr> -__device__ -void testKernelExch(T* g_odata) { - // access thread id - const T tid = blockDim.x * blockIdx.x + threadIdx.x; - // Atomic exchange - atomicExch(&g_odata[2], tid); -} - -__device__ -void testKernelSub(...) {} - -template< - typename T, - typename enable_if< - is_same{} || is_same{}>::type* = nullptr> -__device__ -void testKernelSub(T* g_odata) { - // Atomic subtraction (final should be 0) - atomicSub(&g_odata[1], 10); -} - -__device__ -void testKernelIntegral(...) 
{} - -template{}>::type* = nullptr> -__device__ -void testKernelIntegral(T* g_odata) { - // access thread id - const T tid = blockDim.x * blockIdx.x + threadIdx.x; - // Atomic maximum - atomicMax(&g_odata[3], tid); - // Atomic minimum - atomicMin(&g_odata[4], tid); - // Atomic increment (modulo 17+1) - atomicInc((unsigned int*)&g_odata[5], 17); - // Atomic decrement - atomicDec((unsigned int*)&g_odata[6], 137); - // Atomic compare-and-swap - atomicCAS(&g_odata[7], tid - 1, tid); - // Atomic AND - atomicAnd(&g_odata[8], 2 * tid + 7); - // Atomic OR - atomicOr(&g_odata[9], 1 << tid); - // Atomic XOR - atomicXor(&g_odata[10], tid); - testKernelSub(g_odata); -} - -template -__global__ void testKernel(T* g_odata) { - // Atomic addition - atomicAdd(&g_odata[0], 10); - testKernelIntegral(g_odata); - testKernelExch(g_odata); -} - -template -void runTest() { - // CHECK: hipDeviceProp_t deviceProp; - cudaDeviceProp deviceProp; - deviceProp.major = 0; - deviceProp.minor = 0; - int dev = 0; - // CHECK: hipGetDeviceProperties(&deviceProp, dev); - cudaGetDeviceProperties(&deviceProp, dev); - // Statistics about the GPU device - printf( - "> GPU device has %d Multi-Processors, " - "SM %d.%d compute capabilities\n\n", - deviceProp.multiProcessorCount, deviceProp.major, deviceProp.minor); - unsigned int numThreads = 256; - unsigned int numBlocks = 64; - unsigned int numData = 11; - unsigned int memSize = sizeof(T) * numData; - - // Allocate mem for the result on host side - T* hOData = (T*)malloc(memSize); - // Initialize the memory - for (unsigned int i = 0; i < numData; i++) hOData[i] = 0; - // To make the AND and XOR tests generate something other than 0... 
- hOData[8] = hOData[10] = 0xff; - // Allocate device memory for result - T* dOData; - // CHECK: hipMalloc((void**)&dOData, memSize); - cudaMalloc((void**)&dOData, memSize); - // Copy host memory to device to initialize to zero - // CHECK: hipMemcpy(dOData, hOData, memSize, hipMemcpyHostToDevice); - cudaMemcpy(dOData, hOData, memSize, cudaMemcpyHostToDevice); - // Execute the kernel - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(testKernel), dim3(numBlocks), dim3(numThreads), 0, 0, dOData); - testKernel<<>>(dOData); - // Copy result from device to host - // CHECK: hipMemcpy(hOData, dOData, memSize, hipMemcpyDeviceToHost); - cudaMemcpy(hOData, dOData, memSize, cudaMemcpyDeviceToHost); - // Compute reference solution - testResult = computeGold(hOData, numThreads * numBlocks); - // Cleanup memory - free(hOData); - // CHECK: hipFree(dOData); - cudaFree(dOData); -} - -int main(int argc, char** argv) { - printf("%s starting...\n", sampleName); - runTest(); - runTest(); - runTest(); - runTest(); -#if CUDA_VERSION >= 8000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 - runTest(); -#endif - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - printf("%s completed, returned %s\n", sampleName, testResult ? "OK" : "ERROR!"); - exit(testResult ? EXIT_SUCCESS : EXIT_FAILURE); -} diff --git a/tests/hipify-clang/unit_tests/device/device_symbols.cu b/tests/hipify-clang/unit_tests/device/device_symbols.cu deleted file mode 100644 index b58abeda46..0000000000 --- a/tests/hipify-clang/unit_tests/device/device_symbols.cu +++ /dev/null @@ -1,152 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -// CHECK: #include -#include -#include -#include - -#define NUM 1024 -#define SIZE 1024 * 4 - -__device__ int globalIn[NUM]; -__device__ int globalOut[NUM]; - -__global__ void Assign(int* Out) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - Out[tid] = globalIn[tid]; - globalOut[tid] = globalIn[tid]; -} - -__device__ __constant__ int globalConst[NUM]; - -__global__ void checkAddress(int* addr, bool* out) { - *out = (globalConst == addr); -} - -int main() { - int *A, *Am, *B, *Ad, *C, *Cm; - A = new int[NUM]; - B = new int[NUM]; - C = new int[NUM]; - for (int i = 0; i < NUM; ++i) { - A[i] = -1 * i; - B[i] = 0; - C[i] = 0; - } - // CHECK: hipMalloc((void**)&Ad, SIZE); - cudaMalloc((void**)&Ad, SIZE); - // CHECK: hipHostMalloc((void**)&Am, SIZE); - cudaMallocHost((void**)&Am, SIZE); - // CHECK: hipHostMalloc((void**)&Cm, SIZE); - cudaMallocHost((void**)&Cm, SIZE); - for (int i = 0; i < NUM; ++i) { - Am[i] = -1 * i; - Cm[i] = 0; - } - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipStreamCreate(&stream); - cudaStreamCreate(&stream); - // CHECK: hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), Am, SIZE, 0, hipMemcpyHostToDevice, stream); - cudaMemcpyToSymbolAsync(globalIn, Am, SIZE, 0, cudaMemcpyHostToDevice, stream); - // CHECK: hipStreamSynchronize(stream); - cudaStreamSynchronize(stream); - // CHECK: hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); - Assign<<>>(Ad); - // CHECK: hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost); - cudaMemcpy(B, Ad, SIZE, cudaMemcpyDeviceToHost); - // CHECK: hipMemcpyFromSymbolAsync(Cm, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream); - cudaMemcpyFromSymbolAsync(Cm, globalOut, SIZE, 0, cudaMemcpyDeviceToHost, stream); - // CHECK: hipStreamSynchronize(stream); - cudaStreamSynchronize(stream); - for (int i = 0; i < NUM; ++i) { - assert(Am[i] == B[i]); - assert(Am[i] == Cm[i]); - } - for (int i = 0; i < NUM; ++i) { - A[i] = -2 * i; - B[i] = 0; - } - // CHECK: 
hipMemcpyToSymbol(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice); - cudaMemcpyToSymbol(globalIn, A, SIZE, 0, cudaMemcpyHostToDevice); - // CHECK: hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); - Assign<<>>(Ad); - // CHECK: hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost); - cudaMemcpy(B, Ad, SIZE, cudaMemcpyDeviceToHost); - // CHECK: hipMemcpyFromSymbol(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost); - cudaMemcpyFromSymbol(C, globalOut, SIZE, 0, cudaMemcpyDeviceToHost); - for (int i = 0; i < NUM; ++i) { - assert(A[i] == B[i]); - assert(A[i] == C[i]); - } - for (int i = 0; i < NUM; ++i) { - A[i] = -3 * i; - B[i] = 0; - } - // CHECK: hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice, stream); - cudaMemcpyToSymbolAsync(globalIn, A, SIZE, 0, cudaMemcpyHostToDevice, stream); - // CHECK: hipStreamSynchronize(stream); - cudaStreamSynchronize(stream); - // CHECK: hipLaunchKernelGGL(Assign, dim3(1, 1, 1), dim3(NUM, 1, 1), 0, 0, Ad); - Assign<<>>(Ad); - // CHECK: hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost); - cudaMemcpy(B, Ad, SIZE, cudaMemcpyDeviceToHost); - // CHECK: hipMemcpyFromSymbolAsync(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream); - cudaMemcpyFromSymbolAsync(C, globalOut, SIZE, 0, cudaMemcpyDeviceToHost, stream); - // CHECK: hipStreamSynchronize(stream); - cudaStreamSynchronize(stream); - for (int i = 0; i < NUM; ++i) { - assert(A[i] == B[i]); - assert(A[i] == C[i]); - } - bool *checkOkD; - bool checkOk = false; - size_t symbolSize = 0; - int *symbolAddress; - // CHECK: hipGetSymbolSize(&symbolSize, HIP_SYMBOL(globalConst)); - cudaGetSymbolSize(&symbolSize, globalConst); - // CHECK: hipGetSymbolAddress((void**) &symbolAddress, HIP_SYMBOL(globalConst)); - cudaGetSymbolAddress((void**) &symbolAddress, globalConst); - // CHECK: hipMalloc((void**)&checkOkD, sizeof(bool)); - cudaMalloc((void**)&checkOkD, sizeof(bool)); - // CHECK: hipLaunchKernelGGL(checkAddress, dim3(1, 1, 1), 
dim3(1, 1, 1), 0, 0, symbolAddress, checkOkD); - checkAddress<<>>(symbolAddress, checkOkD); - // CHECK: hipMemcpy(&checkOk, checkOkD, sizeof(bool), hipMemcpyDeviceToHost); - cudaMemcpy(&checkOk, checkOkD, sizeof(bool), cudaMemcpyDeviceToHost); - // CHECK: hipFree(checkOkD); - cudaFree(checkOkD); - assert(checkOk); - assert(symbolSize == SIZE); - // CHECK: hipHostFree(Am); - cudaFreeHost(Am); - // CHECK: hipHostFree(Cm); - cudaFreeHost(Cm); - // CHECK: hipFree(Ad); - cudaFree(Ad); - delete[] A; - delete[] B; - delete[] C; - return 0; -} diff --git a/tests/hipify-clang/unit_tests/device/math_functions.cu b/tests/hipify-clang/unit_tests/device/math_functions.cu deleted file mode 100644 index c833ca0182..0000000000 --- a/tests/hipify-clang/unit_tests/device/math_functions.cu +++ /dev/null @@ -1,58 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// Synthetic test to warn only on device functions umin and umax as unsupported, but not on user defined ones. -// ToDo: change lit testing in order to parse the output. - -#define LEN 1024 -#define SIZE LEN * sizeof(float) -// CHECK: #include -#include - -namespace my { - // user defined function - unsigned int umin(unsigned int arg1, unsigned int arg2) { - return (arg1 < arg2) ? arg1 : arg2; - } - // user defined function - unsigned int umax(unsigned int arg1, unsigned int arg2) { - return (arg1 > arg2) ? 
arg1 : arg2; - } -} - -__global__ void uint_arithm(float* A, float* B, float* C, unsigned int u1, unsigned int u2) -{ - // device function call (warn if unsupported) - unsigned int _umin = umin ( u1, u2 ); - // device function call (warn if unsupported) - unsigned int _umax = umax ( u1, u2 ); - // device function call (warn if unsupported) - unsigned int _umin_global = ::umin ( u1, u2 ); - // device function call (warn if unsupported) - unsigned int _umax_global = ::umax(u1, u2); - if (_umin != _umin_global) return; - if (_umax != _umax_global) return; - int i = threadIdx.x; - A[i] = i + _umin; - B[i] = i + _umax; - C[i] = A[i] + B[i]; -} - -int main() { - unsigned int u1 = 33; - unsigned int u2 = 34; - // user defined function call - unsigned int _min = my::umin(u1, u2); - // user defined function call - unsigned int _max = my::umax(u1, u2); - float *A, *B, *C; - // CHECK: hipMalloc((void**)&A, SIZE); - cudaMalloc((void**)&A, SIZE); - // CHECK: hipMalloc((void**)&B, SIZE); - cudaMalloc((void**)&B, SIZE); - // CHECK: hipMalloc((void**)&C, SIZE); - cudaMalloc((void**)&C, SIZE); - dim3 dimGrid(LEN / 512, 1, 1); - dim3 dimBlock(512, 1, 1); - // CHECK: hipLaunchKernelGGL(uint_arithm, dim3(dimGrid), dim3(dimBlock), 0, 0, A, B, C, u1, u2); - uint_arithm<<>>(A, B, C, u1, u2); - return _min < _max; -} diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_01.cu b/tests/hipify-clang/unit_tests/headers/headers_test_01.cu deleted file mode 100644 index 013d7c17c4..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_01.cu +++ /dev/null @@ -1,8 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include -// CHECK-NOT: #include -// CHECK: #include -#include -#include -#include diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_02.cu b/tests/hipify-clang/unit_tests/headers/headers_test_02.cu deleted file mode 100644 index 957fd16559..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_02.cu +++ 
/dev/null @@ -1,8 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include "hip/hip_runtime.h" -// CHECK-NOT: #include "cuda_runtime.h" -// CHECK: #include -#include "cuda.h" -#include "cuda_runtime.h" -#include diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_03.cu b/tests/hipify-clang/unit_tests/headers/headers_test_03.cu deleted file mode 100644 index 14735172fb..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_03.cu +++ /dev/null @@ -1,10 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #pragma once -// CHECK-NEXT: #include -#pragma once -// CHECK-NOT: #include -int main(int argc, char* argv[]) { - return 0; -} - diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_04.cu b/tests/hipify-clang/unit_tests/headers/headers_test_04.cu deleted file mode 100644 index 10a7daf41c..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_04.cu +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include -// CHECK-NEXT: #include -// CHECK-NEXT: #include -#include -#include -// CHECK-NOT: #include -int main(int argc, char* argv[]) { - return 0; -} - diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_05.cu b/tests/hipify-clang/unit_tests/headers/headers_test_05.cu deleted file mode 100644 index 4706044b9d..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_05.cu +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #pragma once -// CHECK-NEXT: #include -#pragma once -// CHECK-NOT: #include -#include - -int main(int argc, char* argv[]) { - return 0; -} - diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_06.cu b/tests/hipify-clang/unit_tests/headers/headers_test_06.cu deleted file mode 100644 index 1adccd95e4..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_06.cu +++ /dev/null @@ -1,8 +0,0 
@@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include -// CHECK-NOT: #include -// CHECK: #include -#include -#include -#include diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_07.cu b/tests/hipify-clang/unit_tests/headers/headers_test_07.cu deleted file mode 100644 index 1effc189b8..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_07.cu +++ /dev/null @@ -1,8 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include "hipblas.h" -// CHECK-NOT: #include "cublas.h" -// CHECK: #include -#include "cublas_v2.h" -#include "cublas.h" -#include diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_08.cu b/tests/hipify-clang/unit_tests/headers/headers_test_08.cu deleted file mode 100644 index aca7f194b0..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_08.cu +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include -// CHECK-NOT: #include -// CHECK: #include -// CHECK: #include "hipblas.h" -// CHECK-NOT: #include "cublas.h" -// CHECK: #include -#include -#include -#include -#include "cublas_v2.h" -#include "cublas.h" -#include diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_09.cu b/tests/hipify-clang/unit_tests/headers/headers_test_09.cu deleted file mode 100644 index 37e718b5a4..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_09.cu +++ /dev/null @@ -1,100 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include -// CHECK: #include - -// CHECK-NOT: #include -// CHECK-NOT: #include - -// CHECK: #include "hip/hip_runtime_api.h" -// CHECK: #include "hip/channel_descriptor.h" -// CHECK: #include "hip/device_functions.h" -// CHECK: #include "hip/driver_types.h" -// CHECK: #include "hip/hip_complex.h" -// CHECK: #include "hip/hip_fp16.h" -// CHECK: #include "hip/hip_texture_types.h" -// CHECK: #include "hip/hip_vector_types.h" - 
-// CHECK: #include - -// CHECK: #include "hipblas.h" -// CHECK-NOT: #include "cublas.h" - -// CHECK: #include - -// CHECK: #include "hiprand.h" -// CHECK: #include "hiprand_kernel.h" - -// CHECK: #include - -// CHECK-NOT: #include "hiprand.h" -// CHECK-NOT: #include "hiprand_kernel.h" -// CHECK-NOT: #include "curand_discrete.h" -// CHECK-NOT: #include "curand_discrete2.h" -// CHECK-NOT: #include "curand_globals.h" -// CHECK-NOT: #include "curand_lognormal.h" -// CHECK-NOT: #include "curand_mrg32k3a.h" -// CHECK-NOT: #include "curand_mtgp32.h" -// CHECK-NOT: #include "curand_mtgp32_host.h" -// CHECK-NOT: #include "curand_mtgp32_kernel.h" -// CHECK-NOT: #include "curand_mtgp32dc_p_11213.h" -// CHECK-NOT: #include "curand_normal.h" -// CHECK-NOT: #include "curand_normal_static.h" -// CHECK-NOT: #include "curand_philox4x32_x.h" -// CHECK-NOT: #include "curand_poisson.h" -// CHECK-NOT: #include "curand_precalc.h" -// CHECK-NOT: #include "curand_uniform.h" - -// CHECK: #include - -// CHECK: #include "hipfft.h" -// CHECK: #include "hipsparse.h" - -#include - -#include - -#include - -#include "cuda_runtime_api.h" -#include "channel_descriptor.h" -#include "device_functions.h" -#include "driver_types.h" -#include "cuComplex.h" -#include "cuda_fp16.h" -#include "cuda_texture_types.h" -#include "vector_types.h" - -#include - -#include "cublas_v2.h" -#include "cublas.h" - -#include - -#include "curand.h" -#include "curand_kernel.h" - -#include - -#include "curand_discrete.h" -#include "curand_discrete2.h" -#include "curand_globals.h" -#include "curand_lognormal.h" -#include "curand_mrg32k3a.h" -#include "curand_mtgp32.h" -#include "curand_mtgp32_host.h" -#include "curand_mtgp32_kernel.h" -#include "curand_mtgp32dc_p_11213.h" -#include "curand_normal.h" -#include "curand_normal_static.h" -#include "curand_philox4x32_x.h" -#include "curand_poisson.h" -#include "curand_precalc.h" -#include "curand_uniform.h" - -#include - -#include "cufft.h" - -#include "cusparse.h" diff --git 
a/tests/hipify-clang/unit_tests/headers/headers_test_10.cu b/tests/hipify-clang/unit_tests/headers/headers_test_10.cu deleted file mode 100644 index 1c2db50e3b..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_10.cu +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// Checks that HIP header file is included after #pragma once, -// which goes before include guard controlling macro. -// CHECK: #pragma once -// CHECK-NEXT: #include -#pragma once -#ifndef HEADERS_TEST_10_H -// CHECK: #ifndef HEADERS_TEST_10_H -// CHECK-NOT: #include -#define HEADERS_TEST_10_H -#include -static int counter = 0; -#endif // HEADERS_TEST_10_H diff --git a/tests/hipify-clang/unit_tests/headers/headers_test_11.cu b/tests/hipify-clang/unit_tests/headers/headers_test_11.cu deleted file mode 100644 index 7c59dbe4a7..0000000000 --- a/tests/hipify-clang/unit_tests/headers/headers_test_11.cu +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// Checks that HIP header file is included after include guard controlling macro, -// which goes before #pragma once. -// CHECK: #ifndef HEADERS_TEST_10_H -// CHECK-NEXT: #include -#ifndef HEADERS_TEST_10_H -// CHECK: #pragma once -#pragma once -// CHECK-NOT: #include -#define HEADERS_TEST_10_H -#include -static int counter = 0; -#endif // HEADERS_TEST_10_H diff --git a/tests/hipify-clang/unit_tests/kernel_launch/kernel_launch_01.cu b/tests/hipify-clang/unit_tests/kernel_launch/kernel_launch_01.cu deleted file mode 100644 index 3795d3c799..0000000000 --- a/tests/hipify-clang/unit_tests/kernel_launch/kernel_launch_01.cu +++ /dev/null @@ -1,46 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// Synthetic test to warn only on device functions umin and umax as unsupported, but not on user defined ones. -// ToDo: change lit testing in order to parse the output. 
- -#define LEN 1024 -#define SIZE LEN * sizeof(float) -#define ITER 1024*1024 - -// CHECK: #include -#include - -#define CUDA_LAUNCH(cuda_call,dimGrid,dimBlock, ...) \ - cuda_call<<>>(__VA_ARGS__); - -__global__ void Inc1(float *Ad, float *Bd) { - int tx = threadIdx.x + blockIdx.x * blockDim.x; - if (tx < 1) { - for (int i = 0; i < ITER; ++i) { - Ad[tx] = Ad[tx] + 1.0f; - for (int j = 0; j < 256; ++j) { - Bd[tx] = Ad[tx]; - } - } - } -} - -int main() { - float *A, *Ad, *Bd; - A = new float[LEN]; - for (int i = 0; i < LEN; ++i) { - A[i] = 0.0f; - } - // CHECK: hipError_t status; - cudaError_t status; - // CHECK: status = hipHostRegister(A, SIZE, hipHostRegisterMapped); - status = cudaHostRegister(A, SIZE, cudaHostRegisterMapped); - // CHECK: hipHostGetDevicePointer(&Ad, A, 0); - cudaHostGetDevicePointer(&Ad, A, 0); - // CHECK: hipMalloc((void**)&Bd, SIZE); - cudaMalloc((void**)&Bd, SIZE); - dim3 dimGrid(LEN / 512, 1, 1); - dim3 dimBlock(512, 1, 1); - - // CHECK: hipLaunchKernelGGL(Inc1, dim3(dimGrid), dim3(dimBlock), 0, 0, Ad, Bd); - CUDA_LAUNCH(Inc1, dimGrid, dimBlock, Ad, Bd); -} diff --git a/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2/core/common_cudnn.h b/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2/core/common_cudnn.h deleted file mode 100644 index e9437c11f5..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2/core/common_cudnn.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef CAFFE2_CORE_COMMON_CUDNN_H_ -#define CAFFE2_CORE_COMMON_CUDNN_H_ - -#include -#include - -#endif // CAFFE2_CORE_COMMON_CUDNN_H_ diff --git a/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2/operators/spatial_batch_norm_op.h b/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2/operators/spatial_batch_norm_op.h deleted file mode 100644 index 7b8a13788a..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2/operators/spatial_batch_norm_op.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef CAFFE2_OPERATORS_SPATIAL_BATCH_NORM_OP_H_ -#define 
CAFFE2_OPERATORS_SPATIAL_BATCH_NORM_OP_H_ - -#include -#include -#include -#include -#include - -namespace caffe2 { - -} // namespace caffe2 - -#endif // CAFFE2_OPERATORS_SPATIAL_BATCH_NORM_OP_H_ diff --git a/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2_01.cu b/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2_01.cu deleted file mode 100644 index 3c82045d30..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2_01.cu +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args - -// NOTE: Nonworking code just for conversion testing - -// CHECK: #include -#include -#include -#include -// CHECK: #include "caffe2/operators/hip/spatial_batch_norm_op_miopen.hip" -#include "caffe2/operators/spatial_batch_norm_op.h" -// CHECK: #include "caffe2/core/hip/common_miopen.h" -#include "caffe2/core/common_cudnn.h" diff --git a/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2_02.cu b/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2_02.cu deleted file mode 100644 index 7f29cfe25c..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/CAFFE2/caffe2_02.cu +++ /dev/null @@ -1,102 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args - -// NOTE: Nonworking code just for conversion testing - -// CHECK: #include -#include -#include -#include -#include - -namespace caffe2 { - -// Operator Definition. 
-struct OperatorDef { - int input = 1; - int output = 2; - int name = 3; -}; - -class OperatorBase; -class Workspace; - -template -class Observable { - public: - Observable() = default; - - Observable(Observable&&) = default; - Observable& operator =(Observable&&) = default; - - virtual ~Observable() = default; -}; - -template -class ObserverBase { - public: - explicit ObserverBase(T* subject) : subject_(subject) {} - - virtual void Start() {} - virtual void Stop() {} - - virtual std::string debugInfo() { - return "Not implemented."; - } - - virtual ~ObserverBase() noexcept {}; - - T* subject() const { - return subject_; - } - - protected: - T* subject_; -}; - -typedef ObserverBase OperatorObserver; - -class OperatorBase : public Observable { - public: - explicit OperatorBase(const OperatorDef& operator_def, Workspace* ws); - virtual ~OperatorBase() noexcept {} -}; - -template -class Operator : public OperatorBase { - public: - explicit Operator(const OperatorDef& operator_def, Workspace* ws) - : OperatorBase(operator_def, ws) { - } - ~Operator() noexcept override {} -}; - -template -class DummyEmptyOp : public Operator { - public: - DummyEmptyOp(const OperatorDef& def, Workspace* ws) - : Operator(def, ws) {} - - bool RunOnDevice() final { return true; } -}; - - -class CUDAContext { -public: - CUDAContext(); - virtual ~CUDAContext() noexcept {} -}; - -#define REGISTER_CUDA_OPERATOR(name, ...) \ - void CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(); \ - static void CAFFE_ANONYMOUS_VARIABLE_CUDA##name() { \ - CAFFE2_PLEASE_ADD_OPERATOR_SCHEMA_FOR_##name(); \ - } - -#define REGISTER_CUDA_OPERATOR_CREATOR(key, ...) 
- -// CHECK: REGISTER_HIP_OPERATOR(Operator, DummyEmptyOp); -REGISTER_CUDA_OPERATOR(Operator, DummyEmptyOp); -// CHECK: REGISTER_HIP_OPERATOR_CREATOR(Operator, DummyEmptyOp); -REGISTER_CUDA_OPERATOR_CREATOR(Operator, DummyEmptyOp); - -} diff --git a/tests/hipify-clang/unit_tests/libraries/CUB/cub_01.cu b/tests/hipify-clang/unit_tests/libraries/CUB/cub_01.cu deleted file mode 100644 index 4646015e74..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/CUB/cub_01.cu +++ /dev/null @@ -1,60 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -#include - -// TODO: -// using namespace cub; - -template -__global__ void sort(const T* data_in, T* data_out){ - // CHECK: typedef ::hipcub::BlockRadixSort BlockRadixSortT; - typedef ::cub::BlockRadixSort BlockRadixSortT; - __shared__ typename BlockRadixSortT::TempStorage tmp_sort; - double items[4]; - int i0 = 4 * (blockIdx.x * blockDim.x + threadIdx.x); - for (int i = 0; i < 4; ++i){ - items[i] = data_in[i0 + i]; - } - BlockRadixSortT(tmp_sort).Sort(items); - for (int i = 0; i < 4; ++i){ - data_out[i0 + i] = items[i]; - } -} - -int main(){ - double* d_gpu = NULL; - double* result_gpu = NULL; - double* data_sorted = new double[4096]; - // Allocate memory on the GPU - // CHECK: hipMalloc(&d_gpu, 4096 * sizeof(double)); - cudaMalloc(&d_gpu, 4096 * sizeof(double)); - // CHECK: hipMalloc(&result_gpu, 4096 * sizeof(double)); - cudaMalloc(&result_gpu, 4096 * sizeof(double)); - // CHECK: hiprandGenerator_t gen; - curandGenerator_t gen; - // Create generator - // CHECK: hiprandCreateGenerator(&gen, HIPRAND_RNG_PSEUDO_DEFAULT); - curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT); - // Fill array with random numbers - // CHECK: hiprandGenerateNormalDouble(gen, d_gpu, 4096, 0.0, 1.0); - curandGenerateNormalDouble(gen, d_gpu, 4096, 0.0, 1.0); - // Destroy generator - // CHECK: hiprandDestroyGenerator(gen); - 
curandDestroyGenerator(gen); - // Sort data - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort), dim3(1), dim3(1024), 0, 0, d_gpu, result_gpu); - sort<<<1, 1024>>>(d_gpu, result_gpu); - // CHECK: hipMemcpy(data_sorted, result_gpu, 4096 * sizeof(double), hipMemcpyDeviceToHost); - cudaMemcpy(data_sorted, result_gpu, 4096 * sizeof(double), cudaMemcpyDeviceToHost); - // Write the sorted data to standard out - for (int i = 0; i < 4096; ++i){ - std::cout << data_sorted[i] << ", "; - } - std::cout << std::endl; -} diff --git a/tests/hipify-clang/unit_tests/libraries/CUB/cub_02.cu b/tests/hipify-clang/unit_tests/libraries/CUB/cub_02.cu deleted file mode 100644 index 21898baa03..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/CUB/cub_02.cu +++ /dev/null @@ -1,69 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -#include - -template -__global__ void sort(const T* data_in, T* data_out){ - // CHECK: typedef hipcub::BlockLoad BlockLoadT; - typedef cub::BlockLoad BlockLoadT; - // CHECK: typedef hipcub::BlockRadixSort BlockRadixSortT; - typedef cub::BlockRadixSort BlockRadixSortT; - // CHECK: typedef hipcub::BlockStore BlockStoreT; - typedef cub::BlockStore BlockStoreT; - __shared__ union { - typename BlockLoadT::TempStorage load; - typename BlockRadixSortT::TempStorage sort; - typename BlockStoreT::TempStorage store; - } tmp_storage; - T items[ITEMS_PER_THREAD]; - BlockLoadT(tmp_storage.load).Load(data_in + blockIdx.x * BLOCK_WIDTH * ITEMS_PER_THREAD, items); - __syncthreads(); - BlockRadixSortT(tmp_storage.sort).Sort(items); - __syncthreads(); - BlockStoreT(tmp_storage.store).Store(data_out + blockIdx.x * BLOCK_WIDTH * ITEMS_PER_THREAD, items); -} - -int main() { - double* d_gpu = NULL; - double* result_gpu = NULL; - double* data_sorted = new double[1000*4096]; - // Allocate memory on the GPU - // CHECK: hipMalloc(&d_gpu, 1000*4096 * sizeof(double)); - 
cudaMalloc(&d_gpu, 1000*4096 * sizeof(double)); - // CHECK: hipMalloc(&result_gpu, 1000*4096 * sizeof(double)); - cudaMalloc(&result_gpu, 1000*4096 * sizeof(double)); - // CHECK: hiprandGenerator_t gen; - curandGenerator_t gen; - // Create generator - // CHECK: hiprandCreateGenerator(&gen, HIPRAND_RNG_PSEUDO_DEFAULT); - curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT); - // Fill array with random numbers - // CHECK: hiprandGenerateNormalDouble(gen, d_gpu, 1000*4096, 0.0, 1.0); - curandGenerateNormalDouble(gen, d_gpu, 1000*4096, 0.0, 1.0); - // Destroy generator - // CHECK: hiprandDestroyGenerator(gen); - curandDestroyGenerator(gen); - // Sort data - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort<512, 8, hipcub::BLOCK_LOAD_TRANSPOSE, hipcub::BLOCK_STORE_TRANSPOSE>), dim3(1000), dim3(512), 0, 0, d_gpu, result_gpu); - sort<512, 8, cub::BLOCK_LOAD_TRANSPOSE, cub::BLOCK_STORE_TRANSPOSE><<<1000, 512>>>(d_gpu, result_gpu); - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(sort<256, 16, hipcub::BLOCK_LOAD_DIRECT, hipcub::BLOCK_STORE_DIRECT>), dim3(1000), dim3(256), 0, 0, d_gpu, result_gpu); - sort<256, 16, cub::BLOCK_LOAD_DIRECT, cub::BLOCK_STORE_DIRECT><<<1000, 256>>>(d_gpu, result_gpu); - // CHECK: hipMemcpy(data_sorted, result_gpu, 1000*4096*sizeof(double), hipMemcpyDeviceToHost); - cudaMemcpy(data_sorted, result_gpu, 1000*4096*sizeof(double), cudaMemcpyDeviceToHost); - // Write the sorted data to standard out - for (int i = 0; i < 4095; ++i) { - std::cout << data_sorted[i] << ", "; - } - std::cout << data_sorted[4095] << std::endl; -} diff --git a/tests/hipify-clang/unit_tests/libraries/CUB/cub_03.cu b/tests/hipify-clang/unit_tests/libraries/CUB/cub_03.cu deleted file mode 100644 index bc914d419d..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/CUB/cub_03.cu +++ /dev/null @@ -1,33 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// CHECK: #include -#include -// CHECK: #include -#include - -// using namespace hipcub; -using namespace 
cub; - -// Simple CUDA kernel for computing tiled partial sums -template - cub::BlockScanAlgorithm SCAN_ALGO> -__global__ void ScanTilesKernel(int *d_in, int *d_out) { - // Specialize collective types for problem context - // CHECK: typedef ::hipcub::BlockLoad BlockLoadT; - typedef ::cub::BlockLoad BlockLoadT; - typedef BlockScan BlockScanT; - // Allocate on-chip temporary storage - __shared__ union { - typename BlockLoadT::TempStorage load; - typename BlockScanT::TempStorage reduce; - } temp_storage; - // Load data per thread - int thread_data[ITEMS_PER_THREAD]; - int offset = blockIdx.x * (BLOCK_THREADS * ITEMS_PER_THREAD); - BlockLoadT(temp_storage.load).Load(d_in + offset, offset); - __syncthreads(); - // Compute the block-wide prefix sum - BlockScanT(temp_storage).Sum(thread_data); -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu b/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu deleted file mode 100644 index 69812c98b0..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_0_based_indexing.cu +++ /dev/null @@ -1,81 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include -#include -#include -#include -// CHECK: #include "hipblas.h" -#include "cublas.h" -#define M 6 -#define N 5 -#define IDX2C(i,j,ld) (((j)*(ld))+(i)) -static __inline__ void modify(float *m, int ldm, int n, int p, int q, float - alpha, float beta) { - // CHECK: hipblasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); - // CHECK: hipblasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); - cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); - cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); -} -int main(void) { - int i, j; - // CHECK: hipblasStatus_t stat; - cublasStatus stat; - float* devPtrA; - float* a = 0; - a = (float *)malloc(M * N * sizeof(*a)); - if (!a) { - printf("host memory allocation failed"); - return EXIT_FAILURE; - } - for (j = 0; j < N; j++) { - for (i = 0; i < 
M; i++) { - a[IDX2C(i, j, M)] = (float)(i * M + j + 1); - } - } - // cublasInit is not supported yet - cublasInit(); - // cublasAlloc is not supported yet - stat = cublasAlloc(M*N, sizeof(*a), (void**)&devPtrA); - // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("device memory allocation failed"); - // cublasShutdown is not supported yet - cublasShutdown(); - return EXIT_FAILURE; - } - // CHECK: stat = hipblasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); - stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); - // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("data download failed"); - // cublasFree is not supported yet - cublasFree(devPtrA); - // cublasShutdown is not supported yet - cublasShutdown(); - return EXIT_FAILURE; - } - modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f); - // CHECK: stat = hipblasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); - stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); - // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("data upload failed"); - // cublasFree is not supported yet - cublasFree(devPtrA); - // cublasShutdown is not supported yet - cublasShutdown(); - return EXIT_FAILURE; - } - // cublasFree is not supported yet - cublasFree(devPtrA); - // cublasShutdown is not supported yet - cublasShutdown(); - for (j = 0; j < N; j++) { - for (i = 0; i < M; i++) { - printf("%7.0f", a[IDX2C(i, j, M)]); - } - printf("\n"); - } - free(a); - return EXIT_SUCCESS; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu b/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu deleted file mode 100644 index 6983140eac..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_1_based_indexing.cu +++ /dev/null @@ -1,90 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -#include -#include -#include -// CHECK: 
#include -#include -// CHECK: #include "hipblas.h" -#include "cublas_v2.h" -#define M 6 -#define N 5 -#define IDX2F(i,j,ld) ((((j)-1)*(ld))+((i)-1)) -// CHECK: static __inline__ void modify(hipblasHandle_t handle, float *m, int ldm, int -static __inline__ void modify(cublasHandle_t handle, float *m, int ldm, int - n, int p, int q, float alpha, float beta) { - // CHECK: hipblasSscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm); - // CHECK: hipblasSscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1); - cublasSscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm); - cublasSscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1); -} -int main(void) { - // CHECK: hipError_t cudaStat; - // CHECK: hipblasStatus_t stat; - // CHECK: hipblasHandle_t handle; - cudaError_t cudaStat; - cublasStatus_t stat; - cublasHandle_t handle; - int i, j; - float* devPtrA; - float* a = 0; - a = (float *)malloc(M * N * sizeof(*a)); - if (!a) { - printf("host memory allocation failed"); - return EXIT_FAILURE; - } - for (j = 1; j <= N; j++) { - for (i = 1; i <= M; i++) { - a[IDX2F(i, j, M)] = (float)((i - 1) * M + j); - } - } - // CHECK: cudaStat = hipMalloc((void**)&devPtrA, M*N * sizeof(*a)); - cudaStat = cudaMalloc((void**)&devPtrA, M*N * sizeof(*a)); - // CHECK: if (cudaStat != hipSuccess) { - if (cudaStat != cudaSuccess) { - printf("device memory allocation failed"); - return EXIT_FAILURE; - } - // CHECK: stat = hipblasCreate(&handle); - stat = cublasCreate(&handle); - // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("CUBLAS initialization failed\n"); - return EXIT_FAILURE; - } - // CHECK: stat = hipblasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); - stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); - // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("data download failed"); - // CHECK: hipFree(devPtrA); - // CHECK: hipblasDestroy(handle); - cudaFree(devPtrA); - 
cublasDestroy(handle); - return EXIT_FAILURE; - } - modify(handle, devPtrA, M, N, 2, 3, 16.0f, 12.0f); - // CHECK: stat = hipblasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); - stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); - // CHECK: if (stat != HIPBLAS_STATUS_SUCCESS) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("data upload failed"); - // CHECK: hipFree(devPtrA); - // CHECK: hipblasDestroy(handle); - cudaFree(devPtrA); - cublasDestroy(handle); - return EXIT_FAILURE; - } - // CHECK: hipFree(devPtrA); - // CHECK: hipblasDestroy(handle); - cudaFree(devPtrA); - cublasDestroy(handle); - for (j = 1; j <= N; j++) { - for (i = 1; i <= M; i++) { - printf("%7.0f", a[IDX2F(i, j, M)]); - } - printf("\n"); - } - free(a); - return EXIT_SUCCESS; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu b/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu deleted file mode 100644 index ecd8fb7eee..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuBLAS/cublas_sgemm_matrix_multiplication.cu +++ /dev/null @@ -1,108 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -#include -#include -// CHECK: #include -#include -// CHECK: #include "hipblas.h" -#include "cublas_v2.h" -#define IDX2C(i,j,ld) (((j)*(ld))+(i)) -#define m 6 -#define n 4 -#define k 5 -int main(void) { - // CHECK: hipError_t cudaStat; - // CHECK: hipblasStatus_t stat; - // CHECK: hipblasHandle_t handle; - cudaError_t cudaStat; - cublasStatus_t stat; - cublasHandle_t handle; - int i, j; - float * a; - float * b; - float * c; - a = (float *)malloc(m*k * sizeof(float)); - b = (float *)malloc(k*n * sizeof(float)); - c = (float *)malloc(m*n * sizeof(float)); - int ind = 11; - for (j = 0; j -#include -#include -#include -// CHECK: #include "rocblas.h" -#include "cublas.h" -#define M 6 -#define N 5 -#define IDX2C(i,j,ld) (((j)*(ld))+(i)) -static __inline__ void modify(float *m, int ldm, int n, int p, 
int q, float - alpha, float beta) { - // CHECK: rocblas_sscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); - // CHECK: rocblas_sscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); - cublasSscal(n - p, alpha, &m[IDX2C(p, q, ldm)], ldm); - cublasSscal(ldm - p, beta, &m[IDX2C(p, q, ldm)], 1); -} -int main(void) { - int i, j; - // CHECK: rocblas_status stat; - cublasStatus stat; - float* devPtrA; - float* a = 0; - a = (float *)malloc(M * N * sizeof(*a)); - if (!a) { - printf("host memory allocation failed"); - return EXIT_FAILURE; - } - for (j = 0; j < N; j++) { - for (i = 0; i < M; i++) { - a[IDX2C(i, j, M)] = (float)(i * M + j + 1); - } - } - // cublasInit is not supported yet - cublasInit(); - // cublasAlloc is not supported yet - stat = cublasAlloc(M*N, sizeof(*a), (void**)&devPtrA); - // CHECK: if (stat != rocblas_status_success) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("device memory allocation failed"); - // cublasShutdown is not supported yet - cublasShutdown(); - return EXIT_FAILURE; - } - // CHECK: stat = rocblas_set_matrix(M, N, sizeof(*a), a, M, devPtrA, M); - stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); - // CHECK: if (stat != rocblas_status_success) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("data download failed"); - // cublasFree is not supported yet - cublasFree(devPtrA); - // cublasShutdown is not supported yet - cublasShutdown(); - return EXIT_FAILURE; - } - modify(devPtrA, M, N, 1, 2, 16.0f, 12.0f); - // CHECK: stat = rocblas_get_matrix(M, N, sizeof(*a), devPtrA, M, a, M); - stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); - // CHECK: if (stat != rocblas_status_success) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("data upload failed"); - // cublasFree is not supported yet - cublasFree(devPtrA); - // cublasShutdown is not supported yet - cublasShutdown(); - return EXIT_FAILURE; - } - // cublasFree is not supported yet - cublasFree(devPtrA); - // cublasShutdown is not supported yet - cublasShutdown(); - for (j = 
0; j < N; j++) { - for (i = 0; i < M; i++) { - printf("%7.0f", a[IDX2C(i, j, M)]); - } - printf("\n"); - } - free(a); - return EXIT_SUCCESS; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu b/tests/hipify-clang/unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu deleted file mode 100644 index 0202e2f7fa..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuBLAS/rocBLAS/cublas_1_based_indexing_rocblas.cu +++ /dev/null @@ -1,90 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args - -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include "rocblas.h" -#include "cublas_v2.h" -#define M 6 -#define N 5 -#define IDX2F(i,j,ld) ((((j)-1)*(ld))+((i)-1)) -// CHECK: static __inline__ void modify(rocblas_handle handle, float *m, int ldm, int -static __inline__ void modify(cublasHandle_t handle, float *m, int ldm, int - n, int p, int q, float alpha, float beta) { - // CHECK: rocblas_sscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm); - // CHECK: rocblas_sscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1); - cublasSscal(handle, n - p + 1, &alpha, &m[IDX2F(p, q, ldm)], ldm); - cublasSscal(handle, ldm - p + 1, &beta, &m[IDX2F(p, q, ldm)], 1); -} -int main(void) { - // CHECK: hipError_t cudaStat; - // CHECK: rocblas_status stat; - // CHECK: rocblas_handle handle; - cudaError_t cudaStat; - cublasStatus_t stat; - cublasHandle_t handle; - int i, j; - float* devPtrA; - float* a = 0; - a = (float *)malloc(M * N * sizeof(*a)); - if (!a) { - printf("host memory allocation failed"); - return EXIT_FAILURE; - } - for (j = 1; j <= N; j++) { - for (i = 1; i <= M; i++) { - a[IDX2F(i, j, M)] = (float)((i - 1) * M + j); - } - } - // CHECK: cudaStat = hipMalloc((void**)&devPtrA, M*N * sizeof(*a)); - cudaStat = cudaMalloc((void**)&devPtrA, M*N * sizeof(*a)); - // CHECK: if (cudaStat != hipSuccess) { - if (cudaStat != cudaSuccess) { - printf("device memory 
allocation failed"); - return EXIT_FAILURE; - } - // CHECK: stat = rocblas_create_handle(&handle); - stat = cublasCreate(&handle); - // CHECK: if (stat != rocblas_status_success) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("CUBLAS initialization failed\n"); - return EXIT_FAILURE; - } - // CHECK: stat = rocblas_set_matrix(M, N, sizeof(*a), a, M, devPtrA, M); - stat = cublasSetMatrix(M, N, sizeof(*a), a, M, devPtrA, M); - // CHECK: if (stat != rocblas_status_success) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("data download failed"); - // CHECK: hipFree(devPtrA); - // CHECK: rocblas_destroy_handle(handle); - cudaFree(devPtrA); - cublasDestroy(handle); - return EXIT_FAILURE; - } - modify(handle, devPtrA, M, N, 2, 3, 16.0f, 12.0f); - // CHECK: stat = rocblas_get_matrix(M, N, sizeof(*a), devPtrA, M, a, M); - stat = cublasGetMatrix(M, N, sizeof(*a), devPtrA, M, a, M); - // CHECK: if (stat != rocblas_status_success) { - if (stat != CUBLAS_STATUS_SUCCESS) { - printf("data upload failed"); - // CHECK: hipFree(devPtrA); - // CHECK: rocblas_destroy_handle(handle); - cudaFree(devPtrA); - cublasDestroy(handle); - return EXIT_FAILURE; - } - // CHECK: hipFree(devPtrA); - // CHECK: rocblas_destroy_handle(handle); - cudaFree(devPtrA); - cublasDestroy(handle); - for (j = 1; j <= N; j++) { - for (i = 1; i <= M; i++) { - printf("%7.0f", a[IDX2F(i, j, M)]); - } - printf("\n"); - } - free(a); - return EXIT_SUCCESS; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu b/tests/hipify-clang/unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu deleted file mode 100644 index 8e35f28f0e..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuBLAS/rocBLAS/cublas_sgemm_matrix_multiplication_rocblas.cu +++ /dev/null @@ -1,108 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args "-roc" %clang_args - -#include -#include -// CHECK: #include -#include -// CHECK: #include "rocblas.h" 
-#include "cublas_v2.h" -#define IDX2C(i,j,ld) (((j)*(ld))+(i)) -#define m 6 -#define n 4 -#define k 5 -int main(void) { - // CHECK: hipError_t cudaStat; - // CHECK: rocblas_status stat; - // CHECK: rocblas_handle handle; - cudaError_t cudaStat; - cublasStatus_t stat; - cublasHandle_t handle; - int i, j; - float * a; - float * b; - float * c; - a = (float *)malloc(m*k * sizeof(float)); - b = (float *)malloc(k*n * sizeof(float)); - c = (float *)malloc(m*n * sizeof(float)); - int ind = 11; - for (j = 0; j -// CHECK: #include "hip/hip_complex.h" -#include "cuComplex.h" - -#define TYPEFLOAT -#define DIMX 100 -#define DIMY 40 -#define moveX 2 -#define moveY 1 - -#define MAXITERATIONS 10 - -#ifdef TYPEFLOAT -#define TYPE float -// CHECK: #define cTYPE hipFloatComplex -#define cTYPE cuFloatComplex -// CHECK: #define cMakecuComplex(re,i) make_hipFloatComplex(re,i) -#define cMakecuComplex(re,i) make_cuFloatComplex(re,i) -#endif -#ifdef TYPEDOUBLE -// CHECK: #define TYPE hipDoubleComplex -#define TYPE cuDoubleComplex -// CHECK: #define cMakecuComplex(re,i) make_hipDoubleComplex(re,i) -#define cMakecuComplex(re,i) make_cuDoubleComplex(re,i) -#endif - -__device__ cTYPE juliaFunctor(cTYPE p, cTYPE c) { - // CHECK: return hipCaddf(hipCmulf(p, p), c); - return cuCaddf(cuCmulf(p, p), c); -} - -__device__ cTYPE convertToComplex(int x, int y, float zoom) { - TYPE jx = 1.5 * (x - DIMX / 2) / (0.5 * zoom * DIMX) + moveX; - TYPE jy = (y - DIMY / 2) / (0.5 * zoom * DIMY) + moveY; - return cMakecuComplex(jx, jy); -} - -__device__ int evolveComplexPoint(cTYPE p, cTYPE c) { - int it = 1; - // CHECK: while (it <= MAXITERATIONS && hipCabsf(p) <= 4) { - while (it <= MAXITERATIONS && cuCabsf(p) <= 4) { - p = juliaFunctor(p, c); - it++; - } - return it; -} - -__global__ void computeJulia(int* data, cTYPE c, float zoom) { - int i = blockIdx.x * blockDim.x + threadIdx.x; - int j = blockIdx.y * blockDim.y + threadIdx.y; - - if (i -#include -#include -#include - -// CHECK: #include -#include -// 
CHECK: #include "hipDNN.h" -#include "cudnn.h" - -// CHECK: hipError_t err = (f); \ -// CHECK: if (err != hipSuccess) { \ - -#define CUDA_CALL(f) { \ - cudaError_t err = (f); \ - if (err != cudaSuccess) { \ - std::cout \ - << " Error occurred: " << err << std::endl; \ - std::exit(1); \ - } \ -} -// CHECK: hipdnnStatus_t err = (f); \ -// CHECK: if (err != HIPDNN_STATUS_SUCCESS) { \ - -#define CUDNN_CALL(f) { \ - cudnnStatus_t err = (f); \ - if (err != CUDNN_STATUS_SUCCESS) { \ - std::cout \ - << " Error occurred: " << err << std::endl; \ - std::exit(1); \ - } \ -} - -__global__ void dev_const(float *px, float k) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - px[tid] = k; -} - -__global__ void dev_iota(float *px) { - int tid = threadIdx.x + blockIdx.x * blockDim.x; - px[tid] = tid; -} - -void print(const float *data, int n, int c, int h, int w) { - std::vector buffer(1 << 20); - // CHECK: CUDA_CALL(hipMemcpy( - CUDA_CALL(cudaMemcpy( - buffer.data(), data, - n * c * h * w * sizeof(float), - // CHECK: hipMemcpyDeviceToHost)); - cudaMemcpyDeviceToHost)); - int a = 0; - for (int i = 0; i < n; ++i) { - for (int j = 0; j < c; ++j) { - std::cout << "n=" << i << ", c=" << j << ":" << std::endl; - for (int k = 0; k < h; ++k) { - for (int l = 0; l < w; ++l) { - std::cout << std::setw(4) << std::right << buffer[a]; - ++a; - } - std::cout << std::endl; - } - } - } - std::cout << std::endl; -} - -int main() { - // CHECK: hipdnnHandle_t cudnn; - cudnnHandle_t cudnn; - // CHECK: CUDNN_CALL(hipdnnCreate(&cudnn)); - CUDNN_CALL(cudnnCreate(&cudnn)); - - // input - const int in_n = 1; - const int in_c = 1; - const int in_h = 5; - const int in_w = 5; - std::cout << "in_n: " << in_n << std::endl; - std::cout << "in_c: " << in_c << std::endl; - std::cout << "in_h: " << in_h << std::endl; - std::cout << "in_w: " << in_w << std::endl; - std::cout << std::endl; - // CHECK: hipdnnTensorDescriptor_t in_desc; - cudnnTensorDescriptor_t in_desc; - // CHECK: 
CUDNN_CALL(hipdnnCreateTensorDescriptor(&in_desc)); - CUDNN_CALL(cudnnCreateTensorDescriptor(&in_desc)); - // CHECK: CUDNN_CALL(hipdnnSetTensor4dDescriptor( - CUDNN_CALL(cudnnSetTensor4dDescriptor( - // CHECK: in_desc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, - in_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, - in_n, in_c, in_h, in_w)); - - float *in_data; - // CHECK: CUDA_CALL(hipMalloc( - CUDA_CALL(cudaMalloc( - &in_data, in_n * in_c * in_h * in_w * sizeof(float))); - - // filter - const int filt_k = 1; - const int filt_c = 1; - const int filt_h = 2; - const int filt_w = 2; - std::cout << "filt_k: " << filt_k << std::endl; - std::cout << "filt_c: " << filt_c << std::endl; - std::cout << "filt_h: " << filt_h << std::endl; - std::cout << "filt_w: " << filt_w << std::endl; - std::cout << std::endl; - - // CHECK: hipdnnFilterDescriptor_t filt_desc; - cudnnFilterDescriptor_t filt_desc; - // CHECK: CUDNN_CALL(hipdnnCreateFilterDescriptor(&filt_desc)); - CUDNN_CALL(cudnnCreateFilterDescriptor(&filt_desc)); - // CHECK: CUDNN_CALL(hipdnnSetFilter4dDescriptor( - CUDNN_CALL(cudnnSetFilter4dDescriptor( - // CHECK: filt_desc, HIPDNN_DATA_FLOAT, HIPDNN_TENSOR_NCHW, - filt_desc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, - filt_k, filt_c, filt_h, filt_w)); - - float *filt_data; - // CUDA_CALL(hipMalloc( - CUDA_CALL(cudaMalloc( - &filt_data, filt_k * filt_c * filt_h * filt_w * sizeof(float))); - - // convolution - const int pad_h = 1; - const int pad_w = 1; - const int str_h = 1; - const int str_w = 1; - const int dil_h = 1; - const int dil_w = 1; - std::cout << "pad_h: " << pad_h << std::endl; - std::cout << "pad_w: " << pad_w << std::endl; - std::cout << "str_h: " << str_h << std::endl; - std::cout << "str_w: " << str_w << std::endl; - std::cout << "dil_h: " << dil_h << std::endl; - std::cout << "dil_w: " << dil_w << std::endl; - std::cout << std::endl; - - // CHECK: hipdnnConvolutionDescriptor_t conv_desc; - cudnnConvolutionDescriptor_t conv_desc; - // 
CUDNN_CALL(hipdnnCreateConvolutionDescriptor(&conv_desc)); - CUDNN_CALL(cudnnCreateConvolutionDescriptor(&conv_desc)); - // CHECK: CUDNN_CALL(hipdnnSetConvolution2dDescriptor( - CUDNN_CALL(cudnnSetConvolution2dDescriptor( - conv_desc, - pad_h, pad_w, str_h, str_w, dil_h, dil_w, - // CHECK: HIPDNN_CONVOLUTION, HIPDNN_DATA_FLOAT)); - CUDNN_CONVOLUTION, CUDNN_DATA_FLOAT)); - - // output - int out_n; - int out_c; - int out_h; - int out_w; - - // CHECK: CUDNN_CALL(hipdnnGetConvolution2dForwardOutputDim( - CUDNN_CALL(cudnnGetConvolution2dForwardOutputDim( - conv_desc, in_desc, filt_desc, - &out_n, &out_c, &out_h, &out_w)); - - std::cout << "out_n: " << out_n << std::endl; - std::cout << "out_c: " << out_c << std::endl; - std::cout << "out_h: " << out_h << std::endl; - std::cout << "out_w: " << out_w << std::endl; - std::cout << std::endl; - // CHECK: hipdnnTensorDescriptor_t out_desc; - cudnnTensorDescriptor_t out_desc; - // CHECK: CUDNN_CALL(hipdnnCreateTensorDescriptor(&out_desc)); - CUDNN_CALL(cudnnCreateTensorDescriptor(&out_desc)); - // CHECK: CUDNN_CALL(hipdnnSetTensor4dDescriptor( - CUDNN_CALL(cudnnSetTensor4dDescriptor( - // CHECK: out_desc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_FLOAT, - out_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, - out_n, out_c, out_h, out_w)); - - - cudnnDataType_t *dataType = nullptr; - cudnnTensorFormat_t *tensorFormat = nullptr; - int *p_filt_k = nullptr; - int *p_filt_c = nullptr; - int *p_filt_h = nullptr; - int *p_filt_w = nullptr; - - // CHECK: CUDNN_CALL(hipdnnGetFilter4dDescriptor( - CUDNN_CALL(cudnnGetFilter4dDescriptor( - filt_desc, dataType, tensorFormat, - p_filt_k, p_filt_c, p_filt_h, p_filt_w)); - - float *out_data; - // CHECK: CUDA_CALL(hipMalloc( - CUDA_CALL(cudaMalloc( - &out_data, out_n * out_c * out_h * out_w * sizeof(float))); - - // algorithm - // CHECK: hipdnnConvolutionFwdAlgo_t algo; - cudnnConvolutionFwdAlgo_t algo; - // CHECK: CUDNN_CALL(hipdnnGetConvolutionForwardAlgorithm( - 
CUDNN_CALL(cudnnGetConvolutionForwardAlgorithm( - cudnn, - in_desc, filt_desc, conv_desc, out_desc, - // CHECK: HIPDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0, &algo)); - CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, 0, &algo)); - - std::cout << "Convolution algorithm: " << algo << std::endl; - std::cout << std::endl; - - // workspace - size_t ws_size; - // CHECK: CUDNN_CALL(hipdnnGetConvolutionForwardWorkspaceSize( - CUDNN_CALL(cudnnGetConvolutionForwardWorkspaceSize( - cudnn, in_desc, filt_desc, conv_desc, out_desc, algo, &ws_size)); - - float *ws_data; - // CHECK: CUDA_CALL(hipMalloc(&ws_data, ws_size)); - CUDA_CALL(cudaMalloc(&ws_data, ws_size)); - - std::cout << "Workspace size: " << ws_size << std::endl; - std::cout << std::endl; - - // perform - float alpha = 1.f; - float beta = 0.f; - // CHECK: hipLaunchKernelGGL(dev_iota, dim3(in_w * in_h), dim3(in_n * in_c), 0, 0, in_data); - // CHECK: hipLaunchKernelGGL(dev_const, dim3(filt_w * filt_h), dim3(filt_k * filt_c), 0, 0, filt_data, 1.f); - dev_iota<<>>(in_data); - dev_const<<>>(filt_data, 1.f); - // CHECK: CUDNN_CALL(hipdnnConvolutionForward( - CUDNN_CALL(cudnnConvolutionForward( - cudnn, - &alpha, in_desc, in_data, filt_desc, filt_data, - conv_desc, algo, ws_data, ws_size, - &beta, out_desc, out_data)); - - // results - std::cout << "in_data:" << std::endl; - print(in_data, in_n, in_c, in_h, in_w); - - std::cout << "filt_data:" << std::endl; - print(filt_data, filt_k, filt_c, filt_h, filt_w); - - std::cout << "out_data:" << std::endl; - print(out_data, out_n, out_c, out_h, out_w); - - // finalizing - // CHECK: CUDA_CALL(hipFree(ws_data)); - CUDA_CALL(cudaFree(ws_data)); - // CHECK: CUDA_CALL(hipFree(out_data)); - CUDA_CALL(cudaFree(out_data)); - // CHECK: CUDNN_CALL(hipdnnDestroyTensorDescriptor(out_desc)); - CUDNN_CALL(cudnnDestroyTensorDescriptor(out_desc)); - // CHECK: CUDNN_CALL(hipdnnDestroyConvolutionDescriptor(conv_desc)); - CUDNN_CALL(cudnnDestroyConvolutionDescriptor(conv_desc)); - // CHECK: 
CUDA_CALL(hipFree(filt_data)); - CUDA_CALL(cudaFree(filt_data)); - // CHECK: CUDNN_CALL(hipdnnDestroyFilterDescriptor(filt_desc)); - CUDNN_CALL(cudnnDestroyFilterDescriptor(filt_desc)); - // CHECK: CUDA_CALL(hipFree(in_data)); - CUDA_CALL(cudaFree(in_data)); - // CHECK: CUDNN_CALL(hipdnnDestroyTensorDescriptor(in_desc)); - CUDNN_CALL(cudnnDestroyTensorDescriptor(in_desc)); - // CHECK: CUDNN_CALL(hipdnnDestroy(cudnn)); - CUDNN_CALL(cudnnDestroy(cudnn)); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuDNN/cudnn_softmax.cu b/tests/hipify-clang/unit_tests/libraries/cuDNN/cudnn_softmax.cu deleted file mode 100644 index a2052b3de6..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuDNN/cudnn_softmax.cu +++ /dev/null @@ -1,159 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include -#include -// CHECK: #include -#include - -/** - * - * Author: Jon Gauthier - * February 2015 - * -. * Adopted for CUDA/CUDNN 9.0 - */ - -void printMatrix(const double *mat, int m, int n) { - for (int j = 0; j < n; j++) { - for (int i = 0; i < m; i++) { - printf("%f\n", mat[j * m + i]); - } - printf("\n\n"); - } -} - -double *makeDiffData(int m, int c) { - double *diff = (double *) calloc(m * c, sizeof(double)); - for (int j = 0; j < m; j++) { - int class_ = rand() % c; - printf("%d class: %d\n", j, class_); - for (int i = 0; i < c; i++) - diff[j * c + i] = class_ == i ? 
-c / (double) m : 0; - } - - return diff; -} - -int main() { - int m = 5, c = 4, numChannels = 1; - - double *fcLayer = (double *) malloc(m * c * sizeof(double)); - for (int i = 0; i < m; i++) { - double def = rand() % 25; - for (int c_idx = 0; c_idx < c; c_idx++) { - int offset = i * c + c_idx; - fcLayer[offset] = def; - } - } - printf("FC LAYER:\n"); - printMatrix(fcLayer, c, m); - - double *d_fcLayer; - // CHECK: hipMalloc((void**) &d_fcLayer, m * c * sizeof(double)); - cudaMalloc((void**) &d_fcLayer, m * c * sizeof(double)); - // CHECK: hipMemcpy(d_fcLayer, fcLayer, m * c * sizeof(double), hipMemcpyHostToDevice); - cudaMemcpy(d_fcLayer, fcLayer, m * c * sizeof(double), cudaMemcpyHostToDevice); - - double *d_softmaxData; - // CHECK: hipMalloc((void**) &d_softmaxData, m * c * sizeof(double)); - cudaMalloc((void**) &d_softmaxData, m * c * sizeof(double)); - - // CHECK: hipdnnHandle_t handle; - cudnnHandle_t handle; - // CHECK: hipdnnCreate(&handle); - cudnnCreate(&handle); - - float one = 1; - float zero = 0; - - // softmaxForward(n, c, h, w, dstData, &srcData); - // CHECK: hipdnnTensorDescriptor_t srcTensorDesc, sftTensorDesc; - // CHECK: hipdnnCreateTensorDescriptor(&srcTensorDesc); - // CHECK: hipdnnCreateTensorDescriptor(&sftTensorDesc); - cudnnTensorDescriptor_t srcTensorDesc, sftTensorDesc; - cudnnCreateTensorDescriptor(&srcTensorDesc); - cudnnCreateTensorDescriptor(&sftTensorDesc); - // CHECK: hipdnnSetTensor4dDescriptor(srcTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_DOUBLE, - cudnnSetTensor4dDescriptor(srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_DOUBLE, - m, c, 1, 1); - // CHECK: hipdnnSetTensor4dDescriptor(sftTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_DOUBLE, - cudnnSetTensor4dDescriptor(sftTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_DOUBLE, - m, c, 1, 1); - // CHECK: hipdnnSoftmaxForward(handle, HIPDNN_SOFTMAX_ACCURATE, HIPDNN_SOFTMAX_MODE_CHANNEL, &one, - cudnnSoftmaxForward(handle, CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL, &one, - 
srcTensorDesc, d_fcLayer, &zero, sftTensorDesc, d_softmaxData); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - // Copy back - double *result = (double *) malloc(m * c * sizeof(double)); - // CHECK: hipMemcpy(result, d_softmaxData, m * c * sizeof(double), hipMemcpyDeviceToHost); - // CHECK: hipDeviceSynchronize(); - cudaMemcpy(result, d_softmaxData, m * c * sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - - // Log - printf("SOFTMAX:\n"); - printMatrix(result, c, m); - - // Try backward - // CHECK: hipdnnTensorDescriptor_t diffTensorDesc; - // CHECK: hipdnnCreateTensorDescriptor(&diffTensorDesc); - // CHECK: hipdnnSetTensor4dDescriptor(diffTensorDesc, HIPDNN_TENSOR_NCHW, HIPDNN_DATA_DOUBLE, - cudnnTensorDescriptor_t diffTensorDesc; - cudnnCreateTensorDescriptor(&diffTensorDesc); - cudnnSetTensor4dDescriptor(diffTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_DOUBLE, - m, c, 1, 1); - - double *d_gradData; - // CHECK: hipMalloc((void**) &d_gradData, m * c * sizeof(double)); - cudaMalloc((void**) &d_gradData, m * c * sizeof(double)); - - double *diffData = makeDiffData(m, c); - double *d_diffData; - // CHECK: hipMalloc((void**) &d_diffData, m * c * sizeof(double)); - // CHECK: hipMemcpy(d_diffData, diffData, m * c * sizeof(double), hipMemcpyHostToDevice); - // CHECK: hipDeviceSynchronize(); - cudaMalloc((void**) &d_diffData, m * c * sizeof(double)); - cudaMemcpy(d_diffData, diffData, m * c * sizeof(double), cudaMemcpyHostToDevice); - cudaDeviceSynchronize(); - // CHECK: hipdnnSoftmaxBackward(handle, HIPDNN_SOFTMAX_ACCURATE, HIPDNN_SOFTMAX_MODE_CHANNEL, - cudnnSoftmaxBackward(handle, CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL, - &one, srcTensorDesc, d_softmaxData, diffTensorDesc, d_diffData, &zero, sftTensorDesc, d_gradData); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - // Copy back - double *result_backward = (double *) malloc(m * c * sizeof(double)); - // CHECK: hipMemcpy(result_backward, d_gradData, m * 
c * sizeof(double), hipMemcpyDeviceToHost); - // CHECK: hipDeviceSynchronize(); - cudaMemcpy(result_backward, d_gradData, m * c * sizeof(double), cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - - // Log - printf("GRADIENT:\n"); - printMatrix(result_backward, c, m); - - // Destruct - free(result); - free(diffData); - free(result_backward); - free(fcLayer); - - // CHECK: hipdnnDestroyTensorDescriptor(srcTensorDesc); - // CHECK: hipdnnDestroyTensorDescriptor(sftTensorDesc); - // CHECK: hipdnnDestroyTensorDescriptor(diffTensorDesc); - // CHECK: hipFree(d_fcLayer); - // CHECK: hipFree(d_softmaxData); - // CHECK: hipFree(d_gradData); - // CHECK: hipFree(d_diffData); - // CHECK: hipdnnDestroy(handle); - cudnnDestroyTensorDescriptor(srcTensorDesc); - cudnnDestroyTensorDescriptor(sftTensorDesc); - cudnnDestroyTensorDescriptor(diffTensorDesc); - cudaFree(d_fcLayer); - cudaFree(d_softmaxData); - cudaFree(d_gradData); - cudaFree(d_diffData); - cudnnDestroy(handle); -} \ No newline at end of file diff --git a/tests/hipify-clang/unit_tests/libraries/cuFFT/simple_cufft.cu b/tests/hipify-clang/unit_tests/libraries/cuFFT/simple_cufft.cu deleted file mode 100644 index 9c05a53fa8..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuFFT/simple_cufft.cu +++ /dev/null @@ -1,78 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// CHECK: #include -#include -// CHECK: #include -#include -#include -#include - -#define DATASIZE 8 -#define BATCH 2 - -#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } -// CHECK: inline void gpuAssert(hipError_t code, const char *file, int line, bool abort = true) -inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true) -{ - // CHECK: if (code != hipSuccess) - if (code != cudaSuccess) - { - // CHECK: fprintf(stderr, "GPUassert: %s %s %dn", hipGetErrorString(code), file, line); - fprintf(stderr, "GPUassert: %s %s %dn", cudaGetErrorString(code), file, line); - if (abort) exit(code); - 
} -} - -int main() -{ - // --- Host side input data allocation and initialization - // CHECK: hipfftReal *hostInputData = (hipfftReal*)malloc(DATASIZE*BATCH * sizeof(hipfftReal)); - cufftReal *hostInputData = (cufftReal*)malloc(DATASIZE*BATCH * sizeof(cufftReal)); - for (int i = 0; iHost copy of the results - // CHECK: gpuErrchk(hipMemcpy(hostOutputData, deviceOutputData, (DATASIZE / 2 + 1) * BATCH * sizeof(hipfftComplex), hipMemcpyDeviceToHost)); - gpuErrchk(cudaMemcpy(hostOutputData, deviceOutputData, (DATASIZE / 2 + 1) * BATCH * sizeof(cufftComplex), cudaMemcpyDeviceToHost)); - - for (int i = 0; i -#include -#include -#include -#include -#include -#include -#include - -#include "cmdparser.hpp" -// CHECK: #include -#include -// CHECK: #include -#include - -// CHECK: if ((x) != hipSuccess) { -#define CUDA_CALL(x) \ - do { \ - if ((x) != cudaSuccess) { \ - printf("Error at %s:%d\n", __FILE__, __LINE__); \ - exit(EXIT_FAILURE); \ - } \ - } while (0) -// CHECK: if ((x) != HIPRAND_STATUS_SUCCESS) { -#define CURAND_CALL(x) \ - do { \ - if ((x) != CURAND_STATUS_SUCCESS) { \ - printf("Error at %s:%d\n", __FILE__, __LINE__); \ - exit(EXIT_FAILURE); \ - } \ - } while (0) - -#ifndef DEFAULT_RAND_N -const size_t DEFAULT_RAND_N = 1024 * 1024 * 128; -#endif - -// CHECK: typedef hiprandRngType_t rng_type_t; -typedef curandRngType rng_type_t; - -template -// CHECK: using generate_func_type = std::function; -using generate_func_type = std::function; - -template -void run_benchmark(const cli::Parser& parser, const rng_type_t rng_type, - generate_func_type generate_func) { - const size_t size = parser.get("size"); - const size_t trials = parser.get("trials"); - - T* data; - // CHECK: CUDA_CALL(hipMalloc((void**)&data, size * sizeof(T))); - CUDA_CALL(cudaMalloc((void**)&data, size * sizeof(T))); - - // CHECK: hiprandGenerator_t generator; - // CHECK: CURAND_CALL(hiprandCreateGenerator(&generator, rng_type)); - curandGenerator_t generator; - 
CURAND_CALL(curandCreateGenerator(&generator, rng_type)); - - const size_t dimensions = parser.get("dimensions"); - // CHECK: hiprandStatus_t status = hiprandSetQuasiRandomGeneratorDimensions(generator, dimensions); - // CHECK: if (status != HIPRAND_STATUS_TYPE_ERROR) - curandStatus_t status = curandSetQuasiRandomGeneratorDimensions(generator, dimensions); - if (status != CURAND_STATUS_TYPE_ERROR) // If the RNG is not quasi-random - { - CURAND_CALL(status); - } - - // Warm-up - for (size_t i = 0; i < 5; i++) { - CURAND_CALL(generate_func(generator, data, size)); - } - // CHECK: CUDA_CALL(hipDeviceSynchronize()); - CUDA_CALL(cudaDeviceSynchronize()); - - // Measurement - auto start = std::chrono::high_resolution_clock::now(); - for (size_t i = 0; i < trials; i++) { - CURAND_CALL(generate_func(generator, data, size)); - } - // CHECK: CUDA_CALL(hipDeviceSynchronize()); - CUDA_CALL(cudaDeviceSynchronize()); - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration elapsed = end - start; - - std::cout << std::fixed << std::setprecision(3) << " " - << "Throughput = " << std::setw(8) - << (trials * size * sizeof(T)) / (elapsed.count() / 1e3 * (1 << 30)) - << " GB/s, Samples = " << std::setw(8) - << (trials * size) / (elapsed.count() / 1e3 * (1 << 30)) - << " GSample/s, AvgTime (1 trial) = " << std::setw(8) << elapsed.count() / trials - << " ms, Time (all) = " << std::setw(8) << elapsed.count() << " ms, Size = " << size - << std::endl; - // CHECK: CURAND_CALL(hiprandDestroyGenerator(generator)); - // CHECK: CUDA_CALL(hipFree(data)); - CURAND_CALL(curandDestroyGenerator(generator)); - CUDA_CALL(cudaFree(data)); -} - -void run_benchmarks(const cli::Parser& parser, const rng_type_t rng_type, - const std::string& distribution) { - if (distribution == "uniform-uint") { - // CHECK: if (rng_type != HIPRAND_RNG_QUASI_SOBOL64 && - // CHECK: rng_type != HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL64) { - if (rng_type != CURAND_RNG_QUASI_SOBOL64 && - rng_type != 
CURAND_RNG_QUASI_SCRAMBLED_SOBOL64) { - run_benchmark( - parser, rng_type, - // CHECK: [](hiprandGenerator_t gen, unsigned int* data, size_t size) { - // CHECK: return hiprandGenerate(gen, data, size); - [](curandGenerator_t gen, unsigned int* data, size_t size) { - return curandGenerate(gen, data, size); - }); - } - } - if (distribution == "uniform-long-long") { - // CHECK: if (rng_type == HIPRAND_RNG_QUASI_SOBOL64 || - // CHECK: rng_type == HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL64) - if (rng_type == CURAND_RNG_QUASI_SOBOL64 || - rng_type == CURAND_RNG_QUASI_SCRAMBLED_SOBOL64) { - run_benchmark( - parser, rng_type, - // CHECK: [](hiprandGenerator_t gen, unsigned long long* data, size_t size) { - [](curandGenerator_t gen, unsigned long long* data, size_t size) { - // curandGenerateLongLong is yet unsupported by HIP - // CHECK-NOT: return hiprandGenerateLongLong(gen, data, size); - return curandGenerateLongLong(gen, data, size); - }); - } - } - if (distribution == "uniform-float") { - run_benchmark(parser, rng_type, - // CHECK: [](hiprandGenerator_t gen, float* data, size_t size) { - // CHECK: return hiprandGenerateUniform(gen, data, size); - [](curandGenerator_t gen, float* data, size_t size) { - return curandGenerateUniform(gen, data, size); - }); - } - if (distribution == "uniform-double") { - run_benchmark(parser, rng_type, - // CHECK: [](hiprandGenerator_t gen, double* data, size_t size) { - // CHECK: return hiprandGenerateUniformDouble(gen, data, size); - [](curandGenerator_t gen, double* data, size_t size) { - return curandGenerateUniformDouble(gen, data, size); - }); - } - if (distribution == "normal-float") { - run_benchmark(parser, rng_type, - // CHECK: [](hiprandGenerator_t gen, float* data, size_t size) { - // CHECK: return hiprandGenerateNormal(gen, data, size, 0.0f, 1.0f); - [](curandGenerator_t gen, float* data, size_t size) { - return curandGenerateNormal(gen, data, size, 0.0f, 1.0f); - }); - } - if (distribution == "normal-double") { - run_benchmark( - 
parser, rng_type, - // CHECK: [](hiprandGenerator_t gen, double* data, size_t size) { - // CHECK: return hiprandGenerateNormalDouble(gen, data, size, 0.0, 1.0); - [](curandGenerator_t gen, double* data, size_t size) { - return curandGenerateNormalDouble(gen, data, size, 0.0, 1.0); - }); - } - if (distribution == "log-normal-float") { - run_benchmark(parser, rng_type, - // CHECK: [](hiprandGenerator_t gen, float* data, size_t size) { - // CHECK: return hiprandGenerateLogNormal(gen, data, size, 0.0f, 1.0f); - [](curandGenerator_t gen, float* data, size_t size) { - return curandGenerateLogNormal(gen, data, size, 0.0f, 1.0f); - }); - } - if (distribution == "log-normal-double") { - run_benchmark( - parser, rng_type, - // CHECK: [](hiprandGenerator_t gen, double* data, size_t size) { - // CHECK: return hiprandGenerateLogNormalDouble(gen, data, size, 0.0, 1.0); - [](curandGenerator_t gen, double* data, size_t size) { - return curandGenerateLogNormalDouble(gen, data, size, 0.0, 1.0); - }); - } - if (distribution == "poisson") { - const auto lambdas = parser.get>("lambda"); - for (double lambda : lambdas) { - std::cout << " " - << "lambda " << std::fixed << std::setprecision(1) << lambda << std::endl; - run_benchmark( - parser, rng_type, - // CHECK: [lambda](hiprandGenerator_t gen, unsigned int* data, size_t size) { - // CHECK: return hiprandGeneratePoisson(gen, data, size, lambda); - [lambda](curandGenerator_t gen, unsigned int* data, size_t size) { - return curandGeneratePoisson(gen, data, size, lambda); - }); - } - } -} - -const std::vector all_engines = { - "xorwow", "mrg32k3a", "mtgp32", - // "mt19937", - "philox", "sobol32", - // "scrambled_sobol32", - // "sobol64", - // "scrambled_sobol64", -}; - -const std::vector all_distributions = { - "uniform-uint", "uniform-long-long", "uniform-float", "uniform-double", "normal-float", - "normal-double", "log-normal-float", "log-normal-double", "poisson"}; - -int main(int argc, char* argv[]) { - cli::Parser parser(argc, argv); 
- - const std::string distribution_desc = - "space-separated list of distributions:" + - std::accumulate(all_distributions.begin(), all_distributions.end(), std::string(), - [](std::string a, std::string b) { return a + "\n " + b; }) + - "\n or all"; - const std::string engine_desc = - "space-separated list of random number engines:" + - std::accumulate(all_engines.begin(), all_engines.end(), std::string(), - [](std::string a, std::string b) { return a + "\n " + b; }) + - "\n or all"; - - parser.set_optional("size", "size", DEFAULT_RAND_N, "number of values"); - parser.set_optional("dimensions", "dimensions", 1, - "number of dimensions of quasi-random values"); - parser.set_optional("trials", "trials", 20, "number of trials"); - parser.set_optional>("dis", "dis", {"uniform-uint"}, - distribution_desc.c_str()); - parser.set_optional>("engine", "engine", {"philox"}, - engine_desc.c_str()); - parser.set_optional>( - "lambda", "lambda", {10.0}, "space-separated list of lambdas of Poisson distribution"); - parser.run_and_exit_if_error(); - - std::vector engines; - { - auto es = parser.get>("engine"); - if (std::find(es.begin(), es.end(), "all") != es.end()) { - engines = all_engines; - } else { - for (auto e : all_engines) { - if (std::find(es.begin(), es.end(), e) != es.end()) engines.push_back(e); - } - } - } - - std::vector distributions; - { - auto ds = parser.get>("dis"); - if (std::find(ds.begin(), ds.end(), "all") != ds.end()) { - distributions = all_distributions; - } else { - for (auto d : all_distributions) { - if (std::find(ds.begin(), ds.end(), d) != ds.end()) distributions.push_back(d); - } - } - } - - int version; - // CHECK: CURAND_CALL(hiprandGetVersion(&version)); - CURAND_CALL(curandGetVersion(&version)); - int runtime_version; - // cudaRuntimeGetVersion is yet unsupported by HIP - // CHECK: CUDA_CALL(hipRuntimeGetVersion(&runtime_version)); - CUDA_CALL(cudaRuntimeGetVersion(&runtime_version)); - int device_id; - // CHECK: 
CUDA_CALL(hipGetDevice(&device_id)); - // CHECK: hipDeviceProp_t props; - // CHECK: CUDA_CALL(hipGetDeviceProperties(&props, device_id)); - CUDA_CALL(cudaGetDevice(&device_id)); - cudaDeviceProp props; - CUDA_CALL(cudaGetDeviceProperties(&props, device_id)); - - std::cout << "cuRAND: " << version << " "; - std::cout << "Runtime: " << runtime_version << " "; - std::cout << "Device: " << props.name; - std::cout << std::endl << std::endl; - - for (auto engine : engines) { - // CHECK: rng_type_t rng_type = HIPRAND_RNG_PSEUDO_XORWOW; - // CHECK: rng_type = HIPRAND_RNG_PSEUDO_XORWOW; - // CHECK: rng_type = HIPRAND_RNG_PSEUDO_MRG32K3A; - // CHECK: rng_type = HIPRAND_RNG_PSEUDO_MTGP32; - // CHECK: rng_type = HIPRAND_RNG_PSEUDO_MT19937; - // CHECK: rng_type = HIPRAND_RNG_PSEUDO_PHILOX4_32_10; - // CHECK: rng_type = HIPRAND_RNG_QUASI_SOBOL32; - // CHECK: rng_type = HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL32; - // CHECK: rng_type = HIPRAND_RNG_QUASI_SOBOL64; - // CHECK: rng_type = HIPRAND_RNG_QUASI_SCRAMBLED_SOBOL64; - rng_type_t rng_type = CURAND_RNG_PSEUDO_XORWOW; - if (engine == "xorwow") - rng_type = CURAND_RNG_PSEUDO_XORWOW; - else if (engine == "mrg32k3a") - rng_type = CURAND_RNG_PSEUDO_MRG32K3A; - else if (engine == "mtgp32") - rng_type = CURAND_RNG_PSEUDO_MTGP32; - else if (engine == "mt19937") - rng_type = CURAND_RNG_PSEUDO_MT19937; - else if (engine == "philox") - rng_type = CURAND_RNG_PSEUDO_PHILOX4_32_10; - else if (engine == "sobol32") - rng_type = CURAND_RNG_QUASI_SOBOL32; - else if (engine == "scrambled_sobol32") - rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL32; - else if (engine == "sobol64") - rng_type = CURAND_RNG_QUASI_SOBOL64; - else if (engine == "scrambled_sobol64") - rng_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL64; - else { - std::cout << "Wrong engine name" << std::endl; - exit(1); - } - - std::cout << engine << ":" << std::endl; - - for (auto distribution : distributions) { - std::cout << " " << distribution << ":" << std::endl; - run_benchmarks(parser, 
rng_type, distribution); - } - std::cout << std::endl; - } - - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp b/tests/hipify-clang/unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp deleted file mode 100644 index bff9b77cad..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuRAND/benchmark_curand_kernel.cpp +++ /dev/null @@ -1,673 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// Copyright (c) 2017 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cmdparser.hpp" -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -// CHECK: if ((x) != hipSuccess) { -#define CUDA_CALL(x) \ - do { \ - if ((x) != cudaSuccess) { \ - printf("Error at %s:%d\n", __FILE__, __LINE__); \ - exit(EXIT_FAILURE); \ - } \ - } while (0) -// CHECK: if ((x) != HIPRAND_STATUS_SUCCESS) { -#define CURAND_CALL(x) \ - do { \ - if ((x) != CURAND_STATUS_SUCCESS) { \ - printf("Error at %s:%d\n", __FILE__, __LINE__); \ - exit(EXIT_FAILURE); \ - } \ - } while (0) - -#ifndef DEFAULT_RAND_N -const size_t DEFAULT_RAND_N = 1024 * 1024 * 128; -#endif - -size_t next_power2(size_t x) -{ - size_t power = 1; - while (power < x) - { - power *= 2; - } - return power; -} - -template -__global__ -void init_kernel(GeneratorState * states, - const unsigned long long seed, - const unsigned long long offset) -{ - const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x; - GeneratorState state; - // CHECK: hiprand_init(seed, state_id, offset, &state); - curand_init(seed, state_id, offset, &state); - states[state_id] = state; -} - -template -__global__ -void generate_kernel(GeneratorState * states, - T * data, - const size_t size, - const GenerateFunc& generate_func, - const Extra extra) -{ - const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x; - const unsigned int stride = gridDim.x * blockDim.x; - - GeneratorState state = states[state_id]; - unsigned int index = state_id; - while(index < size) - { - data[index] = generate_func(&state, extra); - index += stride; - } - states[state_id] = state; -} - -template -struct runner -{ - GeneratorState * states; - - runner(const size_t dimensions, - const size_t blocks, - const size_t threads, - const unsigned long long seed, - const unsigned long long offset) - { - const size_t states_size = blocks * 
threads; - // CHECK: CUDA_CALL(hipMalloc((void **)&states, states_size * sizeof(GeneratorState))); - CUDA_CALL(cudaMalloc((void **)&states, states_size * sizeof(GeneratorState))); - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(init_kernel), dim3(blocks), dim3(threads), 0, 0, states, seed, offset); - init_kernel<<>>(states, seed, offset); - // CHECK: CUDA_CALL(hipPeekAtLastError()); - // CHECK: CUDA_CALL(hipDeviceSynchronize()); - CUDA_CALL(cudaPeekAtLastError()); - CUDA_CALL(cudaDeviceSynchronize()); - } - - ~runner() - { - CUDA_CALL(cudaFree(states)); - } - - template - void generate(const size_t blocks, - const size_t threads, - T * data, - const size_t size, - const GenerateFunc& generate_func, - const Extra extra) - { - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(generate_kernel), dim3(blocks), dim3(threads), 0, 0, states, data, size, generate_func, extra); - generate_kernel<<>>(states, data, size, generate_func, extra); - } -}; - -// CHECK: void generate_kernel(hiprandStateMtgp32_t * states, -template -__global__ -void generate_kernel(curandStateMtgp32_t * states, - T * data, - const size_t size, - const GenerateFunc& generate_func, - const Extra extra) -{ - const unsigned int state_id = blockIdx.x; - const unsigned int thread_id = threadIdx.x; - unsigned int index = blockIdx.x * blockDim.x + threadIdx.x; - unsigned int stride = gridDim.x * blockDim.x; - // CHECK: __shared__ hiprandStateMtgp32_t state; - __shared__ curandStateMtgp32_t state; - - if (thread_id == 0) - state = states[state_id]; - __syncthreads(); - - const size_t r = size%blockDim.x; - const size_t size_rounded_up = r == 0 ? 
size : size + (blockDim.x - r); - while(index < size_rounded_up) - { - auto value = generate_func(&state, extra); - if(index < size) - data[index] = value; - index += stride; - } - __syncthreads(); - - if (thread_id == 0) - states[state_id] = state; -} - -// CHECK: struct runner -template<> -struct runner -{ - // CHECK: hiprandStateMtgp32_t * states; - curandStateMtgp32_t * states; - mtgp32_kernel_params_t * d_param; - - runner(const size_t dimensions, - const size_t blocks, - const size_t threads, - const unsigned long long seed, - const unsigned long long offset) - { - const size_t states_size = std::min((size_t)200, blocks); - // CHECK: CUDA_CALL(hipMalloc((void **)&states, states_size * sizeof(hiprandStateMtgp32_t))); - CUDA_CALL(cudaMalloc((void **)&states, states_size * sizeof(curandStateMtgp32_t))); - // CHECK: CUDA_CALL(hipMalloc((void **)&d_param, sizeof(mtgp32_kernel_params))); - CUDA_CALL(cudaMalloc((void **)&d_param, sizeof(mtgp32_kernel_params))); - // CHECK: CURAND_CALL(hiprandMakeMTGP32Constants(mtgp32dc_params_fast_11213, d_param)); - CURAND_CALL(curandMakeMTGP32Constants(mtgp32dc_params_fast_11213, d_param)); - // CHECK: CURAND_CALL(hiprandMakeMTGP32KernelState(states, mtgp32dc_params_fast_11213, d_param, states_size, seed)); - CURAND_CALL(curandMakeMTGP32KernelState(states, mtgp32dc_params_fast_11213, d_param, states_size, seed)); - } - - ~runner() - { - // CHECK: CUDA_CALL(hipFree(states)); - // CHECK: CUDA_CALL(hipFree(d_param)); - CUDA_CALL(cudaFree(states)); - CUDA_CALL(cudaFree(d_param)); - } - - template - void generate(const size_t blocks, - const size_t threads, - T * data, - const size_t size, - const GenerateFunc& generate_func, - const Extra extra) - { - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(generate_kernel), dim3(std::min((size_t)200, blocks)), dim3(256), 0, 0, states, data, size, generate_func, extra); - generate_kernel<<>>(states, data, size, generate_func, extra); - } -}; - -// CHECK: void init_kernel(hiprandStateSobol32_t * 
states, -template -__global__ -void init_kernel(curandStateSobol32_t * states, - const Directions directions, - const unsigned long long offset) -{ - const unsigned int dimension = blockIdx.y; - const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x; - // CHECK: hiprandStateSobol32_t state; - // CHECK: hiprand_init(directions[dimension], offset + state_id, &state); - curandStateSobol32_t state; - curand_init(directions[dimension], offset + state_id, &state); - states[gridDim.x * blockDim.x * dimension + state_id] = state; -} - -// CHECK: void generate_kernel(hiprandStateSobol32_t * states, -template -__global__ -void generate_kernel(curandStateSobol32_t * states, - T * data, - const size_t size, - const GenerateFunc& generate_func, - const Extra extra) -{ - const unsigned int dimension = blockIdx.y; - const unsigned int state_id = blockIdx.x * blockDim.x + threadIdx.x; - const unsigned int stride = gridDim.x * blockDim.x; - // CHECK: hiprandStateSobol32_t state = states[gridDim.x * blockDim.x * dimension + state_id]; - curandStateSobol32_t state = states[gridDim.x * blockDim.x * dimension + state_id]; - const unsigned int offset = dimension * size; - unsigned int index = state_id; - while(index < size) - { - data[offset + index] = generate_func(&state, extra); - skipahead(stride - 1, &state); - index += stride; - } - state = states[gridDim.x * blockDim.x * dimension + state_id]; - skipahead(static_cast(size), &state); - states[gridDim.x * blockDim.x * dimension + state_id] = state; -} - -// CHECK: struct runner -template<> -struct runner -{ - // CHECK: hiprandStateSobol32_t * states; - curandStateSobol32_t * states; - size_t dimensions; - - runner(const size_t dimensions, - const size_t blocks, - const size_t threads, - const unsigned long long seed, - const unsigned long long offset) - { - this->dimensions = dimensions; - // CHECK: CUDA_CALL(hipMalloc((void **)&states, states_size * sizeof(hiprandStateSobol32_t))); - const size_t states_size = blocks 
* threads * dimensions; - CUDA_CALL(cudaMalloc((void **)&states, states_size * sizeof(curandStateSobol32_t))); - // CHECK: hiprandDirectionVectors32_t * directions; - curandDirectionVectors32_t * directions; - // CHECK: const size_t size = dimensions * sizeof(hiprandDirectionVectors32_t); - const size_t size = dimensions * sizeof(curandDirectionVectors32_t); - // CHECK: CUDA_CALL(hipMalloc((void **)&directions, size)); - CUDA_CALL(cudaMalloc((void **)&directions, size)); - // CHECK: hiprandDirectionVectors32_t * h_directions; - curandDirectionVectors32_t * h_directions; - // hiprandGetDirectionVectors32 and HIPRAND_DIRECTION_VECTORS_32_JOEKUO6 (of hiprandDirectionVectorSet_t) are yet unsupported by HIP - // CHECK-NOT: CURAND_CALL(hiprandGetDirectionVectors32(&h_directions, HIPRAND_DIRECTION_VECTORS_32_JOEKUO6)); - CURAND_CALL(curandGetDirectionVectors32(&h_directions, CURAND_DIRECTION_VECTORS_32_JOEKUO6)); - // CHECK: CUDA_CALL(hipMemcpy(directions, h_directions, size, hipMemcpyHostToDevice)); - CUDA_CALL(cudaMemcpy(directions, h_directions, size, cudaMemcpyHostToDevice)); - - const size_t blocks_x = next_power2((blocks + dimensions - 1) / dimensions); - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(init_kernel), dim3(blocks_x, dimensions), dim3(threads), 0, 0, states, directions, offset); - init_kernel<<>>(states, directions, offset); - // CHECK: CUDA_CALL(hipPeekAtLastError()); - // CHECK: CUDA_CALL(hipDeviceSynchronize()); - CUDA_CALL(cudaPeekAtLastError()); - CUDA_CALL(cudaDeviceSynchronize()); - // CHECK: CUDA_CALL(hipFree(directions)); - CUDA_CALL(cudaFree(directions)); - } - - ~runner() - { - // CHECK: CUDA_CALL(hipFree(states)); - CUDA_CALL(cudaFree(states)); - } - - template - void generate(const size_t blocks, - const size_t threads, - T * data, - const size_t size, - const GenerateFunc& generate_func, - const Extra extra) - { - const size_t blocks_x = next_power2((blocks + dimensions - 1) / dimensions); - // CHECK: 
hipLaunchKernelGGL(HIP_KERNEL_NAME(generate_kernel), dim3(blocks_x, dimensions), dim3(threads), 0, 0, states, data, size / dimensions, generate_func, extra); - generate_kernel<<>>(states, data, size / dimensions, generate_func, extra); - } -}; - -template -void run_benchmark(const cli::Parser& parser, - const GenerateFunc& generate_func, - const Extra extra) -{ - const size_t size = parser.get("size"); - const size_t dimensions = parser.get("dimensions"); - const size_t trials = parser.get("trials"); - - const size_t blocks = parser.get("blocks"); - const size_t threads = parser.get("threads"); - - T * data; - // CHECK: CUDA_CALL(hipMalloc((void **)&data, size * sizeof(T))); - CUDA_CALL(cudaMalloc((void **)&data, size * sizeof(T))); - - runner r(dimensions, blocks, threads, 12345ULL, 6789ULL); - - // Warm-up - for (size_t i = 0; i < 5; i++) - { - r.generate(blocks, threads, data, size, generate_func, extra); - // CHECK: CUDA_CALL(hipPeekAtLastError()); - // CHECK: CUDA_CALL(hipDeviceSynchronize()); - CUDA_CALL(cudaPeekAtLastError()); - CUDA_CALL(cudaDeviceSynchronize()); - } - // CHECK: CUDA_CALL(hipDeviceSynchronize()); - CUDA_CALL(cudaDeviceSynchronize()); - - // Measurement - auto start = std::chrono::high_resolution_clock::now(); - for (size_t i = 0; i < trials; i++) - { - r.generate(blocks, threads, data, size, generate_func, extra); - } - // CHECK: CUDA_CALL(hipPeekAtLastError()); - // CHECK: CUDA_CALL(hipDeviceSynchronize()); - CUDA_CALL(cudaPeekAtLastError()); - CUDA_CALL(cudaDeviceSynchronize()); - auto end = std::chrono::high_resolution_clock::now(); - std::chrono::duration elapsed = end - start; - - std::cout << std::fixed << std::setprecision(3) - << " " - << "Throughput = " - << std::setw(8) << (trials * size * sizeof(T)) / - (elapsed.count() / 1e3 * (1 << 30)) - << " GB/s, Samples = " - << std::setw(8) << (trials * size) / - (elapsed.count() / 1e3 * (1 << 30)) - << " GSample/s, AvgTime (1 trial) = " - << std::setw(8) << elapsed.count() / trials - << " 
ms, Time (all) = " - << std::setw(8) << elapsed.count() - << " ms, Size = " << size - << std::endl; - // CHECK: CUDA_CALL(hipFree(data)); - CUDA_CALL(cudaFree(data)); -} - -template -void run_benchmarks(const cli::Parser& parser, - const std::string& distribution) -{ - if (distribution == "uniform-uint") - { - // curandStateSobol64_t and curandStateScrambledSobol64_t are yet unsupported by HIP - // CHECK-NOT: if (!std::is_same::value && - // CHECK-NOT: !std::is_same::value) - if (!std::is_same::value && - !std::is_same::value) - { - run_benchmark(parser, - [] __device__ (GeneratorState * state, int) { - // CHECK: return hiprand(state); - return curand(state); - }, 0 - ); - } - } - if (distribution == "uniform-long-long") - { - // curandStateSobol64_t and curandStateScrambledSobol64_t are yet unsupported by HIP - // CHECK-NOT: if (!std::is_same::value && - // CHECK-NOT: !std::is_same::value) - if (std::is_same::value || - std::is_same::value) - { - run_benchmark(parser, - [] __device__ (GeneratorState * state, int) { - // CHECK: return hiprand(state); - return curand(state); - }, 0 - ); - } - } - if (distribution == "uniform-float") - { - run_benchmark(parser, - [] __device__ (GeneratorState * state, int) { - // CHECK: return hiprand_uniform(state); - return curand_uniform(state); - }, 0 - ); - } - if (distribution == "uniform-double") - { - run_benchmark(parser, - [] __device__ (GeneratorState * state, int) { - // CHECK: return hiprand_uniform_double(state); - return curand_uniform_double(state); - }, 0 - ); - } - if (distribution == "normal-float") - { - run_benchmark(parser, - [] __device__ (GeneratorState * state, int) { - // CHECK: return hiprand_normal(state); - return curand_normal(state); - }, 0 - ); - } - if (distribution == "normal-double") - { - run_benchmark(parser, - [] __device__ (GeneratorState * state, int) { - // CHECK: return hiprand_normal_double(state); - return curand_normal_double(state); - }, 0 - ); - } - if (distribution == 
"log-normal-float") - { - run_benchmark(parser, - [] __device__ (GeneratorState * state, int) { - // CHECK: return hiprand_log_normal(state, 0.0f, 1.0f); - return curand_log_normal(state, 0.0f, 1.0f); - }, 0 - ); - } - if (distribution == "log-normal-double") - { - run_benchmark(parser, - [] __device__ (GeneratorState * state, int) { - // CHECK: return hiprand_log_normal_double(state, 0.0, 1.0); - return curand_log_normal_double(state, 0.0, 1.0); - }, 0 - ); - } - if (distribution == "poisson") - { - const auto lambdas = parser.get>("lambda"); - for (double lambda : lambdas) - { - std::cout << " " << "lambda " - << std::fixed << std::setprecision(1) << lambda << std::endl; - run_benchmark(parser, - [] __device__ (GeneratorState * state, double lambda) { - // CHECK: return hiprand_poisson(state, lambda); - return curand_poisson(state, lambda); - }, lambda - ); - } - } - if (distribution == "discrete-poisson") - { - const auto lambdas = parser.get>("lambda"); - for (double lambda : lambdas) - { - std::cout << " " << "lambda " - << std::fixed << std::setprecision(1) << lambda << std::endl; - // CHECK: hiprandDiscreteDistribution_t discrete_distribution; - curandDiscreteDistribution_t discrete_distribution; - // CHECK: CURAND_CALL(hiprandCreatePoissonDistribution(lambda, &discrete_distribution)); - CURAND_CALL(curandCreatePoissonDistribution(lambda, &discrete_distribution)); - run_benchmark(parser, - // CHECK: [] __device__ (GeneratorState * state, hiprandDiscreteDistribution_t discrete_distribution) { - [] __device__ (GeneratorState * state, curandDiscreteDistribution_t discrete_distribution) { - // CHECK: return hiprand_discrete(state, discrete_distribution); - return curand_discrete(state, discrete_distribution); - }, discrete_distribution - ); - // CHECK: CURAND_CALL(hiprandDestroyDistribution(discrete_distribution)); - CURAND_CALL(curandDestroyDistribution(discrete_distribution)); - } - } -} - -const std::vector all_engines = { - "xorwow", - "mrg32k3a", - 
"mtgp32", - // "mt19937", - "philox", - "sobol32", - // "scrambled_sobol32", - // "sobol64", - // "scrambled_sobol64", -}; - -const std::vector all_distributions = { - "uniform-uint", - // "uniform-long-long", - "uniform-float", - "uniform-double", - "normal-float", - "normal-double", - "log-normal-float", - "log-normal-double", - "poisson", - "discrete-poisson", -}; - -int main(int argc, char *argv[]) -{ - cli::Parser parser(argc, argv); - - const std::string distribution_desc = - "space-separated list of distributions:" + - std::accumulate(all_distributions.begin(), all_distributions.end(), std::string(), - [](std::string a, std::string b) { - return a + "\n " + b; - } - ) + - "\n or all"; - const std::string engine_desc = - "space-separated list of random number engines:" + - std::accumulate(all_engines.begin(), all_engines.end(), std::string(), - [](std::string a, std::string b) { - return a + "\n " + b; - } - ) + - "\n or all"; - - parser.set_optional("size", "size", DEFAULT_RAND_N, "number of values"); - parser.set_optional("dimensions", "dimensions", 1, "number of dimensions of quasi-random values"); - parser.set_optional("trials", "trials", 20, "number of trials"); - parser.set_optional("blocks", "blocks", 256, "number of blocks"); - parser.set_optional("threads", "threads", 256, "number of threads in each block"); - parser.set_optional>("dis", "dis", {"uniform-uint"}, distribution_desc.c_str()); - parser.set_optional>("engine", "engine", {"philox"}, engine_desc.c_str()); - parser.set_optional>("lambda", "lambda", {10.0}, "space-separated list of lambdas of Poisson distribution"); - parser.run_and_exit_if_error(); - - std::vector engines; - { - auto es = parser.get>("engine"); - if (std::find(es.begin(), es.end(), "all") != es.end()) - { - engines = all_engines; - } - else - { - for (auto e : all_engines) - { - if (std::find(es.begin(), es.end(), e) != es.end()) - engines.push_back(e); - } - } - } - - std::vector distributions; - { - auto ds = 
parser.get>("dis"); - if (std::find(ds.begin(), ds.end(), "all") != ds.end()) - { - distributions = all_distributions; - } - else - { - for (auto d : all_distributions) - { - if (std::find(ds.begin(), ds.end(), d) != ds.end()) - distributions.push_back(d); - } - } - } - - int version; - // CHECK: CURAND_CALL(hiprandGetVersion(&version)); - CURAND_CALL(curandGetVersion(&version)); - int runtime_version; - // cudaRuntimeGetVersion is yet unsupported by HIP - // CHECK: CUDA_CALL(hipRuntimeGetVersion(&runtime_version)); - CUDA_CALL(cudaRuntimeGetVersion(&runtime_version)); - int device_id; - // CHECK: CUDA_CALL(hipGetDevice(&device_id)); - // CHECK: hipDeviceProp_t props; - // CHECK: CUDA_CALL(hipGetDeviceProperties(&props, device_id)); - CUDA_CALL(cudaGetDevice(&device_id)); - cudaDeviceProp props; - CUDA_CALL(cudaGetDeviceProperties(&props, device_id)); - - std::cout << "cuRAND: " << version << " "; - std::cout << "Runtime: " << runtime_version << " "; - std::cout << "Device: " << props.name; - std::cout << std::endl << std::endl; - - for (auto engine : engines) - { - std::cout << engine << ":" << std::endl; - for (auto distribution : distributions) - { - std::cout << " " << distribution << ":" << std::endl; - const std::string plot_name = engine + "-" + distribution; - if (engine == "xorwow") - { - // CHECK: run_benchmarks(parser, distribution); - run_benchmarks(parser, distribution); - } - else if (engine == "mrg32k3a") - { - // CHECK: run_benchmarks(parser, distribution); - run_benchmarks(parser, distribution); - } - else if (engine == "philox") - { - // CHECK: run_benchmarks(parser, distribution); - run_benchmarks(parser, distribution); - } - else if (engine == "sobol32") - { - // CHECK: run_benchmarks(parser, distribution); - run_benchmarks(parser, distribution); - } - else if (engine == "mtgp32") - { - // CHECK: run_benchmarks(parser, distribution); - run_benchmarks(parser, distribution); - } - } - } - - return 0; -} diff --git 
a/tests/hipify-clang/unit_tests/libraries/cuRAND/cmdparser.hpp b/tests/hipify-clang/unit_tests/libraries/cuRAND/cmdparser.hpp deleted file mode 100644 index 1fe8ad9ce2..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuRAND/cmdparser.hpp +++ /dev/null @@ -1,494 +0,0 @@ -// The MIT License (MIT) -// -// Copyright (c) 2015 - 2016 Florian Rappl -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -/* - This file is part of the C++ CmdParser utility. 
- Copyright (c) 2015 - 2016 Florian Rappl -*/ - -#pragma once -#include -#include -#include -#include -#include -#include - -namespace cli { -struct CallbackArgs { - const std::vector& arguments; - std::ostream& output; - std::ostream& error; -}; -class Parser { - private: - class CmdBase { - public: - explicit CmdBase(const std::string& name, const std::string& alternative, - const std::string& description, bool required, bool dominant, - bool variadic) - : name(name), - command(name.size() > 0 ? "-" + name : ""), - alternative(alternative.size() > 0 ? "--" + alternative : ""), - description(description), - required(required), - handled(false), - arguments({}), - dominant(dominant), - variadic(variadic) {} - - virtual ~CmdBase() {} - - std::string name; - std::string command; - std::string alternative; - std::string description; - bool required; - bool handled; - std::vector arguments; - bool const dominant; - bool const variadic; - - virtual std::string print_value() const = 0; - virtual bool parse(std::ostream& output, std::ostream& error) = 0; - - bool is(const std::string& given) const { return given == command || given == alternative; } - }; - - template - struct ArgumentCountChecker { - static constexpr bool Variadic = false; - }; - - template - struct ArgumentCountChecker> { - static constexpr bool Variadic = true; - }; - - template - class CmdFunction final : public CmdBase { - public: - explicit CmdFunction(const std::string& name, const std::string& alternative, - const std::string& description, bool required, bool dominant) - : CmdBase(name, alternative, description, required, dominant, - ArgumentCountChecker::Variadic) {} - - virtual bool parse(std::ostream& output, std::ostream& error) { - try { - CallbackArgs args{arguments, output, error}; - value = callback(args); - return true; - } catch (...) 
{ - return false; - } - } - - virtual std::string print_value() const { return ""; } - - std::function callback; - T value; - }; - - template - class CmdArgument final : public CmdBase { - public: - explicit CmdArgument(const std::string& name, const std::string& alternative, - const std::string& description, bool required, bool dominant) - : CmdBase(name, alternative, description, required, dominant, - ArgumentCountChecker::Variadic) {} - - virtual bool parse(std::ostream&, std::ostream&) { - try { - value = Parser::parse(arguments, value); - return true; - } catch (...) { - return false; - } - } - - virtual std::string print_value() const { return stringify(value); } - - T value; - }; - - static int parse(const std::vector& elements, const int&) { - if (elements.size() != 1) throw std::bad_cast(); - - return std::stoi(elements[0]); - } - - static bool parse(const std::vector& elements, const bool& defval) { - if (elements.size() != 0) - throw std::runtime_error("A boolean command line parameter cannot have any arguments."); - - return !defval; - } - - static double parse(const std::vector& elements, const double&) { - if (elements.size() != 1) throw std::bad_cast(); - - return std::stod(elements[0]); - } - - static float parse(const std::vector& elements, const float&) { - if (elements.size() != 1) throw std::bad_cast(); - - return std::stof(elements[0]); - } - - static long double parse(const std::vector& elements, const long double&) { - if (elements.size() != 1) throw std::bad_cast(); - - return std::stold(elements[0]); - } - - static unsigned int parse(const std::vector& elements, const unsigned int&) { - if (elements.size() != 1) throw std::bad_cast(); - - return static_cast(std::stoul(elements[0])); - } - - static unsigned long parse(const std::vector& elements, const unsigned long&) { - if (elements.size() != 1) throw std::bad_cast(); - - return std::stoul(elements[0]); - } - - static unsigned long long parse(const std::vector& elements, - const unsigned 
long long&) { - if (elements.size() != 1) throw std::bad_cast(); - - return std::stoull(elements[0]); - } - - static long parse(const std::vector& elements, const long&) { - if (elements.size() != 1) throw std::bad_cast(); - - return std::stol(elements[0]); - } - - static std::string parse(const std::vector& elements, const std::string&) { - if (elements.size() != 1) throw std::bad_cast(); - - return elements[0]; - } - - template - static std::vector parse(const std::vector& elements, const std::vector&) { - const T defval = T(); - std::vector values{}; - std::vector buffer(1); - - for (const auto& element : elements) { - buffer[0] = element; - values.push_back(parse(buffer, defval)); - } - - return values; - } - - template - static std::string stringify(const T& value) { - return std::to_string(value); - } - - template - static std::string stringify(const std::vector& values) { - std::stringstream ss{}; - ss << "[ "; - - for (const auto& value : values) { - ss << stringify(value) << " "; - } - - ss << "]"; - return ss.str(); - } - - static std::string stringify(const std::string& str) { return str; } - - public: - explicit Parser(int argc, const char** argv) : _appname(argv[0]) { - for (int i = 1; i < argc; ++i) { - _arguments.push_back(argv[i]); - } - enable_help(); - } - - explicit Parser(int argc, char** argv) : _appname(argv[0]) { - for (int i = 1; i < argc; ++i) { - _arguments.push_back(argv[i]); - } - enable_help(); - } - - ~Parser() { - for (int i = 0, n = _commands.size(); i < n; ++i) { - delete _commands[i]; - } - } - - bool has_help() const { - for (const auto command : _commands) { - if (command->name == "h" && command->alternative == "--help") { - return true; - } - } - - return false; - } - - void enable_help() { - set_callback("h", "help", std::function([this](CallbackArgs& args) { - args.output << this->usage(); - exit(0); - return false; - }), - "", true); - } - - void disable_help() { - for (auto command = _commands.begin(); command != 
_commands.end(); ++command) { - if ((*command)->name == "h" && (*command)->alternative == "--help") { - _commands.erase(command); - break; - } - } - } - - template - void set_default(bool is_required, const std::string& description = "") { - auto command = new CmdArgument{"", "", description, is_required, false}; - _commands.push_back(command); - } - - template - void set_required(const std::string& name, const std::string& alternative, - const std::string& description = "", bool dominant = false) { - auto command = new CmdArgument{name, alternative, description, true, dominant}; - _commands.push_back(command); - } - - template - void set_optional(const std::string& name, const std::string& alternative, T defaultValue, - const std::string& description = "", bool dominant = false) { - auto command = new CmdArgument{name, alternative, description, false, dominant}; - command->value = defaultValue; - _commands.push_back(command); - } - - template - void set_callback(const std::string& name, const std::string& alternative, - std::function callback, const std::string& description = "", - bool dominant = false) { - auto command = new CmdFunction{name, alternative, description, false, dominant}; - command->callback = callback; - _commands.push_back(command); - } - - inline void run_and_exit_if_error() { - if (run() == false) { - exit(1); - } - } - - inline bool run() { return run(std::cout, std::cerr); } - - inline bool run(std::ostream& output) { return run(output, std::cerr); } - - bool run(std::ostream& output, std::ostream& error) { - if (_arguments.size() > 0) { - auto current = find_default(); - - for (int i = 0, n = _arguments.size(); i < n; ++i) { - auto isarg = _arguments[i].size() > 0 && _arguments[i][0] == '-'; - auto associated = isarg ? 
find(_arguments[i]) : nullptr; - - if (associated != nullptr) { - current = associated; - associated->handled = true; - } else if (current == nullptr) { - error << no_default(); - return false; - } else { - current->arguments.push_back(_arguments[i]); - current->handled = true; - if (!current->variadic) { - // If the current command is not variadic, then no more arguments - // should be added to it. In this case, switch back to the default - // command. - current = find_default(); - } - } - } - } - - // First, parse dominant arguments since they succeed even if required - // arguments are missing. - for (auto command : _commands) { - if (command->handled && command->dominant && !command->parse(output, error)) { - error << howto_use(command); - return false; - } - } - - // Next, check for any missing arguments. - for (auto command : _commands) { - if (command->required && !command->handled) { - error << howto_required(command); - return false; - } - } - - // Finally, parse all remaining arguments. 
- for (auto command : _commands) { - if (command->handled && !command->dominant && !command->parse(output, error)) { - error << howto_use(command); - return false; - } - } - - return true; - } - - template - T get(const std::string& name) const { - for (const auto& command : _commands) { - if (command->name == name) { - auto cmd = dynamic_cast*>(command); - - if (cmd == nullptr) { - throw std::runtime_error("Invalid usage of the parameter " + name + - " detected."); - } - - return cmd->value; - } - } - - throw std::runtime_error("The parameter " + name + " could not be found."); - } - - template - T get_if(const std::string& name, std::function callback) const { - auto value = get(name); - return callback(value); - } - - int requirements() const { - int count = 0; - - for (const auto& command : _commands) { - if (command->required) { - ++count; - } - } - - return count; - } - - int commands() const { return static_cast(_commands.size()); } - - inline const std::string& app_name() const { return _appname; } - - protected: - CmdBase* find(const std::string& name) { - for (auto command : _commands) { - if (command->is(name)) { - return command; - } - } - - return nullptr; - } - - CmdBase* find_default() { - for (auto command : _commands) { - if (command->name == "") { - return command; - } - } - - return nullptr; - } - - std::string usage() const { - std::stringstream ss{}; - ss << "Available parameters:\n\n"; - - for (const auto& command : _commands) { - ss << " " << command->command << "\t" << command->alternative; - - if (command->required == true) { - ss << "\t(required)"; - } - - ss << "\n " << command->description; - - if (command->required == false) { - ss << "\n " - << "This parameter is optional. 
The default value is '" + command->print_value() - << "'."; - } - - ss << "\n\n"; - } - - return ss.str(); - } - - void print_help(std::stringstream& ss) const { - if (has_help()) { - ss << "For more help use --help or -h.\n"; - } - } - - std::string howto_required(CmdBase* command) const { - std::stringstream ss{}; - ss << "The parameter " << command->name << " is required.\n"; - ss << command->description << '\n'; - print_help(ss); - return ss.str(); - } - - std::string howto_use(CmdBase* command) const { - std::stringstream ss{}; - ss << "The parameter " << command->name << " has invalid arguments.\n"; - ss << command->description << '\n'; - print_help(ss); - return ss.str(); - } - - std::string no_default() const { - std::stringstream ss{}; - ss << "No default parameter has been specified.\n"; - ss << "The given argument must be used with a parameter.\n"; - print_help(ss); - return ss.str(); - } - - private: - const std::string _appname; - std::vector _arguments; - std::vector _commands; -}; -} // namespace cli diff --git a/tests/hipify-clang/unit_tests/libraries/cuRAND/poisson_api_example.cu b/tests/hipify-clang/unit_tests/libraries/cuRAND/poisson_api_example.cu deleted file mode 100644 index 567de05e6e..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuRAND/poisson_api_example.cu +++ /dev/null @@ -1,416 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// Taken from: http://docs.nvidia.com/cuda/curand/device-api-overview.html#poisson-api-example -/* - * This program uses CURAND library for Poisson distribution - * to simulate queues in store for 16 hours. It shows the - * difference of using 3 different APIs: - * - HOST API -arrival of customers is described by Poisson(4) - * - SIMPLE DEVICE API -arrival of customers is described by - * Poisson(4*(sin(x/100)+1)), where x is number of minutes - * from store opening time. - * - ROBUST DEVICE API -arrival of customers is described by: - * - Poisson(2) for first 3 hours. 
- * - Poisson(1) for second 3 hours. - * - Poisson(3) after 6 hours. - */ - -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -// CHECK: #define CUDA_CALL(x) do { if((x) != hipSuccess) { -#define CUDA_CALL(x) do { if((x) != cudaSuccess) { \ - printf("Error at %s:%d\n",__FILE__,__LINE__); \ - return EXIT_FAILURE;}} while(0) -// CHECK: #define CURAND_CALL(x) do { if((x)!=HIPRAND_STATUS_SUCCESS) { -#define CURAND_CALL(x) do { if((x)!=CURAND_STATUS_SUCCESS) { \ - printf("Error at %s:%d\n",__FILE__,__LINE__);\ - return EXIT_FAILURE;}} while(0) - - -#define HOURS 16 -#define OPENING_HOUR 7 -#define CLOSING_HOUR (OPENING_HOUR + HOURS) - -#define access_2D(type, ptr, row, column, pitch)\ - *((type*)((char*)ptr + (row) * pitch) + column) - -enum API_TYPE { - HOST_API = 0, - SIMPLE_DEVICE_API = 1, - ROBUST_DEVICE_API = 2, -}; - -/* global variables */ -API_TYPE api; -int report_break; -int cashiers_load_h[HOURS]; -__constant__ int cashiers_load[HOURS]; -// CHECK: __global__ void setup_kernel(hiprandState *state) -__global__ void setup_kernel(curandState *state) -{ - int id = threadIdx.x + blockIdx.x * blockDim.x; - /* Each thread gets same seed, a different sequence - number, no offset */ - // CHECK: hiprand_init(1234, id, 0, &state[id]); - curand_init(1234, id, 0, &state[id]); -} - -__inline__ __device__ -void update_queue(int id, int min, unsigned int new_customers, - unsigned int &queue_length, - unsigned int *queue_lengths, size_t pitch) -{ - int balance; - balance = new_customers - 2 * cashiers_load[(min-1)/60]; - if (balance + (int)queue_length <= 0){ - queue_length = 0; - }else{ - queue_length += balance; - } - /* Store results */ - access_2D(unsigned int, queue_lengths, min-1, id, pitch) - = queue_length; -} - -// CHECK: __global__ void simple_device_API_kernel(hiprandState *state, -__global__ void simple_device_API_kernel(curandState *state, - unsigned int *queue_lengths, size_t pitch) -{ - int id = 
threadIdx.x + blockIdx.x * blockDim.x; - unsigned int new_customers; - unsigned int queue_length = 0; - /* Copy state to local memory for efficiency */ - // CHECK: hiprandState localState = state[id]; - curandState localState = state[id]; - /* Simulate queue in time */ - for(int min = 1; min <= 60 * HOURS; min++) { - /* Draw number of new customers depending on API */ - // CHECK: new_customers = hiprand_poisson(&localState, - new_customers = curand_poisson(&localState, - 4*(sin((float)min/100.0)+1)); - /* Update queue */ - update_queue(id, min, new_customers, queue_length, - queue_lengths, pitch); - } - /* Copy state back to global memory */ - state[id] = localState; -} - - -__global__ void host_API_kernel(unsigned int *poisson_numbers, - unsigned int *queue_lengths, size_t pitch) -{ - int id = threadIdx.x + blockIdx.x * blockDim.x; - unsigned int new_customers; - unsigned int queue_length = 0; - /* Simulate queue in time */ - for(int min = 1; min <= 60 * HOURS; min++) { - /* Get random number from global memory */ - new_customers = poisson_numbers - [blockDim.x * gridDim.x * (min -1) + id]; - /* Update queue */ - update_queue(id, min, new_customers, queue_length, - queue_lengths, pitch); - } -} -// CHECK: __global__ void robust_device_API_kernel(hiprandState *state, -// CHECK: hiprandDiscreteDistribution_t poisson_1, -// CHECK: hiprandDiscreteDistribution_t poisson_2, -// CHECK: hiprandDiscreteDistribution_t poisson_3, -__global__ void robust_device_API_kernel(curandState *state, - curandDiscreteDistribution_t poisson_1, - curandDiscreteDistribution_t poisson_2, - curandDiscreteDistribution_t poisson_3, - unsigned int *queue_lengths, size_t pitch) -{ - int id = threadIdx.x + blockIdx.x * 64; - unsigned int new_customers; - unsigned int queue_length = 0; - /* Copy state to local memory for efficiency */ - // CHECK: hiprandState localState = state[id]; - curandState localState = state[id]; - /* Simulate queue in time */ - /* first 3 hours */ - for(int min = 1; min 
<= 60 * 3; min++) { - /* draw number of new customers depending on API */ - new_customers = - // CHECK: hiprand_discrete(&localState, poisson_2); - curand_discrete(&localState, poisson_2); - /* Update queue */ - update_queue(id, min, new_customers, queue_length, - queue_lengths, pitch); - } - /* second 3 hours */ - for(int min = 60 * 3 + 1; min <= 60 * 6; min++) { - /* draw number of new customers depending on API */ - new_customers = - // CHECK: hiprand_discrete(&localState, poisson_1); - curand_discrete(&localState, poisson_1); - /* Update queue */ - update_queue(id, min, new_customers, queue_length, - queue_lengths, pitch); - } - /* after 6 hours */ - for(int min = 60 * 6 + 1; min <= 60 * HOURS; min++) { - /* draw number of new customers depending on API */ - new_customers = - // CHECK: hiprand_discrete(&localState, poisson_3); - curand_discrete(&localState, poisson_3); - /* Update queue */ - update_queue(id, min, new_customers, queue_length, - queue_lengths, pitch); - } - /* Copy state back to global memory */ - state[id] = localState; -} - -/* Set time intervals between reports */ -void report_settings() -{ - do{ - printf("Set time intervals between queue reports"); - printf("(in minutes > 0)\n"); - if (scanf("%d", &report_break) == 0) continue; - }while(report_break <= 0); -} - - -/* Set number of cashiers each hour */ -void add_cachiers(int *cashiers_load) -{ - int i, min, max, begin, end; - printf("Cashier serves 2 customers per minute...\n"); - for (i = 0; i < HOURS; i++){ - cashiers_load_h[i] = 0; - } - while (true){ - printf("Adding cashier...\n"); - min = OPENING_HOUR; - max = CLOSING_HOUR-1; - do{ - printf("Set hour that cahier comes (%d-%d)", - min, max); - printf(" [type 0 to finish adding cashiers]\n"); - if (scanf("%d", &begin) == 0) continue; - }while (begin > max || (begin < min && begin != 0)); - if (begin == 0) break; - min = begin+1; - max = CLOSING_HOUR; - do{ - printf("Set hour that cahier leaves (%d-%d)", - min, max); - printf(" [type 0 to 
finish adding cashiers]\n"); - if (scanf("%d", &end) == 0) continue; - }while (end > max || (end < min && end != 0)); - if (end == 0) break; - for (i = begin - OPENING_HOUR; - i < end - OPENING_HOUR; i++){ - cashiers_load_h[i]++; - } - } - for (i = OPENING_HOUR; i < CLOSING_HOUR; i++){ - printf("\n%2d:00 - %2d:00 %d cashier", - i, i+1, cashiers_load_h[i-OPENING_HOUR]); - if (cashiers_load[i-OPENING_HOUR] != 1) printf("s"); - } - printf("\n"); -} - -/* Set API type */ -API_TYPE set_API_type() -{ - printf("Choose API type:\n"); - int choose; - do{ - printf("type 1 for HOST API\n"); - printf("type 2 for SIMPLE DEVICE API\n"); - printf("type 3 for ROBUST DEVICE API\n"); - if (scanf("%d", &choose) == 0) continue; - }while( choose < 1 || choose > 3); - switch(choose){ - case 1: return HOST_API; - case 2: return SIMPLE_DEVICE_API; - case 3: return ROBUST_DEVICE_API; - default: - fprintf(stderr, "wrong API\n"); - return HOST_API; - } -} - -void settings() -{ - add_cachiers(cashiers_load); - // CHECK: hipMemcpyToSymbol(HIP_SYMBOL("cashiers_load"), cashiers_load_h, - // CHECK: HOURS * sizeof(int), 0, hipMemcpyHostToDevice); - cudaMemcpyToSymbol("cashiers_load", cashiers_load_h, - HOURS * sizeof(int), 0, cudaMemcpyHostToDevice); - report_settings(); - api = set_API_type(); -} - -void print_statistics(unsigned int *hostResults, size_t pitch) -{ - int min, i, hour, minute; - unsigned int sum; - for(min = report_break; min <= 60 * HOURS; - min += report_break) { - sum = 0; - for(i = 0; i < 64 * 64; i++) { - sum += access_2D(unsigned int, hostResults, - min-1, i, pitch); - } - hour = OPENING_HOUR + min/60; - minute = min%60; - printf("%2d:%02d # of waiting customers = %10.4g |", - hour, minute, (float)sum/(64.0 * 64.0)); - printf(" # of cashiers = %d | ", - cashiers_load_h[(min-1)/60]); - printf("# of new customers/min ~= "); - switch (api){ - case HOST_API: - printf("%2.2f\n", 4.0); - break; - case SIMPLE_DEVICE_API: - printf("%2.2f\n", - 4*(sin((float)min/100.0)+1)); - break; - 
case ROBUST_DEVICE_API: - if (min <= 3 * 60){ - printf("%2.2f\n", 2.0); - }else{ - if (min <= 6 * 60){ - printf("%2.2f\n", 1.0); - }else{ - printf("%2.2f\n", 3.0); - } - } - break; - default: - fprintf(stderr, "Wrong API\n"); - } - } -} - -int main(int argc, char *argv[]) -{ - int n; - size_t pitch; - // CHECK: hiprandState *devStates; - curandState *devStates; - unsigned int *devResults, *hostResults; - unsigned int *poisson_numbers_d; - // CHECK: hiprandDiscreteDistribution_t poisson_1, poisson_2; - // CHECK: hiprandDiscreteDistribution_t poisson_3; - // CHECK: hiprandGenerator_t gen; - curandDiscreteDistribution_t poisson_1, poisson_2; - curandDiscreteDistribution_t poisson_3; - curandGenerator_t gen; - - /* Setting cashiers, report and API */ - settings(); - - /* Allocate space for results on device */ - // CHECK: CUDA_CALL(hipMallocPitch((void **)&devResults, &pitch, - CUDA_CALL(cudaMallocPitch((void **)&devResults, &pitch, - 64 * 64 * sizeof(unsigned int), 60 * HOURS)); - - /* Allocate space for results on host */ - hostResults = (unsigned int *)calloc(pitch * 60 * HOURS, - sizeof(unsigned int)); - - /* Allocate space for prng states on device */ - // CHECK: CUDA_CALL(hipMalloc((void **)&devStates, 64 * 64 * - // CHECK: sizeof(hiprandState))); - CUDA_CALL(cudaMalloc((void **)&devStates, 64 * 64 * - sizeof(curandState))); - - /* Setup prng states */ - if (api != HOST_API){ - // CHECK: hipLaunchKernelGGL(setup_kernel, dim3(64), dim3(64), 0, 0, devStates); - setup_kernel<<<64, 64>>>(devStates); - } - /* Simulate queue */ - switch (api){ - case HOST_API: - /* Create pseudo-random number generator */ - // CHECK: CURAND_CALL(hiprandCreateGenerator(&gen, - // CHECK: HIPRAND_RNG_PSEUDO_DEFAULT)); - CURAND_CALL(curandCreateGenerator(&gen, - CURAND_RNG_PSEUDO_DEFAULT)); - /* Set seed */ - // CHECK: CURAND_CALL(hiprandSetPseudoRandomGeneratorSeed( - CURAND_CALL(curandSetPseudoRandomGeneratorSeed( - gen, 1234ULL)); - /* compute n */ - n = 64 * 64 * HOURS * 60; - /* 
Allocate n unsigned ints on device */ - // CHECK: CUDA_CALL(hipMalloc((void **)&poisson_numbers_d, - CUDA_CALL(cudaMalloc((void **)&poisson_numbers_d, - n * sizeof(unsigned int))); - /* Generate n unsigned ints on device */ - // CHECK: CURAND_CALL(hiprandGeneratePoisson(gen, - CURAND_CALL(curandGeneratePoisson(gen, - poisson_numbers_d, n, 4.0)); - // CHECK: hipLaunchKernelGGL(host_API_kernel, dim3(64), dim3(64), 0, 0, poisson_numbers_d, - host_API_kernel<<<64, 64>>>(poisson_numbers_d, - devResults, pitch); - /* Cleanup */ - // CHECK: CURAND_CALL(hiprandDestroyGenerator(gen)); - CURAND_CALL(curandDestroyGenerator(gen)); - break; - case SIMPLE_DEVICE_API: - // CHECK: hipLaunchKernelGGL(simple_device_API_kernel, dim3(64), dim3(64), 0, 0, devStates, - simple_device_API_kernel<<<64, 64>>>(devStates, - devResults, pitch); - break; - case ROBUST_DEVICE_API: - /* Create histograms for Poisson(1) */ - // CHECK: CURAND_CALL(hiprandCreatePoissonDistribution(1.0, - CURAND_CALL(curandCreatePoissonDistribution(1.0, - &poisson_1)); - /* Create histograms for Poisson(2) */ - // CHECK: CURAND_CALL(hiprandCreatePoissonDistribution(2.0, - CURAND_CALL(curandCreatePoissonDistribution(2.0, - &poisson_2)); - /* Create histograms for Poisson(3) */ - // CHECK: CURAND_CALL(hiprandCreatePoissonDistribution(3.0, - CURAND_CALL(curandCreatePoissonDistribution(3.0, - &poisson_3)); - // CHECK: hipLaunchKernelGGL(robust_device_API_kernel, dim3(64), dim3(64), 0, 0, devStates, - robust_device_API_kernel<<<64, 64>>>(devStates, - poisson_1, poisson_2, poisson_3, - devResults, pitch); - /* Cleanup */ - // CHECK: CURAND_CALL(hiprandDestroyDistribution(poisson_1)); - // CHECK: CURAND_CALL(hiprandDestroyDistribution(poisson_2)); - // CHECK: CURAND_CALL(hiprandDestroyDistribution(poisson_3)); - CURAND_CALL(curandDestroyDistribution(poisson_1)); - CURAND_CALL(curandDestroyDistribution(poisson_2)); - CURAND_CALL(curandDestroyDistribution(poisson_3)); - break; - default: - fprintf(stderr, "Wrong API\n"); - } 
- /* Copy device memory to host */ - // CHECK: CUDA_CALL(hipMemcpy2D(hostResults, pitch, devResults, - // CHECK: 60 * HOURS, hipMemcpyDeviceToHost)); - CUDA_CALL(cudaMemcpy2D(hostResults, pitch, devResults, - pitch, 64 * 64 * sizeof(unsigned int), - 60 * HOURS, cudaMemcpyDeviceToHost)); - /* Show result */ - print_statistics(hostResults, pitch); - /* Cleanup */ - // CHECK: CUDA_CALL(hipFree(devStates)); - // CHECK: CUDA_CALL(hipFree(devResults)); - CUDA_CALL(cudaFree(devStates)); - CUDA_CALL(cudaFree(devResults)); - free(hostResults); - return EXIT_SUCCESS; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_01.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_01.cu deleted file mode 100644 index 6e163d47a6..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_01.cu +++ /dev/null @@ -1,367 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -// CHECK: #include -#include -// CHECK: #include "hipsparse.h" -#include "cusparse.h" - -// CHECK: if (y) hipFree(y); -// CHECK: if (z) hipFree(z); -// CHECK: if (xInd) hipFree(xInd); -// CHECK: if (xVal) hipFree(xVal); -// CHECK: if (csrRowPtr) hipFree(csrRowPtr); -// CHECK: if (cooRowIndex) hipFree(cooRowIndex); -// CHECK: if (cooColIndex) hipFree(cooColIndex); -// CHECK: if (cooVal) hipFree(cooVal); -// CHECK: if (descr) hipsparseDestroyMatDescr(descr); -// CHECK: if (handle) hipsparseDestroy(handle); -// CHECK: hipDeviceReset(); -#define CLEANUP(s) \ -do { \ - printf ("%s\n", s); \ - if (yHostPtr) free(yHostPtr); \ - if (zHostPtr) free(zHostPtr); \ - if (xIndHostPtr) free(xIndHostPtr); \ - if (xValHostPtr) free(xValHostPtr); \ - if (cooRowIndexHostPtr) free(cooRowIndexHostPtr);\ - if (cooColIndexHostPtr) free(cooColIndexHostPtr);\ - if (cooValHostPtr) free(cooValHostPtr); \ - if (y) cudaFree(y); \ - if (z) cudaFree(z); \ - if (xInd) cudaFree(xInd); \ - if (xVal) cudaFree(xVal); \ - if (csrRowPtr) cudaFree(csrRowPtr); \ - if 
(cooRowIndex) cudaFree(cooRowIndex); \ - if (cooColIndex) cudaFree(cooColIndex); \ - if (cooVal) cudaFree(cooVal); \ - if (descr) cusparseDestroyMatDescr(descr);\ - if (handle) cusparseDestroy(handle); \ - cudaDeviceReset(); \ - fflush (stdout); \ -} while (0) - -int main(){ - // CHECK: hipError_t cudaStat1,cudaStat2,cudaStat3,cudaStat4,cudaStat5,cudaStat6; - cudaError_t cudaStat1,cudaStat2,cudaStat3,cudaStat4,cudaStat5,cudaStat6; - // CHECK: hipsparseStatus_t status; - cusparseStatus_t status; - // CHECK: hipsparseHandle_t handle=0; - cusparseHandle_t handle=0; - // CHECK: hipsparseMatDescr_t descr=0; - cusparseMatDescr_t descr=0; - int * cooRowIndexHostPtr=0; - int * cooColIndexHostPtr=0; - double * cooValHostPtr=0; - int * cooRowIndex=0; - int * cooColIndex=0; - double * cooVal=0; - int * xIndHostPtr=0; - double * xValHostPtr=0; - double * yHostPtr=0; - int * xInd=0; - double * xVal=0; - double * y=0; - int * csrRowPtr=0; - double * zHostPtr=0; - double * z=0; - int n, nnz, nnz_vector; - double dzero =0.0; - double dtwo =2.0; - double dthree=3.0; - double dfive =5.0; - printf("testing example\n"); - /* create the following sparse test matrix in COO format */ - /* |1.0 2.0 3.0| - | 4.0 | - |5.0 6.0 7.0| - | 8.0 9.0| */ - n=4; nnz=9; - cooRowIndexHostPtr = (int *) malloc(nnz*sizeof(cooRowIndexHostPtr[0])); - cooColIndexHostPtr = (int *) malloc(nnz*sizeof(cooColIndexHostPtr[0])); - cooValHostPtr = (double *)malloc(nnz*sizeof(cooValHostPtr[0])); - if ((!cooRowIndexHostPtr) || (!cooColIndexHostPtr) || (!cooValHostPtr)){ - CLEANUP("Host malloc failed (matrix)"); - return 1; - } - cooRowIndexHostPtr[0]=0; cooColIndexHostPtr[0]=0; cooValHostPtr[0]=1.0; - cooRowIndexHostPtr[1]=0; cooColIndexHostPtr[1]=2; cooValHostPtr[1]=2.0; - cooRowIndexHostPtr[2]=0; cooColIndexHostPtr[2]=3; cooValHostPtr[2]=3.0; - cooRowIndexHostPtr[3]=1; cooColIndexHostPtr[3]=1; cooValHostPtr[3]=4.0; - cooRowIndexHostPtr[4]=2; cooColIndexHostPtr[4]=0; cooValHostPtr[4]=5.0; - cooRowIndexHostPtr[5]=2; 
cooColIndexHostPtr[5]=2; cooValHostPtr[5]=6.0; - cooRowIndexHostPtr[6]=2; cooColIndexHostPtr[6]=3; cooValHostPtr[6]=7.0; - cooRowIndexHostPtr[7]=3; cooColIndexHostPtr[7]=1; cooValHostPtr[7]=8.0; - cooRowIndexHostPtr[8]=3; cooColIndexHostPtr[8]=3; cooValHostPtr[8]=9.0; - nnz_vector = 3; - xIndHostPtr = (int *) malloc(nnz_vector*sizeof(xIndHostPtr[0])); - xValHostPtr = (double *)malloc(nnz_vector*sizeof(xValHostPtr[0])); - yHostPtr = (double *)malloc(2*n *sizeof(yHostPtr[0])); - zHostPtr = (double *)malloc(2*(n+1) *sizeof(zHostPtr[0])); - if((!xIndHostPtr) || (!xValHostPtr) || (!yHostPtr) || (!zHostPtr)) { - CLEANUP("Host malloc failed (vectors)"); - return 1; - } - yHostPtr[0] = 10.0; - xIndHostPtr[0]=0; - xValHostPtr[0]=100.0; - yHostPtr[1] = 20.0; - xIndHostPtr[1]=1; - xValHostPtr[1]=200.0; - yHostPtr[2] = 30.0; - yHostPtr[3] = 40.0; - xIndHostPtr[2]=3; - xValHostPtr[2]=400.0; - yHostPtr[4] = 50.0; - yHostPtr[5] = 60.0; - yHostPtr[6] = 70.0; - yHostPtr[7] = 80.0; - /* allocate GPU memory and copy the matrix and vectors into it */ - // CHECK: cudaStat1 = hipMalloc((void**)&cooRowIndex,nnz*sizeof(cooRowIndex[0])); - cudaStat1 = cudaMalloc((void**)&cooRowIndex,nnz*sizeof(cooRowIndex[0])); - // CHECK: cudaStat2 = hipMalloc((void**)&cooColIndex,nnz*sizeof(cooColIndex[0])); - cudaStat2 = cudaMalloc((void**)&cooColIndex,nnz*sizeof(cooColIndex[0])); - // CHECK: cudaStat3 = hipMalloc((void**)&cooVal, nnz*sizeof(cooVal[0])); - cudaStat3 = cudaMalloc((void**)&cooVal, nnz*sizeof(cooVal[0])); - // CHECK: cudaStat4 = hipMalloc((void**)&y, 2*n*sizeof(y[0])); - cudaStat4 = cudaMalloc((void**)&y, 2*n*sizeof(y[0])); - // CHECK: cudaStat5 = hipMalloc((void**)&xInd,nnz_vector*sizeof(xInd[0])); - cudaStat5 = cudaMalloc((void**)&xInd,nnz_vector*sizeof(xInd[0])); - // CHECK: cudaStat6 = hipMalloc((void**)&xVal,nnz_vector*sizeof(xVal[0])); - cudaStat6 = cudaMalloc((void**)&xVal,nnz_vector*sizeof(xVal[0])); - // CHECK: if ((cudaStat1 != hipSuccess) || - // CHECK: (cudaStat2 != hipSuccess) 
|| - // CHECK: (cudaStat3 != hipSuccess) || - // CHECK: (cudaStat4 != hipSuccess) || - // CHECK: (cudaStat5 != hipSuccess) || - // CHECK: (cudaStat6 != hipSuccess)) { - if ((cudaStat1 != cudaSuccess) || - (cudaStat2 != cudaSuccess) || - (cudaStat3 != cudaSuccess) || - (cudaStat4 != cudaSuccess) || - (cudaStat5 != cudaSuccess) || - (cudaStat6 != cudaSuccess)) { - CLEANUP("Device malloc failed"); - return 1; - } - // CHECK: cudaStat1 = hipMemcpy(cooRowIndex, cooRowIndexHostPtr, - // CHECK: hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(cooRowIndex, cooRowIndexHostPtr, - (size_t)(nnz*sizeof(cooRowIndex[0])), - cudaMemcpyHostToDevice); - // CHECK: cudaStat2 = hipMemcpy(cooColIndex, cooColIndexHostPtr, - // CHECK: hipMemcpyHostToDevice); - cudaStat2 = cudaMemcpy(cooColIndex, cooColIndexHostPtr, - (size_t)(nnz*sizeof(cooColIndex[0])), - cudaMemcpyHostToDevice); - // CHECK: cudaStat3 = hipMemcpy(cooVal, cooValHostPtr, - // CHECK: hipMemcpyHostToDevice); - cudaStat3 = cudaMemcpy(cooVal, cooValHostPtr, - (size_t)(nnz*sizeof(cooVal[0])), - cudaMemcpyHostToDevice); - // CHECK: cudaStat4 = hipMemcpy(y, yHostPtr, - // CHECK: hipMemcpyHostToDevice); - cudaStat4 = cudaMemcpy(y, yHostPtr, - (size_t)(2*n*sizeof(y[0])), - cudaMemcpyHostToDevice); - // CHECK: cudaStat5 = hipMemcpy(xInd, xIndHostPtr, - // CHECK: hipMemcpyHostToDevice); - cudaStat5 = cudaMemcpy(xInd, xIndHostPtr, - (size_t)(nnz_vector*sizeof(xInd[0])), - cudaMemcpyHostToDevice); - // CHECK: cudaStat6 = hipMemcpy(xVal, xValHostPtr, - // CHECK: hipMemcpyHostToDevice); - cudaStat6 = cudaMemcpy(xVal, xValHostPtr, - (size_t)(nnz_vector*sizeof(xVal[0])), - cudaMemcpyHostToDevice); - // CHECK: if ((cudaStat1 != hipSuccess) || - // CHECK: (cudaStat2 != hipSuccess) || - // CHECK: (cudaStat3 != hipSuccess) || - // CHECK: (cudaStat4 != hipSuccess) || - // CHECK: (cudaStat5 != hipSuccess) || - // CHECK: (cudaStat6 != hipSuccess)) { - if ((cudaStat1 != cudaSuccess) || - (cudaStat2 != cudaSuccess) || - (cudaStat3 != cudaSuccess) 
|| - (cudaStat4 != cudaSuccess) || - (cudaStat5 != cudaSuccess) || - (cudaStat6 != cudaSuccess)) { - CLEANUP("Memcpy from Host to Device failed"); - return 1; - } - /* initialize cusparse library */ - // CHECK: status= hipsparseCreate(&handle); - status= cusparseCreate(&handle); - // CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) { - if (status != CUSPARSE_STATUS_SUCCESS) { - CLEANUP("CUSPARSE Library initialization failed"); - return 1; - } - /* create and setup matrix descriptor */ - // CHECK: status= hipsparseCreateMatDescr(&descr); - status= cusparseCreateMatDescr(&descr); - // CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) { - if (status != CUSPARSE_STATUS_SUCCESS) { - CLEANUP("Matrix descriptor initialization failed"); - return 1; - } - // CHECK: hipsparseSetMatType(descr,HIPSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL); - // CHECK: hipsparseSetMatIndexBase(descr,HIPSPARSE_INDEX_BASE_ZERO); - cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO); - /* exercise conversion routines (convert matrix from COO 2 CSR format) */ - // CHECK: cudaStat1 = hipMalloc((void**)&csrRowPtr,(n+1)*sizeof(csrRowPtr[0])); - cudaStat1 = cudaMalloc((void**)&csrRowPtr,(n+1)*sizeof(csrRowPtr[0])); - // CHECK: if (cudaStat1 != hipSuccess) { - if (cudaStat1 != cudaSuccess) { - CLEANUP("Device malloc failed (csrRowPtr)"); - return 1; - } - status= cusparseXcoo2csr(handle,cooRowIndex,nnz,n, - // CHECK: csrRowPtr,HIPSPARSE_INDEX_BASE_ZERO); - csrRowPtr,CUSPARSE_INDEX_BASE_ZERO); - // CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) { - if (status != CUSPARSE_STATUS_SUCCESS) { - CLEANUP("Conversion from COO to CSR format failed"); - return 1; - } - //csrRowPtr = [0 3 4 7 9] - // The following test only works for compute capability 1.3 and above - // because it needs double precision. 
- int devId; - // CHECK: hipDeviceProp_t prop; - cudaDeviceProp prop; - // CHECK: hipError_t cudaStat; - cudaError_t cudaStat; - // CHECK: cudaStat = hipGetDevice(&devId); - cudaStat = cudaGetDevice(&devId); - // CHECK: if (hipSuccess != cudaStat){ - if (cudaSuccess != cudaStat){ - // CLEANUP("hipGetDevice failed"); - CLEANUP("cudaGetDevice failed"); - // printf("Error: cudaStat %d, %s\n", cudaStat, hipGetErrorString(cudaStat)); - printf("Error: cudaStat %d, %s\n", cudaStat, cudaGetErrorString(cudaStat)); - return 1; - } - // CHECK: cudaStat = hipGetDeviceProperties( &prop, devId); - cudaStat = cudaGetDeviceProperties( &prop, devId); - // CHECK: if (hipSuccess != cudaStat) { - if (cudaSuccess != cudaStat) { - // CHECK: CLEANUP("hipGetDeviceProperties failed"); - CLEANUP("cudaGetDeviceProperties failed"); - // CHECK: printf("Error: cudaStat %d, %s\n", cudaStat, hipGetErrorString(cudaStat)); - printf("Error: cudaStat %d, %s\n", cudaStat, cudaGetErrorString(cudaStat)); - return 1; - } - int cc = 100*prop.major + 10*prop.minor; - if (cc < 130){ - CLEANUP("waive the test because only sm13 and above are supported\n"); - printf("the device has compute capability %d\n", cc); - printf("example test WAIVED"); - return 2; - } - /* exercise Level 1 routines (scatter vector elements) */ - // CHECK: status= hipsparseDsctr(handle, nnz_vector, xVal, xInd, - // CHECK: &y[n], HIPSPARSE_INDEX_BASE_ZERO); - status= cusparseDsctr(handle, nnz_vector, xVal, xInd, - &y[n], CUSPARSE_INDEX_BASE_ZERO); - // CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) { - if (status != CUSPARSE_STATUS_SUCCESS) { - CLEANUP("Scatter from sparse to dense vector failed"); - return 1; - } - //y = [10 20 30 40 | 100 200 70 400] - /* exercise Level 2 routines (csrmv) */ - // CHECK: status= hipsparseDcsrmv(handle,HIPSPARSE_OPERATION_NON_TRANSPOSE, n, n, nnz, - status= cusparseDcsrmv(handle,CUSPARSE_OPERATION_NON_TRANSPOSE, n, n, nnz, - &dtwo, descr, cooVal, csrRowPtr, cooColIndex, - &y[0], &dthree, &y[n]); - // 
CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) { - if (status != CUSPARSE_STATUS_SUCCESS) { - CLEANUP("Matrix-vector multiplication failed"); - return 1; - } - //y = [10 20 30 40 | 680 760 1230 2240] - // CHECK: hipMemcpy(yHostPtr, y, (size_t)(2*n*sizeof(y[0])), hipMemcpyDeviceToHost); - cudaMemcpy(yHostPtr, y, (size_t)(2*n*sizeof(y[0])), cudaMemcpyDeviceToHost); - /* exercise Level 3 routines (csrmm) */ - // cudaStat1 = hipMalloc((void**)&z, 2*(n+1)*sizeof(z[0])); - cudaStat1 = cudaMalloc((void**)&z, 2*(n+1)*sizeof(z[0])); - // CHECK: if (cudaStat1 != hipSuccess) { - if (cudaStat1 != cudaSuccess) { - CLEANUP("Device malloc failed (z)"); - return 1; - } - // CHECK: cudaStat1 = hipMemset((void *)z,0, 2*(n+1)*sizeof(z[0])); - cudaStat1 = cudaMemset((void *)z,0, 2*(n+1)*sizeof(z[0])); - // CHECK: if (cudaStat1 != hipSuccess) { - if (cudaStat1 != cudaSuccess) { - CLEANUP("Memset on Device failed"); - return 1; - } - // CHECK: status= hipsparseDcsrmm(handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, n, 2, n, - status= cusparseDcsrmm(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, n, 2, n, - nnz, &dfive, descr, cooVal, csrRowPtr, cooColIndex, - y, n, &dzero, z, n+1); - // CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) { - if (status != CUSPARSE_STATUS_SUCCESS) { - CLEANUP("Matrix-matrix multiplication failed"); - return 1; - } - /* print final results (z) */ - // CHECK: cudaStat1 = hipMemcpy(zHostPtr, z, - // CHECK: hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(zHostPtr, z, - (size_t)(2*(n+1)*sizeof(z[0])), - cudaMemcpyDeviceToHost); - // CHECK: if (cudaStat1 != hipSuccess) { - if (cudaStat1 != cudaSuccess) { - CLEANUP("Memcpy from Device to Host failed"); - return 1; - } - //z = [950 400 2550 2600 0 | 49300 15200 132300 131200 0] - /* destroy matrix descriptor */ - // status = hipsparseDestroyMatDescr(descr); - status = cusparseDestroyMatDescr(descr); - descr = 0; - // CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) { - if (status != CUSPARSE_STATUS_SUCCESS) { - CLEANUP("Matrix 
descriptor destruction failed"); - return 1; - } - /* destroy handle */ - // CHECK: status = hipsparseDestroy(handle); - status = cusparseDestroy(handle); - handle = 0; - // CHECK: if (status != HIPSPARSE_STATUS_SUCCESS) { - if (status != CUSPARSE_STATUS_SUCCESS) { - CLEANUP("CUSPARSE Library release of resources failed"); - return 1; - } - /* check the results */ - // Notice that CLEANUP() contains a call to cusparseDestroy(handle) - if ((zHostPtr[0] != 950.0) || - (zHostPtr[1] != 400.0) || - (zHostPtr[2] != 2550.0) || - (zHostPtr[3] != 2600.0) || - (zHostPtr[4] != 0.0) || - (zHostPtr[5] != 49300.0) || - (zHostPtr[6] != 15200.0) || - (zHostPtr[7] != 132300.0) || - (zHostPtr[8] != 131200.0) || - (zHostPtr[9] != 0.0) || - (yHostPtr[0] != 10.0) || - (yHostPtr[1] != 20.0) || - (yHostPtr[2] != 30.0) || - (yHostPtr[3] != 40.0) || - (yHostPtr[4] != 680.0) || - (yHostPtr[5] != 760.0) || - (yHostPtr[6] != 1230.0) || - (yHostPtr[7] != 2240.0)) { - CLEANUP("example test FAILED"); - return 1; - } else { - CLEANUP("example test PASSED"); - return 0; - } -} \ No newline at end of file diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_02.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_02.cu deleted file mode 100644 index 57b2c61098..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_02.cu +++ /dev/null @@ -1,284 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include "hipsparse.h" -#include "cusparse.h" - -void printMatrix(int m, int n, const double*A, int lda, const char* name) -{ - for(int row = 0 ; row < m ; row++){ - for(int col = 0 ; col < n ; col++){ - double Areg = A[row + col*lda]; - printf("%s(%d,%d) = %f\n", name, row+1, col+1, Areg); - } - } -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipblasHandle_t cublasH = NULL; - cublasHandle_t cublasH = NULL; - // CHECK: hipsparseHandle_t 
cusparseH = NULL; - cusparseHandle_t cusparseH = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseMatDescr_t descrA = NULL; - cusparseMatDescr_t descrA = NULL; - // CHECK: hipblasStatus_t cublasStat = HIPBLAS_STATUS_SUCCESS; - cublasStatus_t cublasStat = CUBLAS_STATUS_SUCCESS; - // CHECK: hipsparseStatus_t cusparseStat = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t cusparseStat = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - // CHECK: hipError_t cudaStat2 = hipSuccess; - // CHECK: hipError_t cudaStat3 = hipSuccess; - // CHECK: hipError_t cudaStat4 = hipSuccess; - // CHECK: hipError_t cudaStat5 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - cudaError_t cudaStat2 = cudaSuccess; - cudaError_t cudaStat3 = cudaSuccess; - cudaError_t cudaStat4 = cudaSuccess; - cudaError_t cudaStat5 = cudaSuccess; - const int n = 4; - const int nnzA = 9; -/* - * | 1 0 2 3 | - * | 0 4 0 0 | - * A = | 5 0 6 7 | - * | 0 8 0 9 | - * - * eigevales are { -0.5311, 7.5311, 9.0000, 4.0000 } - * - * The largest eigenvaluse is 9 and corresponding eigenvector is - * - * | 0.3029 | - * v = | 0 | - * | 0.9350 | - * | 0.1844 | - */ - const int csrRowPtrA[n+1] = { 0, 3, 4, 7, 9 }; - const int csrColIndA[nnzA] = {0, 2, 3, 1, 0, 2, 3, 1, 3 }; - const double csrValA[nnzA] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0 }; - const double lambda_exact[n] = { 9.0000, 7.5311, 4.0000, -0.5311 }; - const double x0[n] = {1.0, 2.0, 3.0, 4.0 }; /* initial guess */ - double x[n]; /* numerical eigenvector */ - - int *d_csrRowPtrA = NULL; - int *d_csrColIndA = NULL; - double *d_csrValA = NULL; - - double *d_x = NULL; /* eigenvector */ - double *d_y = NULL; /* workspace */ - - const double tol = 1.e-6; - const int max_ites = 30; - - const double h_one = 1.0; - const double h_zero = 0.0; - - printf("example of csrmv_mp \n"); - printf("tol = %E \n", tol); - printf("max. 
iterations = %d \n", max_ites); - - printf("1st eigenvaluse is %f\n", lambda_exact[0] ); - printf("2nd eigenvaluse is %f\n", lambda_exact[1] ); - - double alpha = lambda_exact[1]/lambda_exact[0] ; - printf("convergence rate is %f\n", alpha ); - - double est_iterations = log(tol)/log(alpha); - printf("# of iterations required is %d\n", (int)ceil(est_iterations)); - - // step 1: create cublas/cusparse handle, bind a stream - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cublasStat = hipblasCreate(&cublasH); - cublasStat = cublasCreate(&cublasH); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - // CHECK: cublasStat = hipblasSetStream(cublasH, stream); - cublasStat = cublasSetStream(cublasH, stream); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - // CHECK: cusparseStat = hipsparseCreate(&cusparseH); - cusparseStat = cusparseCreate(&cusparseH); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == cusparseStat); - assert(CUSPARSE_STATUS_SUCCESS == cusparseStat); - // CHECK: cusparseStat = hipsparseSetStream(cusparseH, stream); - cusparseStat = cusparseSetStream(cusparseH, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == cusparseStat); - assert(CUSPARSE_STATUS_SUCCESS == cusparseStat); - - // step 2: configuration of matrix A - // CHECK: cusparseStat = hipsparseCreateMatDescr(&descrA); - cusparseStat = cusparseCreateMatDescr(&descrA); - // assert(HIPSPARSE_STATUS_SUCCESS == cusparseStat); - assert(CUSPARSE_STATUS_SUCCESS == cusparseStat); - // CHECK: hipsparseSetMatIndexBase(descrA,HIPSPARSE_INDEX_BASE_ZERO); - cusparseSetMatIndexBase(descrA,CUSPARSE_INDEX_BASE_ZERO); - // CHECK: hipsparseSetMatType(descrA, HIPSPARSE_MATRIX_TYPE_GENERAL ); - 
cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL ); - - // step 3: copy A and x0 to device - // CHECK: cudaStat1 = hipMalloc ((void**)&d_csrRowPtrA, sizeof(int) * (n+1) ); - cudaStat1 = cudaMalloc ((void**)&d_csrRowPtrA, sizeof(int) * (n+1) ); - // CHECK: cudaStat2 = hipMalloc ((void**)&d_csrColIndA, sizeof(int) * nnzA ); - cudaStat2 = cudaMalloc ((void**)&d_csrColIndA, sizeof(int) * nnzA ); - // CHECK: cudaStat3 = hipMalloc ((void**)&d_csrValA , sizeof(double) * nnzA ); - cudaStat3 = cudaMalloc ((void**)&d_csrValA , sizeof(double) * nnzA ); - // CHECK: cudaStat4 = hipMalloc ((void**)&d_x , sizeof(double) * n ); - cudaStat4 = cudaMalloc ((void**)&d_x , sizeof(double) * n ); - // CHECK: cudaStat5 = hipMalloc ((void**)&d_y , sizeof(double) * n ); - cudaStat5 = cudaMalloc ((void**)&d_y , sizeof(double) * n ); - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - // CHECK: assert(hipSuccess == cudaStat3); - // CHECK: assert(hipSuccess == cudaStat4); - // CHECK: assert(hipSuccess == cudaStat5); - assert(cudaSuccess == cudaStat1); - assert(cudaSuccess == cudaStat2); - assert(cudaSuccess == cudaStat3); - assert(cudaSuccess == cudaStat4); - assert(cudaSuccess == cudaStat5); - - // CHECK: cudaStat1 = hipMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int) * (n+1) , hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int) * (n+1) , cudaMemcpyHostToDevice); - // CHECK: cudaStat2 = hipMemcpy(d_csrColIndA, csrColIndA, sizeof(int) * nnzA , hipMemcpyHostToDevice); - cudaStat2 = cudaMemcpy(d_csrColIndA, csrColIndA, sizeof(int) * nnzA , cudaMemcpyHostToDevice); - // CHECK: cudaStat3 = hipMemcpy(d_csrValA , csrValA , sizeof(double) * nnzA , hipMemcpyHostToDevice); - cudaStat3 = cudaMemcpy(d_csrValA , csrValA , sizeof(double) * nnzA , cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - assert(cudaSuccess == cudaStat2); 
- // CHECK: assert(hipSuccess == cudaStat3); - assert(cudaSuccess == cudaStat3); - - // step 4: power method - double lambda = 0.0; - double lambda_next = 0.0; - - // 4.1: initial guess x0 - cudaStat1 = cudaMemcpy(d_x, x0, sizeof(double) * n, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - for(int ite = 0 ; ite < max_ites ; ite++ ){ - // 4.2: normalize vector x - // x = x / |x| - double nrm2_x; - // TODO: cublasStat = hipblasDnrm2_v2(cublasH, - cublasStat = cublasDnrm2_v2(cublasH, - n, - d_x, - 1, // incx, - &nrm2_x /* host pointer */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - double one_over_nrm2_x = 1.0 / nrm2_x; - // TODO: cublasStat = hipblasDscal_v2( cublasH, - cublasStat = cublasDscal_v2( cublasH, - n, - &one_over_nrm2_x, /* host pointer */ - d_x, - 1 // incx - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - // 4.3: y = A*x - // TODO: hipsparseStat = cusparseDcsrmv_mp(cusparseH, - // CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE - cusparseStat = cusparseDcsrmv_mp(cusparseH, - CUSPARSE_OPERATION_NON_TRANSPOSE, - n, - n, - nnzA, - &h_one, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - d_x, - &h_zero, - d_y); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == cusparseStat); - assert(CUSPARSE_STATUS_SUCCESS == cusparseStat); - - // 4.4: lambda = y**T*x - // TODO: cublasStat = hipblasDdot_v2 ( cublasH, - cublasStat = cublasDdot_v2 ( cublasH, - n, - d_x, - 1, // incx, - d_y, - 1, // incy, - &lambda_next /* host pointer */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - double lambda_err = fabs( lambda_next - lambda_exact[0] ); - printf("ite %d: lambda = %f, error = %E\n", ite, lambda_next, lambda_err ); - - // 4.5: check if converges - if ( (ite > 0) && - fabs( lambda - lambda_next ) < tol - ){ - break; // 
converges - } - - /* - * 4.6: x := y - * lambda = lambda_next - * - * so new approximation is (lambda, x), x is not normalized. - */ - // CHECK: cudaStat1 = hipMemcpy(d_x, d_y, sizeof(double) * n , hipMemcpyDeviceToDevice); - cudaStat1 = cudaMemcpy(d_x, d_y, sizeof(double) * n , cudaMemcpyDeviceToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - lambda = lambda_next; - } - // step 5: report eigen-pair - // CHECK: cudaStat1 = hipMemcpy(x, d_x, sizeof(double) * n, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(x, d_x, sizeof(double) * n, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - printf("largest eigenvalue is %E\n", lambda ); - printf("eigenvector = (matlab base-1)\n"); - printMatrix(n, 1, x, n, "V0"); - printf("=====\n"); - - // free resources - // CHECK: if (d_csrRowPtrA ) hipFree(d_csrRowPtrA); - if (d_csrRowPtrA ) cudaFree(d_csrRowPtrA); - // CHECK: if (d_csrColIndA ) hipFree(d_csrColIndA); - if (d_csrColIndA ) cudaFree(d_csrColIndA); - // CHECK: if (d_csrValA ) hipFree(d_csrValA); - if (d_csrValA ) cudaFree(d_csrValA); - // CHECK: if (d_x ) hipFree(d_x); - if (d_x ) cudaFree(d_x); - // CHeCK: if (d_y ) hipFree(d_y); - if (d_y ) cudaFree(d_y); - // CHECK: if (cublasH ) hipblasDestroy(cublasH); - if (cublasH ) cublasDestroy(cublasH); - // CHECK: if (cusparseH ) hipsparseDestroy(cusparseH); - if (cusparseH ) cusparseDestroy(cusparseH); - // CHECK: if (stream ) hipStreamDestroy(stream); - if (stream ) cudaStreamDestroy(stream); - // CHECK: if (descrA ) hipsparseDestroyMatDescr(descrA); - if (descrA ) cusparseDestroyMatDescr(descrA); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_03.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_03.cu deleted file mode 100644 index 8618be4748..0000000000 --- 
a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_03.cu +++ /dev/null @@ -1,229 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args "--skip-excluded-preprocessor-conditional-blocks" %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include "hipsparse.h" -#include "cusparse.h" - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t handle = NULL; - cusparseHandle_t handle = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - // CHECK: hipError_t cudaStat2 = hipSuccess; - // CHECK: hipError_t cudaStat3 = hipSuccess; - // CHECK: hipError_t cudaStat4 = hipSuccess; - // CHECK: hipError_t cudaStat5 = hipSuccess; - // CHECK: hipError_t cudaStat6 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - cudaError_t cudaStat2 = cudaSuccess; - cudaError_t cudaStat3 = cudaSuccess; - cudaError_t cudaStat4 = cudaSuccess; - cudaError_t cudaStat5 = cudaSuccess; - cudaError_t cudaStat6 = cudaSuccess; - - /* - * A is a 3x3 sparse matrix - * | 1 2 0 | - * A = | 0 5 0 | - * | 0 8 0 | - */ - const int m = 3; - const int n = 3; - const int nnz = 4; - -#if 0 - /* index starts at 0 */ - int h_cooRows[nnz] = { 2, 1, 0, 0 }; - int h_cooCols[nnz] = { 1, 1, 0, 1 }; -#else - /* index starts at -2 */ - int h_cooRows[nnz] = { 0, -1, -2, -2 }; - int h_cooCols[nnz] = { -1, -1, -2, -1 }; -#endif - double h_cooVals[nnz] = { 8.0, 5.0, 1.0, 2.0 }; - int h_P[nnz]; - - int *d_cooRows = NULL; - int *d_cooCols = NULL; - int *d_P = NULL; - double *d_cooVals = NULL; - double *d_cooVals_sorted = NULL; - size_t pBufferSizeInBytes = 0; - void *pBuffer = NULL; - - printf("m = %d, n = %d, nnz=%d \n", m, n, nnz); - - /* step 1: create cusparse handle, bind a stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = 
cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&handle); - status = cusparseCreate(&handle); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: status = hipsparseSetStream(handle, stream); - status = cusparseSetStream(handle, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - /* step 2: allocate buffer */ - // TODO: status = hipsparseXcoosort_bufferSizeExt( - status = cusparseXcoosort_bufferSizeExt( - handle, - m, - n, - nnz, - d_cooRows, - d_cooCols, - &pBufferSizeInBytes - ); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - printf("pBufferSizeInBytes = %lld bytes \n", (long long)pBufferSizeInBytes); - - // CHECK: cudaStat1 = hipMalloc(&d_cooRows, sizeof(int)*nnz); - cudaStat1 = cudaMalloc(&d_cooRows, sizeof(int)*nnz); - // CHECK: cudaStat2 = hipMalloc(&d_cooCols, sizeof(int)*nnz); - cudaStat2 = cudaMalloc(&d_cooCols, sizeof(int)*nnz); - // CHECK: cudaStat3 = hipMalloc(&d_P, sizeof(int)*nnz); - cudaStat3 = cudaMalloc(&d_P, sizeof(int)*nnz); - // CHECK: cudaStat4 = hipMalloc(&d_cooVals, sizeof(double)*nnz); - cudaStat4 = cudaMalloc(&d_cooVals, sizeof(double)*nnz); - // CHECK: cudaStat5 = hipMalloc(&d_cooVals_sorted, sizeof(double)*nnz); - cudaStat5 = cudaMalloc(&d_cooVals_sorted, sizeof(double)*nnz); - // CHECK: cudaStat6 = hipMalloc(&pBuffer, sizeof(char)* pBufferSizeInBytes); - cudaStat6 = cudaMalloc(&pBuffer, sizeof(char)* pBufferSizeInBytes); - - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - // CHECK: assert(hipSuccess == cudaStat3); - // CHECK: assert(hipSuccess == cudaStat4); - // CHECK: assert(hipSuccess == cudaStat5); - // CHECK: assert(hipSuccess == cudaStat6); - assert(cudaSuccess == cudaStat1); - 
assert(cudaSuccess == cudaStat2); - assert(cudaSuccess == cudaStat3); - assert(cudaSuccess == cudaStat4); - assert(cudaSuccess == cudaStat5); - assert(cudaSuccess == cudaStat6); - - // CHECK: cudaStat1 = hipMemcpy(d_cooRows, h_cooRows, sizeof(int)*nnz, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_cooRows, h_cooRows, sizeof(int)*nnz, cudaMemcpyHostToDevice); - // CHECK: cudaStat2 = hipMemcpy(d_cooCols, h_cooCols, sizeof(int)*nnz, hipMemcpyHostToDevice); - cudaStat2 = cudaMemcpy(d_cooCols, h_cooCols, sizeof(int)*nnz, cudaMemcpyHostToDevice); - // CHECK: cudaStat3 = hipMemcpy(d_cooVals, h_cooVals, sizeof(double)*nnz, hipMemcpyHostToDevice); - cudaStat3 = cudaMemcpy(d_cooVals, h_cooVals, sizeof(double)*nnz, cudaMemcpyHostToDevice); - // CHECK: cudaStat4 = hipDeviceSynchronize(); - cudaStat4 = cudaDeviceSynchronize(); - - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - // CHECK: assert(hipSuccess == cudaStat3); - // CHECK: assert(hipSuccess == cudaStat4); - assert(cudaSuccess == cudaStat1); - assert(cudaSuccess == cudaStat2); - assert(cudaSuccess == cudaStat3); - assert(cudaSuccess == cudaStat4); - - /* step 3: setup permutation vector P to identity */ - // TODO: status = hipsparseCreateIdentityPermutation( - status = cusparseCreateIdentityPermutation( - handle, - nnz, - d_P); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - /* step 4: sort COO format by Row */ - // TODO: status = hipsparseXcoosortByRow( - status = cusparseXcoosortByRow( - handle, - m, - n, - nnz, - d_cooRows, - d_cooCols, - d_P, - pBuffer - ); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - /* step 5: gather sorted cooVals */ - // CHECK: status = hipsparseDgthr( - // CHECK: HIPSPARSE_INDEX_BASE_ZERO - status = cusparseDgthr( - handle, - nnz, - d_cooVals, - d_cooVals_sorted, - d_P, - CUSPARSE_INDEX_BASE_ZERO - ); - // CHECK: 
assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - /* wait until the computation is done */ - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: cudaStat2 = hipMemcpy(h_cooRows, d_cooRows, sizeof(int)*nnz, hipMemcpyDeviceToHost); - cudaStat2 = cudaMemcpy(h_cooRows, d_cooRows, sizeof(int)*nnz, cudaMemcpyDeviceToHost); - // CHECK: cudaStat3 = hipMemcpy(h_cooCols, d_cooCols, sizeof(int)*nnz, hipMemcpyDeviceToHost); - cudaStat3 = cudaMemcpy(h_cooCols, d_cooCols, sizeof(int)*nnz, cudaMemcpyDeviceToHost); - // CHECK: cudaStat4 = hipMemcpy(h_P, d_P, sizeof(int)*nnz, hipMemcpyDeviceToHost); - cudaStat4 = cudaMemcpy(h_P, d_P, sizeof(int)*nnz, cudaMemcpyDeviceToHost); - // CHECK: cudaStat5 = hipMemcpy(h_cooVals, d_cooVals_sorted, sizeof(double)*nnz, hipMemcpyDeviceToHost); - cudaStat5 = cudaMemcpy(h_cooVals, d_cooVals_sorted, sizeof(double)*nnz, cudaMemcpyDeviceToHost); - // CHECK: cudaStat6 = hipDeviceSynchronize(); - cudaStat6 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - // CHECK: assert(hipSuccess == cudaStat3); - // CHECK: assert(hipSuccess == cudaStat4); - // CHECK: assert(hipSuccess == cudaStat5); - // CHECK: assert(hipSuccess == cudaStat6); - assert(cudaSuccess == cudaStat1); - assert(cudaSuccess == cudaStat2); - assert(cudaSuccess == cudaStat3); - assert(cudaSuccess == cudaStat4); - assert(cudaSuccess == cudaStat5); - assert(cudaSuccess == cudaStat6); - - printf("sorted coo: \n"); - for (int j = 0; j < nnz; j++) { - printf("(%d, %d, %f) \n", h_cooRows[j], h_cooCols[j], h_cooVals[j]); - } - - for (int j = 0; j < nnz; j++) { - printf("P[%d] = %d \n", j, h_P[j]); - } - - /* free resources */ - // CHECK: if (d_cooRows) hipFree(d_cooRows); - if (d_cooRows) cudaFree(d_cooRows); - // CHECK: if (d_cooCols) hipFree(d_cooCols); - if (d_cooCols) cudaFree(d_cooCols); - // CHECK: if (d_P) hipFree(d_P); - if (d_P) 
cudaFree(d_P); - // CHECK: if (d_cooVals) hipFree(d_cooVals); - if (d_cooVals) cudaFree(d_cooVals); - // CHECK: if (d_cooVals_sorted) hipFree(d_cooVals_sorted); - if (d_cooVals_sorted) cudaFree(d_cooVals_sorted); - // CHECK: if (pBuffer) hipFree(pBuffer); - if (pBuffer) cudaFree(pBuffer); - // if (handle) hipsparseDestroy(handle); - if (handle) cusparseDestroy(handle); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_04.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_04.cu deleted file mode 100644 index 18ba9b006c..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_04.cu +++ /dev/null @@ -1,261 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -void printMatrix(int m, int n, const float*A, int lda, const char* name) -{ - for (int row = 0; row < m; row++) { - for (int col = 0; col < n; col++) { - float Areg = A[row + col * lda]; - printf("%s(%d,%d) = %f\n", name, row + 1, col + 1, Areg); - } - } -} - -void printCsr( - int m, - int n, - int nnz, - // CHECK: const hipsparseMatDescr_t descrA, - const cusparseMatDescr_t descrA, - const float *csrValA, - const int *csrRowPtrA, - const int *csrColIndA, - const char* name) -{ - // CHECK: const int base = (hipsparseGetMatIndexBase(descrA) != HIPSPARSE_INDEX_BASE_ONE) ? 0 : 1; - const int base = (cusparseGetMatIndexBase(descrA) != CUSPARSE_INDEX_BASE_ONE) ? 
0 : 1; - - printf("matrix %s is %d-by-%d, nnz=%d, base=%d\n", name, m, n, nnz, base); - for (int row = 0; row < m; row++) { - const int start = csrRowPtrA[row] - base; - const int end = csrRowPtrA[row + 1] - base; - for (int colidx = start; colidx < end; colidx++) { - const int col = csrColIndA[colidx] - base; - const float Areg = csrValA[colidx]; - printf("%s(%d,%d) = %f\n", name, row + 1, col + 1, Areg); - } - } -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t handle = NULL; - cusparseHandle_t handle = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseMatDescr_t descrC = NULL; - cusparseMatDescr_t descrC = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - // CHECK: hipError_t cudaStat2 = hipSuccess; - // CHECK: hipError_t cudaStat3 = hipSuccess; - // CHECK: hipError_t cudaStat4 = hipSuccess; - // CHECK: hipError_t cudaStat5 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - cudaError_t cudaStat2 = cudaSuccess; - cudaError_t cudaStat3 = cudaSuccess; - cudaError_t cudaStat4 = cudaSuccess; - cudaError_t cudaStat5 = cudaSuccess; - const int m = 4; - const int n = 4; - const int lda = m; - /* - * | 1 0 2 -3 | - * | 0 4 0 0 | - * A = | 5 0 6 7 | - * | 0 8 0 9 | - * - */ - const float A[lda*n] = { 1, 0, 5, 0, 0, 4, 0, 8, 2, 0, 6, 0, -3, 0, 7, 9 }; - int* csrRowPtrC = NULL; - int* csrColIndC = NULL; - float* csrValC = NULL; - - float *d_A = NULL; - int *d_csrRowPtrC = NULL; - int *d_csrColIndC = NULL; - float *d_csrValC = NULL; - - size_t lworkInBytes = 0; - char *d_work = NULL; - - int nnzC = 0; - - float threshold = 4.1; /* remove Aij <= 4.1 */ -// float threshold = 0; /* remove zeros */ - - printf("example of pruneDense2csr \n"); - - printf("prune |A(i,j)| <= threshold \n"); - printf("threshold = %E \n", threshold); - - printMatrix(m, n, A, lda, "A"); - - /* step 1: create 
cusparse handle, bind a stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&handle); - status = cusparseCreate(&handle); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: status = hipsparseSetStream(handle, stream); - status = cusparseSetStream(handle, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - /* step 2: configuration of matrix C */ - // CHECK: status = hipsparseCreateMatDescr(&descrC); - status = cusparseCreateMatDescr(&descrC); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: hipsparseSetMatIndexBase(descrC, HIPSPARSE_INDEX_BASE_ZERO); - cusparseSetMatIndexBase(descrC, CUSPARSE_INDEX_BASE_ZERO); - // CHECK: hipsparseSetMatType(descrC, HIPSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatType(descrC, CUSPARSE_MATRIX_TYPE_GENERAL); - // CHECK: cudaStat1 = hipMalloc((void**)&d_A, sizeof(float)*lda*n); - cudaStat1 = cudaMalloc((void**)&d_A, sizeof(float)*lda*n); - // CHECK: cudaStat2 = hipMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); - cudaStat2 = cudaMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - assert(cudaSuccess == cudaStat1); - assert(cudaSuccess == cudaStat2); - - /* step 3: query workspace */ - // CHECK: cudaStat1 = hipMemcpy(d_A, A, sizeof(float)*lda*n, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_A, A, sizeof(float)*lda*n, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // TODO: status = hipsparseSpruneDense2csr_bufferSizeExt( - status = cusparseSpruneDense2csr_bufferSizeExt( 
- handle, - m, - n, - d_A, - lda, - &threshold, - descrC, - d_csrValC, - d_csrRowPtrC, - d_csrColIndC, - &lworkInBytes); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - printf("lworkInBytes (prune) = %lld \n", (long long)lworkInBytes); - // CHECK: if (NULL != d_work) { hipFree(d_work); } - if (NULL != d_work) { cudaFree(d_work); } - // CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); - cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 4: compute csrRowPtrC and nnzC */ - // TODO: status = hipsparseSpruneDense2csrNnz( - status = cusparseSpruneDense2csrNnz( - handle, - m, - n, - d_A, - lda, - &threshold, - descrC, - d_csrRowPtrC, - &nnzC, /* host */ - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - printf("nnzC = %d\n", nnzC); - if (0 == nnzC) { - printf("C is empty \n"); - return 0; - } - - /* step 5: compute csrColIndC and csrValC */ - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); - cudaStat1 = cudaMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); - // CHECK: cudaStat2 = hipMalloc((void**)&d_csrValC, sizeof(float) * nnzC); - cudaStat2 = cudaMalloc((void**)&d_csrValC, sizeof(float) * nnzC); - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - assert(cudaSuccess == cudaStat1); - assert(cudaSuccess == cudaStat2); - // TODO: status = hipsparseSpruneDense2csr( - status = cusparseSpruneDense2csr( - handle, - m, - n, - d_A, - lda, - &threshold, - descrC, - d_csrValC, - d_csrRowPtrC, - d_csrColIndC, - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - 
// CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 6: output C */ - csrRowPtrC = (int*)malloc(sizeof(int)*(m + 1)); - csrColIndC = (int*)malloc(sizeof(int)*nnzC); - csrValC = (float*)malloc(sizeof(float)*nnzC); - assert(NULL != csrRowPtrC); - assert(NULL != csrColIndC); - assert(NULL != csrValC); - // CHECK: cudaStat1 = hipMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), cudaMemcpyDeviceToHost); - // CHECK: cudaStat2 = hipMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, hipMemcpyDeviceToHost); - cudaStat2 = cudaMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, cudaMemcpyDeviceToHost); - // CHECK: cudaStat3 = hipMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, hipMemcpyDeviceToHost); - cudaStat3 = cudaMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - // CHECK: assert(hipSuccess == cudaStat3); - assert(cudaSuccess == cudaStat1); - assert(cudaSuccess == cudaStat2); - assert(cudaSuccess == cudaStat3); - - printCsr(m, n, nnzC, descrC, csrValC, csrRowPtrC, csrColIndC, "C"); - - /* free resources */ - // CHECK: if (d_A) hipFree(d_A); - if (d_A) cudaFree(d_A); - // CHECK: if (d_csrRowPtrC) hipFree(d_csrRowPtrC); - if (d_csrRowPtrC) cudaFree(d_csrRowPtrC); - // CHECK: if (d_csrColIndC) hipFree(d_csrColIndC); - if (d_csrColIndC) cudaFree(d_csrColIndC); - // CHECK: if (d_csrValC) hipFree(d_csrValC); - if (d_csrValC) cudaFree(d_csrValC); - - if (csrRowPtrC) free(csrRowPtrC); - if (csrColIndC) free(csrColIndC); - if (csrValC) free(csrValC); - // CHECK: if (handle) hipsparseDestroy(handle); - if (handle) cusparseDestroy(handle); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: if (descrC) 
hipsparseDestroyMatDescr(descrC); - if (descrC) cusparseDestroyMatDescr(descrC); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_05.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_05.cu deleted file mode 100644 index 1155af09e1..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_05.cu +++ /dev/null @@ -1,288 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -void printCsr( - int m, - int n, - int nnz, - // CHECK: const hipsparseMatDescr_t descrA, - const cusparseMatDescr_t descrA, - const float *csrValA, - const int *csrRowPtrA, - const int *csrColIndA, - const char* name) -{ - // CHECK: const int base = (hipsparseGetMatIndexBase(descrA) != HIPSPARSE_INDEX_BASE_ONE) ? 0 : 1; - const int base = (cusparseGetMatIndexBase(descrA) != CUSPARSE_INDEX_BASE_ONE) ? 
0 : 1; - - printf("matrix %s is %d-by-%d, nnz=%d, base=%d, output base-1\n", name, m, n, nnz, base); - for (int row = 0; row < m; row++) { - const int start = csrRowPtrA[row] - base; - const int end = csrRowPtrA[row + 1] - base; - for (int colidx = start; colidx < end; colidx++) { - const int col = csrColIndA[colidx] - base; - const float Areg = csrValA[colidx]; - printf("%s(%d,%d) = %f\n", name, row + 1, col + 1, Areg); - } - } -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t handle = NULL; - cusparseHandle_t handle = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseMatDescr_t descrA = NULL; - cusparseMatDescr_t descrA = NULL; - // CHECK: hipsparseMatDescr_t descrC = NULL; - cusparseMatDescr_t descrC = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - const int m = 4; - const int n = 4; - const int nnzA = 9; - /* - * | 1 0 2 -3 | - * | 0 4 0 0 | - * A = | 5 0 6 7 | - * | 0 8 0 9 | - * - */ - - const int csrRowPtrA[m + 1] = { 1, 4, 5, 8, 10 }; - const int csrColIndA[nnzA] = { 1, 3, 4, 2, 1, 3, 4, 2, 4 }; - const float csrValA[nnzA] = { 1, 2, -3, 4, 5, 6, 7, 8, 9 }; - - int* csrRowPtrC = NULL; - int* csrColIndC = NULL; - float* csrValC = NULL; - - int *d_csrRowPtrA = NULL; - int *d_csrColIndA = NULL; - float *d_csrValA = NULL; - - int *d_csrRowPtrC = NULL; - int *d_csrColIndC = NULL; - float *d_csrValC = NULL; - - size_t lworkInBytes = 0; - char *d_work = NULL; - - int nnzC = 0; - - float threshold = 4.1; /* remove Aij <= 4.1 */ -// float threshold = 0; /* remove zeros */ - - printf("example of pruneCsr2csr \n"); - - printf("prune |A(i,j)| <= threshold \n"); - printf("threshold = %E \n", threshold); - - /* step 1: create cusparse handle, bind a stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - 
cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&handle); - status = cusparseCreate(&handle); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: status = hipsparseSetStream(handle, stream); - status = cusparseSetStream(handle, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - /* step 2: configuration of matrix A and C */ - // CHECK: status = hipsparseCreateMatDescr(&descrA); - status = cusparseCreateMatDescr(&descrA); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - /* A is base-1*/ - // CHECK: hipsparseSetMatIndexBase(descrA, HIPSPARSE_INDEX_BASE_ONE); - cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ONE); - // CHECK: hipsparseSetMatType(descrA, HIPSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL); - // CHECK: status = hipsparseCreateMatDescr(&descrC); - status = cusparseCreateMatDescr(&descrC); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - /* C is base-0 */ - // CHECK: hipsparseSetMatIndexBase(descrC, HIPSPARSE_INDEX_BASE_ZERO); - cusparseSetMatIndexBase(descrC, CUSPARSE_INDEX_BASE_ZERO); - // CHECK: hipsparseSetMatType(descrC, HIPSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatType(descrC, CUSPARSE_MATRIX_TYPE_GENERAL); - - printCsr(m, n, nnzA, descrA, csrValA, csrRowPtrA, csrColIndA, "A"); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrRowPtrA, sizeof(int)*(m + 1)); - cudaStat1 = cudaMalloc((void**)&d_csrRowPtrA, sizeof(int)*(m + 1)); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndA, sizeof(int)*nnzA); - cudaStat1 = cudaMalloc((void**)&d_csrColIndA, 
sizeof(int)*nnzA); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrValA, sizeof(float)*nnzA); - cudaStat1 = cudaMalloc((void**)&d_csrValA, sizeof(float)*nnzA); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); - cudaStat1 = cudaMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int)*(m + 1), hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int)*(m + 1), cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrColIndA, csrColIndA, sizeof(int)*nnzA, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrColIndA, csrColIndA, sizeof(int)*nnzA, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrValA, csrValA, sizeof(float)*nnzA, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrValA, csrValA, sizeof(float)*nnzA, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 3: query workspace */ - // TODO: status = hipsparseSpruneCsr2csr_bufferSizeExt( - status = cusparseSpruneCsr2csr_bufferSizeExt( - handle, - m, - n, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - &threshold, - descrC, - d_csrValC, - d_csrRowPtrC, - d_csrColIndC, - &lworkInBytes); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - printf("lworkInBytes (prune) = %lld \n", (long long)lworkInBytes); - // CHECK: if (NULL != d_work) { hipFree(d_work); } - if (NULL != d_work) { cudaFree(d_work); } - // cudaStat1 = 
hipMalloc((void**)&d_work, lworkInBytes); - cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 4: compute csrRowPtrC and nnzC */ - // TODO: status = hipsparseSpruneCsr2csrNnz( - status = cusparseSpruneCsr2csrNnz( - handle, - m, - n, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - &threshold, - descrC, - d_csrRowPtrC, - &nnzC, /* host */ - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - printf("nnzC = %d\n", nnzC); - if (0 == nnzC) { - printf("C is empty \n"); - return 0; - } - /* step 5: compute csrColIndC and csrValC */ - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); - cudaStat1 = cudaMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrValC, sizeof(float) * nnzC); - cudaStat1 = cudaMalloc((void**)&d_csrValC, sizeof(float) * nnzC); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // TODO: status = hipsparseSpruneCsr2csr( - status = cusparseSpruneCsr2csr( - handle, - m, - n, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - &threshold, - descrC, - d_csrValC, - d_csrRowPtrC, - d_csrColIndC, - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 6: output C */ - csrRowPtrC = (int*)malloc(sizeof(int)*(m + 1)); - csrColIndC = (int*)malloc(sizeof(int)*nnzC); - csrValC = 
(float*)malloc(sizeof(float)*nnzC); - assert(NULL != csrRowPtrC); - assert(NULL != csrColIndC); - assert(NULL != csrValC); - // CHECK: cudaStat1 = hipMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - printCsr(m, n, nnzC, descrC, csrValC, csrRowPtrC, csrColIndC, "C"); - /* free resources */ - // CHECK: if (d_csrRowPtrA) hipFree(d_csrRowPtrA); - if (d_csrRowPtrA) cudaFree(d_csrRowPtrA); - // CHECK: if (d_csrColIndA) hipFree(d_csrColIndA); - if (d_csrColIndA) cudaFree(d_csrColIndA); - // CHECK: if (d_csrValA) hipFree(d_csrValA); - if (d_csrValA) cudaFree(d_csrValA); - // CHECK: if (d_csrRowPtrC) hipFree(d_csrRowPtrC); - if (d_csrRowPtrC) cudaFree(d_csrRowPtrC); - // CHECK: if (d_csrColIndC) hipFree(d_csrColIndC); - if (d_csrColIndC) cudaFree(d_csrColIndC); - // CHECK: if (d_csrValC) hipFree(d_csrValC); - if (d_csrValC) cudaFree(d_csrValC); - if (csrRowPtrC) free(csrRowPtrC); - if (csrColIndC) free(csrColIndC); - if (csrValC) free(csrValC); - // CHECK: if (handle) hipsparseDestroy(handle); - if (handle) cusparseDestroy(handle); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: if (descrA) hipsparseDestroyMatDescr(descrA); - if (descrA) cusparseDestroyMatDescr(descrA); - // 
CHECK: if (descrC) hipsparseDestroyMatDescr(descrC); - if (descrC) cusparseDestroyMatDescr(descrC); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_06.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_06.cu deleted file mode 100644 index 872750a5d5..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_06.cu +++ /dev/null @@ -1,269 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -void printMatrix(int m, int n, const float*A, int lda, const char* name) -{ - for (int row = 0; row < m; row++) { - for (int col = 0; col < n; col++) { - float Areg = A[row + col * lda]; - printf("%s(%d,%d) = %f\n", name, row + 1, col + 1, Areg); - } - } -} - -void printCsr( - int m, - int n, - int nnz, - // CHECK: const hipsparseMatDescr_t descrA, - const cusparseMatDescr_t descrA, - const float *csrValA, - const int *csrRowPtrA, - const int *csrColIndA, - const char* name) -{ - // CHECK: const int base = (hipsparseGetMatIndexBase(descrA) != HIPSPARSE_INDEX_BASE_ONE) ? 0 : 1; - const int base = (cusparseGetMatIndexBase(descrA) != CUSPARSE_INDEX_BASE_ONE) ? 
0 : 1; - - printf("matrix %s is %d-by-%d, nnz=%d, base=%d, output base-1\n", name, m, n, nnz, base); - for (int row = 0; row < m; row++) { - const int start = csrRowPtrA[row] - base; - const int end = csrRowPtrA[row + 1] - base; - for (int colidx = start; colidx < end; colidx++) { - const int col = csrColIndA[colidx] - base; - const float Areg = csrValA[colidx]; - printf("%s(%d,%d) = %f\n", name, row + 1, col + 1, Areg); - } - } -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t handle = NULL; - cusparseHandle_t handle = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseMatDescr_t descrC = NULL; - cusparseMatDescr_t descrC = NULL; - pruneInfo_t info = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - // CHECK: hipError_t cudaStat2 = hipSuccess; - // CHECK: hipError_t cudaStat3 = hipSuccess; - // CHECK: hipError_t cudaStat4 = hipSuccess; - // CHECK: hipError_t cudaStat5 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - cudaError_t cudaStat2 = cudaSuccess; - cudaError_t cudaStat3 = cudaSuccess; - cudaError_t cudaStat4 = cudaSuccess; - cudaError_t cudaStat5 = cudaSuccess; - const int m = 4; - const int n = 4; - const int lda = m; - /* - * | 1 0 2 -3 | - * | 0 4 0 0 | - * A = | 5 0 6 7 | - * | 0 8 0 9 | - * - */ - const float A[lda*n] = { 1, 0, 5, 0, 0, 4, 0, 8, 2, 0, 6, 0, -3, 0, 7, 9 }; - int* csrRowPtrC = NULL; - int* csrColIndC = NULL; - float* csrValC = NULL; - - float *d_A = NULL; - int *d_csrRowPtrC = NULL; - int *d_csrColIndC = NULL; - float *d_csrValC = NULL; - - size_t lworkInBytes = 0; - char *d_work = NULL; - - int nnzC = 0; - - float percentage = 50; /* 50% of nnz */ - - printf("example of pruneDense2csrByPercentage \n"); - - printf("prune out %.1f percentage of A \n", percentage); - - printMatrix(m, n, A, lda, "A"); - - /* step 1: create cusparse handle, bind a 
stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&handle); - status = cusparseCreate(&handle); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: status = hipsparseSetStream(handle, stream); - status = cusparseSetStream(handle, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // TODO: status = hipsparseCreatePruneInfo(&info); - status = cusparseCreatePruneInfo(&info); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - /* step 2: configuration of matrix C */ - // CHECK: status = hipsparseCreateMatDescr(&descrC); - status = cusparseCreateMatDescr(&descrC); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: hipsparseSetMatIndexBase(descrC, HIPSPARSE_INDEX_BASE_ZERO); - cusparseSetMatIndexBase(descrC, CUSPARSE_INDEX_BASE_ZERO); - // CHECK: hipsparseSetMatType(descrC, HIPSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatType(descrC, CUSPARSE_MATRIX_TYPE_GENERAL); - // CHECK: cudaStat1 = hipMalloc((void**)&d_A, sizeof(float)*lda*n); - cudaStat1 = cudaMalloc((void**)&d_A, sizeof(float)*lda*n); - // CHECK: cudaStat2 = hipMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); - cudaStat2 = cudaMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - assert(cudaSuccess == cudaStat1); - assert(cudaSuccess == cudaStat2); - // CHECK: cudaStat1 = hipMemcpy(d_A, A, sizeof(float)*lda*n, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_A, A, sizeof(float)*lda*n, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - 
assert(cudaSuccess == cudaStat1); - /* step 3: query workspace */ - // TODO: status = hipsparseSpruneDense2csrByPercentage_bufferSizeExt( - status = cusparseSpruneDense2csrByPercentage_bufferSizeExt( - handle, - m, - n, - d_A, - lda, - percentage, - descrC, - d_csrValC, - d_csrRowPtrC, - d_csrColIndC, - info, - &lworkInBytes); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - printf("lworkInBytes = %lld \n", (long long)lworkInBytes); - // CHECK: if (NULL != d_work) { hipFree(d_work); } - if (NULL != d_work) { cudaFree(d_work); } - // CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); - cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 4: compute csrRowPtrC and nnzC */ - // TODO: status = hipsparseSpruneDense2csrNnzByPercentage( - status = cusparseSpruneDense2csrNnzByPercentage( - handle, - m, - n, - d_A, - lda, - percentage, - descrC, - d_csrRowPtrC, - &nnzC, /* host */ - info, - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - printf("nnzC = %d\n", nnzC); - if (0 == nnzC) { - printf("C is empty \n"); - return 0; - } - - /* step 5: compute csrColIndC and csrValC */ - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); - cudaStat1 = cudaMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); - // CHECK: cudaStat2 = hipMalloc((void**)&d_csrValC, sizeof(float) * nnzC); - cudaStat2 = cudaMalloc((void**)&d_csrValC, sizeof(float) * nnzC); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - assert(cudaSuccess == cudaStat2); - // TODO: status = hipsparseSpruneDense2csrByPercentage( - status 
= cusparseSpruneDense2csrByPercentage( - handle, - m, - n, - d_A, - lda, - percentage, - descrC, - d_csrValC, - d_csrRowPtrC, - d_csrColIndC, - info, - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 7: output C */ - csrRowPtrC = (int*)malloc(sizeof(int)*(m + 1)); - csrColIndC = (int*)malloc(sizeof(int)*nnzC); - csrValC = (float*)malloc(sizeof(float)*nnzC); - assert(NULL != csrRowPtrC); - assert(NULL != csrColIndC); - assert(NULL != csrValC); - // CHECK: cudaStat1 = hipMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), cudaMemcpyDeviceToHost); - // CHECK: cudaStat2 = hipMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, hipMemcpyDeviceToHost); - cudaStat2 = cudaMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, cudaMemcpyDeviceToHost); - // CHECK: cudaStat3 = hipMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, hipMemcpyDeviceToHost); - cudaStat3 = cudaMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - // CHECK: assert(hipSuccess == cudaStat2); - // CHECK: assert(hipSuccess == cudaStat3); - assert(cudaSuccess == cudaStat1); - assert(cudaSuccess == cudaStat2); - assert(cudaSuccess == cudaStat3); - - printCsr(m, n, nnzC, descrC, csrValC, csrRowPtrC, csrColIndC, "C"); - - /* free resources */ - // CHECK: if (d_A) hipFree(d_A); - if (d_A) cudaFree(d_A); - // CHECK: if (d_csrRowPtrC) hipFree(d_csrRowPtrC); - if (d_csrRowPtrC) cudaFree(d_csrRowPtrC); - // CHECK: if (d_csrColIndC) hipFree(d_csrColIndC); - if (d_csrColIndC) cudaFree(d_csrColIndC); - // CHECK: if (d_csrValC) hipFree(d_csrValC); - if (d_csrValC) cudaFree(d_csrValC); - - if (csrRowPtrC) free(csrRowPtrC); - if 
(csrColIndC) free(csrColIndC); - if (csrValC) free(csrValC); - // CHECK: if (handle) hipsparseDestroy(handle); - if (handle) cusparseDestroy(handle); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: if (descrC) hipsparseDestroyMatDescr(descrC); - if (descrC) cusparseDestroyMatDescr(descrC); - // TODO: if (info) hipsparseDestroyPruneInfo(info); - if (info) cusparseDestroyPruneInfo(info); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - return 0; -} - diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_07.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_07.cu deleted file mode 100644 index cc938d7da4..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_07.cu +++ /dev/null @@ -1,302 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -void printCsr( - int m, - int n, - int nnz, - // CHECK: const hipsparseMatDescr_t descrA, - const cusparseMatDescr_t descrA, - const float *csrValA, - const int *csrRowPtrA, - const int *csrColIndA, - const char* name) -{ - // CHECK: const int base = (hipsparseGetMatIndexBase(descrA) != HIPSPARSE_INDEX_BASE_ONE) ? 0 : 1; - const int base = (cusparseGetMatIndexBase(descrA) != CUSPARSE_INDEX_BASE_ONE) ? 
0 : 1; - - printf("matrix %s is %d-by-%d, nnz=%d, base=%d, output base-1\n", name, m, n, nnz, base); - for (int row = 0; row < m; row++) { - const int start = csrRowPtrA[row] - base; - const int end = csrRowPtrA[row + 1] - base; - for (int colidx = start; colidx < end; colidx++) { - const int col = csrColIndA[colidx] - base; - const float Areg = csrValA[colidx]; - printf("%s(%d,%d) = %f\n", name, row + 1, col + 1, Areg); - } - } -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t handle = NULL; - cusparseHandle_t handle = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseMatDescr_t descrA = NULL; - cusparseMatDescr_t descrA = NULL; - // CHECK: hipsparseMatDescr_t descrC = NULL; - cusparseMatDescr_t descrC = NULL; - pruneInfo_t info = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - const int m = 4; - const int n = 4; - const int nnzA = 9; - /* - * | 1 0 2 -3 | - * | 0 4 0 0 | - * A = | 5 0 6 7 | - * | 0 8 0 9 | - * - */ - - const int csrRowPtrA[m + 1] = { 1, 4, 5, 8, 10 }; - const int csrColIndA[nnzA] = { 1, 3, 4, 2, 1, 3, 4, 2, 4 }; - const float csrValA[nnzA] = { 1, 2, -3, 4, 5, 6, 7, 8, 9 }; - - int* csrRowPtrC = NULL; - int* csrColIndC = NULL; - float* csrValC = NULL; - - int *d_csrRowPtrA = NULL; - int *d_csrColIndA = NULL; - float *d_csrValA = NULL; - - int *d_csrRowPtrC = NULL; - int *d_csrColIndC = NULL; - float *d_csrValC = NULL; - - size_t lworkInBytes = 0; - char *d_work = NULL; - - int nnzC = 0; - - float percentage = 20; /* remove 20% of nonzeros */ - - printf("example of pruneCsr2csrByPercentage \n"); - - printf("prune %.1f percent of nonzeros \n", percentage); - - /* step 1: create cusparse handle, bind a stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = 
cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&handle); - status = cusparseCreate(&handle); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: status = hipsparseSetStream(handle, stream); - status = cusparseSetStream(handle, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // TODO: status = hipsparseCreatePruneInfo(&info); - status = cusparseCreatePruneInfo(&info); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - /* step 2: configuration of matrix C */ - // CHECK: status = hipsparseCreateMatDescr(&descrA); - status = cusparseCreateMatDescr(&descrA); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - /* A is base-1*/ - // CHECK: hipsparseSetMatIndexBase(descrA, HIPSPARSE_INDEX_BASE_ONE); - cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ONE); - // CHECK: hipsparseSetMatType(descrA, HIPSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL); - // CHECK: status = hipsparseCreateMatDescr(&descrC); - status = cusparseCreateMatDescr(&descrC); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - /* C is base-0 */ - // CHECK: hipsparseSetMatIndexBase(descrC, HIPSPARSE_INDEX_BASE_ZERO); - cusparseSetMatIndexBase(descrC, CUSPARSE_INDEX_BASE_ZERO); - // CHECK: hipsparseSetMatType(descrC, HIPSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatType(descrC, CUSPARSE_MATRIX_TYPE_GENERAL); - - printCsr(m, n, nnzA, descrA, csrValA, csrRowPtrA, csrColIndA, "A"); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrRowPtrA, sizeof(int)*(m + 1)); - cudaStat1 = cudaMalloc((void**)&d_csrRowPtrA, sizeof(int)*(m + 1)); - // CHECK: assert(hipSuccess == 
cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndA, sizeof(int)*nnzA); - cudaStat1 = cudaMalloc((void**)&d_csrColIndA, sizeof(int)*nnzA); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrValA, sizeof(float)*nnzA); - cudaStat1 = cudaMalloc((void**)&d_csrValA, sizeof(float)*nnzA); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); - cudaStat1 = cudaMalloc((void**)&d_csrRowPtrC, sizeof(int)*(m + 1)); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int)*(m + 1), hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int)*(m + 1), cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrColIndA, csrColIndA, sizeof(int)*nnzA, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrColIndA, csrColIndA, sizeof(int)*nnzA, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrValA, csrValA, sizeof(float)*nnzA, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrValA, csrValA, sizeof(float)*nnzA, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 3: query workspace */ - // TODO: status = hipsparseSpruneCsr2csrByPercentage_bufferSizeExt( - status = cusparseSpruneCsr2csrByPercentage_bufferSizeExt( - handle, - m, - n, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - percentage, - descrC, - d_csrValC, - d_csrRowPtrC, - d_csrColIndC, - info, - &lworkInBytes); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - 
printf("lworkInBytes = %lld \n", (long long)lworkInBytes); - // CHECK: if (NULL != d_work) { hipFree(d_work); } - if (NULL != d_work) { cudaFree(d_work); } - // CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); - cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 4: compute csrRowPtrC and nnzC */ - // TODO: status = hipsparseSpruneCsr2csrNnzByPercentage( - status = cusparseSpruneCsr2csrNnzByPercentage( - handle, - m, - n, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - percentage, - descrC, - d_csrRowPtrC, - &nnzC, /* host */ - info, - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - printf("nnzC = %d\n", nnzC); - if (0 == nnzC) { - printf("C is empty \n"); - return 0; - } - - /* step 5: compute csrColIndC and csrValC */ - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); - cudaStat1 = cudaMalloc((void**)&d_csrColIndC, sizeof(int) * nnzC); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrValC, sizeof(float) * nnzC); - cudaStat1 = cudaMalloc((void**)&d_csrValC, sizeof(float) * nnzC); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // TODO: status = hipsparseSpruneCsr2csrByPercentage( - status = cusparseSpruneCsr2csrByPercentage( - handle, - m, - n, - nnzA, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - percentage, - descrC, - d_csrValC, - d_csrRowPtrC, - d_csrColIndC, - info, - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = 
cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 6: output C */ - csrRowPtrC = (int*)malloc(sizeof(int)*(m + 1)); - csrColIndC = (int*)malloc(sizeof(int)*nnzC); - csrValC = (float*)malloc(sizeof(float)*nnzC); - assert(NULL != csrRowPtrC); - assert(NULL != csrColIndC); - assert(NULL != csrValC); - // CHECK: cudaStat1 = hipMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(csrRowPtrC, d_csrRowPtrC, sizeof(int)*(m + 1), cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(csrColIndC, d_csrColIndC, sizeof(int)*nnzC, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(csrValC, d_csrValC, sizeof(float)*nnzC, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - printCsr(m, n, nnzC, descrC, csrValC, csrRowPtrC, csrColIndC, "C"); - - /* free resources */ - // CHECK: if (d_csrRowPtrA) hipFree(d_csrRowPtrA); - if (d_csrRowPtrA) cudaFree(d_csrRowPtrA); - // CHECK: if (d_csrColIndA) hipFree(d_csrColIndA); - if (d_csrColIndA) cudaFree(d_csrColIndA); - // CHECK: if (d_csrValA) hipFree(d_csrValA); - if (d_csrValA) cudaFree(d_csrValA); - // CHECK: if (d_csrRowPtrC) hipFree(d_csrRowPtrC); - if (d_csrRowPtrC) cudaFree(d_csrRowPtrC); - // CHECK: if (d_csrColIndC) hipFree(d_csrColIndC); - if (d_csrColIndC) cudaFree(d_csrColIndC); - // CHECK: if (d_csrValC) hipFree(d_csrValC); - if (d_csrValC) cudaFree(d_csrValC); - - if (csrRowPtrC) free(csrRowPtrC); - if (csrColIndC) free(csrColIndC); - if (csrValC) free(csrValC); - // CHECK: if (handle) 
hipsparseDestroy(handle); - if (handle) cusparseDestroy(handle); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: if (descrA) hipsparseDestroyMatDescr(descrA); - if (descrA) cusparseDestroyMatDescr(descrA); - // CHECK: if (descrC) hipsparseDestroyMatDescr(descrC); - if (descrC) cusparseDestroyMatDescr(descrC); - // TODO: if (info) hipsparseDestroyPruneInfo(info); - if (info) cusparseDestroyPruneInfo(info); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_08.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_08.cu deleted file mode 100644 index 2c826935ef..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_08.cu +++ /dev/null @@ -1,413 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -// NOTE: CUDA 10.0 - -/* - * compute | b - A*x|_inf - */ -void residaul_eval( - int n, - const float *dl, - const float *d, - const float *du, - const float *b, - const float *x, - float *r_nrminf_ptr) -{ - float r_nrminf = 0; - for (int i = 0; i < n; i++) { - float dot = 0; - if (i > 0) { - dot += dl[i] * x[i - 1]; - } - dot += d[i] * x[i]; - if (i < (n - 1)) { - dot += du[i] * x[i + 1]; - } - float ri = b[i] - dot; - r_nrminf = (r_nrminf > fabs(ri)) ? 
r_nrminf : fabs(ri); - } - - *r_nrminf_ptr = r_nrminf; -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t cusparseH = NULL; - cusparseHandle_t cusparseH = NULL; - // CHECK: hipblasHandle_t cublasH = NULL; - cublasHandle_t cublasH = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipblasStatus_t cublasStat = HIPBLAS_STATUS_SUCCESS; - cublasStatus_t cublasStat = CUBLAS_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - - const int n = 3; - const int batchSize = 2; - /* - * | 1 6 0 | | 1 | | -0.603960 | - * A1 =| 4 2 7 |, b1 = | 2 |, x1 = | 0.267327 | - * | 0 5 3 | | 3 | | 0.554455 | - * - * | 8 13 0 | | 4 | | -0.063291 | - * A2 =| 11 9 14 |, b2 = | 5 |, x2 = | 0.346641 | - * | 0 12 10 | | 6 | | 0.184031 | - */ - - /* - * A = (dl, d, du), B and X are in aggregate format - */ - const float dl[n * batchSize] = { 0, 4, 5, 0, 11, 12 }; - const float d[n * batchSize] = { 1, 2, 3, 8, 9, 10 }; - const float du[n * batchSize] = { 6, 7, 0, 13, 14, 0 }; - const float B[n * batchSize] = { 1, 2, 3, 4, 5, 6 }; - float X[n * batchSize]; /* Xj = Aj \ Bj */ - -/* device memory - * (d_dl0, d_d0, d_du0) is aggregate format - * (d_dl, d_d, d_du) is interleaved format - */ - float *d_dl0 = NULL; - float *d_d0 = NULL; - float *d_du0 = NULL; - float *d_dl = NULL; - float *d_d = NULL; - float *d_du = NULL; - float *d_B = NULL; - float *d_X = NULL; - - size_t lworkInBytes = 0; - char *d_work = NULL; - - /* - * algo = 0: cuThomas (unstable) - * algo = 1: LU with pivoting (stable) - * algo = 2: QR (stable) - */ - const int algo = 2; - - const float h_one = 1; - const float h_zero = 0; - - printf("example of gtsv (interleaved format) \n"); - printf("choose algo = 0,1,2 to select different algorithms \n"); - printf("n = %d, batchSize = %d, algo = %d \n", n, batchSize, 
algo); - - /* step 1: create cusparse/cublas handle, bind a stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&cusparseH); - status = cusparseCreate(&cusparseH); - //CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: status = hipsparseSetStream(cusparseH, stream); - status = cusparseSetStream(cusparseH, stream); - //CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cublasStat = hipblasCreate(&cublasH); - cublasStat = cublasCreate(&cublasH); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - // CHECK: cublasStat = hipblasSetStream(cublasH, stream); - cublasStat = cublasSetStream(cublasH, stream); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* step 2: allocate device memory */ - // CHECK: cudaStat1 = hipMalloc((void**)&d_dl0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_dl0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_d0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_d0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_du0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_du0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_dl, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_dl, sizeof(float)*n*batchSize); - // CHECK: 
assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_d, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_d, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_du, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_du, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_B, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_B, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_X, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_X, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 3: prepare data in device, interleaved format */ - // CHECK: cudaStat1 = hipMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_d0, d, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_d0, d, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_du0, du, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_du0, du, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_B, B, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_B, B, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - 
// CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - /* convert dl to interleaved format - * dl = transpose(dl0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of dl */ - n, /* number of columns of dl */ - &h_one, - d_dl0, /* dl0 is n-by-batchSize */ - n, /* leading dimension of dl0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_dl, /* dl is batchSize-by-n */ - batchSize /* leading dimension of dl */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - /* convert d to interleaved format - * d = transpose(d0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of d */ - n, /* number of columns of d */ - &h_one, - d_d0, /* d0 is n-by-batchSize */ - n, /* leading dimension of d0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_d, /* d is batchSize-by-n */ - batchSize /* leading dimension of d */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* convert du to interleaved format - * du = transpose(du0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of du */ - n, /* number of columns of du */ - &h_one, - d_du0, /* du0 is n-by-batchSize */ - n, /* leading dimension of du0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_du, /* du is batchSize-by-n */ - batchSize /* leading dimension of du */ - ); - // CHECK: 
assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* convert B to interleaved format - * X = transpose(B) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of X */ - n, /* number of columns of X */ - &h_one, - d_B, /* B is n-by-batchSize */ - n, /* leading dimension of B */ - &h_zero, - NULL, - n, /* don't cae */ - d_X, /* X is batchSize-by-n */ - batchSize /* leading dimension of X */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - /* step 4: prepare workspace */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseSgtsvInterleavedBatch_bufferSizeExt( - status = cusparseSgtsvInterleavedBatch_bufferSizeExt( - cusparseH, - algo, - n, - d_dl, - d_d, - d_du, - d_X, - batchSize, - &lworkInBytes); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - printf("lworkInBytes = %lld \n", (long long)lworkInBytes); - // CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); - cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 5: solve Aj*xj = bj */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseSgtsvInterleavedBatch( - status = cusparseSgtsvInterleavedBatch( - cusparseH, - algo, - n, - d_dl, - d_d, - d_du, - d_X, - batchSize, - d_work); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 6: convert X back to aggregate format */ - /* B = transpose(X) */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: 
HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - n, /* number of rows of B */ - batchSize, /* number of columns of B */ - &h_one, - d_X, /* X is batchSize-by-n */ - batchSize, /* leading dimension of X */ - &h_zero, - NULL, - n, /* don't cae */ - d_B, /* B is n-by-batchSize */ - n /* leading dimension of B */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - /* step 7: residual evaluation */ - // CHECK: cudaStat1 = hipMemcpy(X, d_B, sizeof(float)*n*batchSize, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(X, d_B, sizeof(float)*n*batchSize, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - printf("==== x1 = inv(A1)*b1 \n"); - for (int j = 0; j < n; j++) { - printf("x1[%d] = %f\n", j, X[j]); - } - - float r1_nrminf; - residaul_eval( - n, - dl, - d, - du, - B, - X, - &r1_nrminf - ); - printf("|b1 - A1*x1| = %E\n", r1_nrminf); - - printf("\n==== x2 = inv(A2)*b2 \n"); - for (int j = 0; j < n; j++) { - printf("x2[%d] = %f\n", j, X[n + j]); - } - - float r2_nrminf; - residaul_eval( - n, - dl + n, - d + n, - du + n, - B + n, - X + n, - &r2_nrminf - ); - printf("|b2 - A2*x2| = %E\n", r2_nrminf); - - /* free resources */ - // CHECK: if (d_dl0) hipFree(d_dl0); - if (d_dl0) cudaFree(d_dl0); - // CHECK: if (d_d0) hipFree(d_d0); - if (d_d0) cudaFree(d_d0); - // CHECK: if (d_du0) hipFree(d_du0); - if (d_du0) cudaFree(d_du0); - // CHECK: if (d_dl) hipFree(d_dl); - if (d_dl) cudaFree(d_dl); - // CHECK: if (d_d) hipFree(d_d); - if (d_d) cudaFree(d_d); - // CHECK: if (d_du) hipFree(d_du); - if (d_du) cudaFree(d_du); - // CHECK: if (d_B) hipFree(d_B); - if (d_B) cudaFree(d_B); - // CHECK: if (d_X) hipFree(d_X); - if (d_X) cudaFree(d_X); - // 
CHECK: if (cusparseH) hipsparseDestroy(cusparseH); - if (cusparseH) cusparseDestroy(cusparseH); - // CHECK: if (cublasH) hipblasDestroy(cublasH); - if (cublasH) cublasDestroy(cublasH); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_09.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_09.cu deleted file mode 100644 index 3bcbd96bb0..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_09.cu +++ /dev/null @@ -1,414 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -// NOTE: CUDA 10.0 - -/* - * compute | b - A*x|_inf - */ -void residaul_eval( - int n, - const float *dl, - const float *d, - const float *du, - const float *b, - const float *x, - float *r_nrminf_ptr) -{ - float r_nrminf = 0; - for (int i = 0; i < n; i++) { - float dot = 0; - if (i > 0) { - dot += dl[i] * x[i - 1]; - } - dot += d[i] * x[i]; - if (i < (n - 1)) { - dot += du[i] * x[i + 1]; - } - float ri = b[i] - dot; - r_nrminf = (r_nrminf > fabs(ri)) ? 
r_nrminf : fabs(ri); - } - - *r_nrminf_ptr = r_nrminf; -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t cusparseH = NULL; - cusparseHandle_t cusparseH = NULL; - // CHECK: hipblasHandle_t cublasH = NULL; - cublasHandle_t cublasH = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipblasStatus_t cublasStat = HIPBLAS_STATUS_SUCCESS; - cublasStatus_t cublasStat = CUBLAS_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - - const int n = 3; - const int batchSize = 2; - /* - * | 1 6 0 | | 1 | | -0.603960 | - * A1 =| 4 2 7 |, b1 = | 2 |, x1 = | 0.267327 | - * | 0 5 3 | | 3 | | 0.554455 | - * - * | 8 13 0 | | 4 | | -0.063291 | - * A2 =| 11 9 14 |, b2 = | 5 |, x2 = | 0.346641 | - * | 0 12 10 | | 6 | | 0.184031 | - */ - - /* - * A = (dl, d, du), B and X are in aggregate format - */ - const float dl[n * batchSize] = { 0, 4, 5, 0, 11, 12 }; - const float d[n * batchSize] = { 1, 2, 3, 8, 9, 10 }; - const float du[n * batchSize] = { 6, 7, 0, 13, 14, 0 }; - const float B[n * batchSize] = { 1, 2, 3, 4, 5, 6 }; - float X[n * batchSize]; /* Xj = Aj \ Bj */ - -/* device memory - * (d_dl0, d_d0, d_du0) is aggregate format - * (d_dl, d_d, d_du) is interleaved format - */ - float *d_dl0 = NULL; - float *d_d0 = NULL; - float *d_du0 = NULL; - float *d_dl = NULL; - float *d_d = NULL; - float *d_du = NULL; - float *d_B = NULL; - float *d_X = NULL; - - size_t lworkInBytes = 0; - char *d_work = NULL; - - /* - * algo = 0: cuThomas (unstable) - * algo = 1: LU with pivoting (stable) - * algo = 2: QR (stable) - */ - const int algo = 2; - - const float h_one = 1; - const float h_zero = 0; - - printf("example of gtsv (interleaved format) \n"); - printf("choose algo = 0,1,2 to select different algorithms \n"); - printf("n = %d, batchSize = %d, algo = %d \n", n, batchSize, 
algo); - - /* step 1: create cusparse/cublas handle, bind a stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&cusparseH); - status = cusparseCreate(&cusparseH); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: status = hipsparseSetStream(cusparseH, stream); - status = cusparseSetStream(cusparseH, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cublasStat = hipblasCreate(&cublasH); - cublasStat = cublasCreate(&cublasH); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - // CHECK: cublasStat = hipblasSetStream(cublasH, stream); - cublasStat = cublasSetStream(cublasH, stream); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* step 2: allocate device memory */ - // CHECK: cudaStat1 = hipMalloc((void**)&d_dl0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_dl0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_d0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_d0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_du0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_du0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_dl, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_dl, sizeof(float)*n*batchSize); - // CHECK: 
assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_d, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_d, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_du, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_du, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_B, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_B, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_X, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_X, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 3: prepare data in device, interleaved format */ - // CHECK: cudaStat1 = hipMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_d0, d, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_d0, d, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_du0, du, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_du0, du, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_B, B, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_B, B, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - 
// CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - /* convert dl to interleaved format - * dl = transpose(dl0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of dl */ - n, /* number of columns of dl */ - &h_one, - d_dl0, /* dl0 is n-by-batchSize */ - n, /* leading dimension of dl0 */ - &h_zero, - NULL, - n, /* don't care */ - d_dl, /* dl is batchSize-by-n */ - batchSize /* leading dimension of dl */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - /* convert d to interleaved format - * d = transpose(d0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T - // CHECK: HIPBLAS_OP_T - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of d */ - n, /* number of columns of d */ - &h_one, - d_d0, /* d0 is n-by-batchSize */ - n, /* leading dimension of d0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_d, /* d is batchSize-by-n */ - batchSize /* leading dimension of d */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* convert du to interleaved format - * du = transpose(du0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T - // CHECK: HIPBLAS_OP_T - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of du */ - n, /* number of columns of du */ - &h_one, - d_du0, /* du0 is n-by-batchSize */ - n, /* leading dimension of du0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_du, /* du is batchSize-by-n */ - batchSize /* leading dimension of du */ - ); - // CHECK: 
assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* convert B to interleaved format - * X = transpose(B) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T - // CHECK: HIPBLAS_OP_T - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of X */ - n, /* number of columns of X */ - &h_one, - d_B, /* B is n-by-batchSize */ - n, /* leading dimension of B */ - &h_zero, - NULL, - n, /* don't cae */ - d_X, /* X is batchSize-by-n */ - batchSize /* leading dimension of X */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - /* step 4: prepare workspace */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseSgtsvInterleavedBatch_bufferSizeExt( - status = cusparseSgtsvInterleavedBatch_bufferSizeExt( - cusparseH, - algo, - n, - d_dl, - d_d, - d_du, - d_X, - batchSize, - &lworkInBytes); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - printf("lworkInBytes = %lld \n", (long long)lworkInBytes); - // CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); - cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 5: solve Aj*xj = bj */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseSgtsvInterleavedBatch( - status = cusparseSgtsvInterleavedBatch( - cusparseH, - algo, - n, - d_dl, - d_d, - d_du, - d_X, - batchSize, - d_work); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 6: convert X back to aggregate format */ - /* B = transpose(X) */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: 
HIPBLAS_OP_T - // CHECK: HIPBLAS_OP_T - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - n, /* number of rows of B */ - batchSize, /* number of columns of B */ - &h_one, - d_X, /* X is batchSize-by-n */ - batchSize, /* leading dimension of X */ - &h_zero, - NULL, - n, /* don't cae */ - d_B, /* B is n-by-batchSize */ - n /* leading dimension of B */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - /* step 7: residual evaluation */ - // CHECK: cudaStat1 = hipMemcpy(X, d_B, sizeof(float)*n*batchSize, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(X, d_B, sizeof(float)*n*batchSize, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - printf("==== x1 = inv(A1)*b1 \n"); - for (int j = 0; j < n; j++) { - printf("x1[%d] = %f\n", j, X[j]); - } - - float r1_nrminf; - residaul_eval( - n, - dl, - d, - du, - B, - X, - &r1_nrminf - ); - printf("|b1 - A1*x1| = %E\n", r1_nrminf); - - printf("\n==== x2 = inv(A2)*b2 \n"); - for (int j = 0; j < n; j++) { - printf("x2[%d] = %f\n", j, X[n + j]); - } - - float r2_nrminf; - residaul_eval( - n, - dl + n, - d + n, - du + n, - B + n, - X + n, - &r2_nrminf - ); - printf("|b2 - A2*x2| = %E\n", r2_nrminf); - - /* free resources */ - // CHECK: if (d_dl0) hipFree(d_dl0); - if (d_dl0) cudaFree(d_dl0); - // CHECK: if (d_d0) hipFree(d_d0); - if (d_d0) cudaFree(d_d0); - // CHECK: if (d_du0) hipFree(d_du0); - if (d_du0) cudaFree(d_du0); - // CHECK: if (d_dl) hipFree(d_dl); - if (d_dl) cudaFree(d_dl); - // CHECK: if (d_d) hipFree(d_d); - if (d_d) cudaFree(d_d); - // CHECK: if (d_du) hipFree(d_du); - if (d_du) cudaFree(d_du); - // CHECK: if (d_B) hipFree(d_B); - if (d_B) cudaFree(d_B); - // CHECK: if (d_X) hipFree(d_X); - if (d_X) cudaFree(d_X); - // 
CHECK: if (cusparseH) hipsparseDestroy(cusparseH); - if (cusparseH) cusparseDestroy(cusparseH); - // CHECK: if (cublasH) hipblasDestroy(cublasH); - if (cublasH) cublasDestroy(cublasH); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_10.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_10.cu deleted file mode 100644 index 2ab2d605de..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_10.cu +++ /dev/null @@ -1,507 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// CHECK: #include -#include -// CHECK: #include -#include -// CHECK: #include -#include - -// NOTE: CUDA 10.0 - -/* - * compute | b - A*x|_inf - */ -void residaul_eval( - int n, - const float *ds, - const float *dl, - const float *d, - const float *du, - const float *dw, - const float *b, - const float *x, - float *r_nrminf_ptr) -{ - float r_nrminf = 0; - for (int i = 0; i < n; i++) { - float dot = 0; - if (i > 1) { - dot += ds[i] * x[i - 2]; - } - if (i > 0) { - dot += dl[i] * x[i - 1]; - } - dot += d[i] * x[i]; - if (i < (n - 1)) { - dot += du[i] * x[i + 1]; - } - if (i < (n - 2)) { - dot += dw[i] * x[i + 2]; - } - float ri = b[i] - dot; - r_nrminf = (r_nrminf > fabs(ri)) ? 
r_nrminf : fabs(ri); - } - - *r_nrminf_ptr = r_nrminf; -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t cusparseH = NULL; - cusparseHandle_t cusparseH = NULL; - // CHECK: hipblasHandle_t cublasH = NULL; - cublasHandle_t cublasH = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipblasStatus_t cublasStat = HIPBLAS_STATUS_SUCCESS; - cublasStatus_t cublasStat = CUBLAS_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - - const int n = 4; - const int batchSize = 2; - - /* - * | 1 8 13 0 | | 1 | | -0.0592 | - * A1 =| 5 2 9 14 |, b1 = | 2 |, x1 = | 0.3428 | - * | 11 6 3 10 | | 3 | | -0.1295 | - * | 0 12 7 4 | | 4 | | 0.1982 | - * - * | 15 22 27 0 | | 5 | | -0.0012 | - * A2 =| 19 16 23 28 |, b2 = | 6 |, x2 = | 0.2792 | - * | 25 20 17 24 | | 7 | | -0.0416 | - * | 0 26 21 18 | | 8 | | 0.0898 | - */ - - /* - * A = (ds, dl, d, du, dw), B and X are in aggregate format - */ - const float ds[n * batchSize] = { 0, 0, 11, 12, 0, 0, 25, 26 }; - const float dl[n * batchSize] = { 0, 5, 6, 7, 0, 19, 20, 21 }; - const float d[n * batchSize] = { 1, 2, 3, 4, 15, 16, 17, 18 }; - const float du[n * batchSize] = { 8, 9, 10, 0, 22, 23, 24, 0 }; - const float dw[n * batchSize] = { 13,14, 0, 0, 27, 28, 0, 0 }; - const float B[n * batchSize] = { 1, 2, 3, 4, 5, 6, 7, 8 }; - float X[n * batchSize]; /* Xj = Aj \ Bj */ - -/* device memory - * (d_ds0, d_dl0, d_d0, d_du0, d_dw0) is aggregate format - * (d_ds, d_dl, d_d, d_du, d_dw) is interleaved format - */ - float *d_ds0 = NULL; - float *d_dl0 = NULL; - float *d_d0 = NULL; - float *d_du0 = NULL; - float *d_dw0 = NULL; - float *d_ds = NULL; - float *d_dl = NULL; - float *d_d = NULL; - float *d_du = NULL; - float *d_dw = NULL; - float *d_B = NULL; - float *d_X = NULL; - - size_t lworkInBytes = 0; - char *d_work = NULL; - - 
const float h_one = 1; - const float h_zero = 0; - - int algo = 0; /* QR factorization */ - - printf("example of gpsv (interleaved format) \n"); - printf("n = %d, batchSize = %d\n", n, batchSize); - - /* step 1: create cusparse/cublas handle, bind a stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&cusparseH); - status = cusparseCreate(&cusparseH); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: status = hipsparseSetStream(cusparseH, stream); - status = cusparseSetStream(cusparseH, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cublasStat = hipblasCreate(&cublasH); - cublasStat = cublasCreate(&cublasH); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - // CHECK: cublasStat = hipblasSetStream(cublasH, stream); - cublasStat = cublasSetStream(cublasH, stream); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - /* step 2: allocate device memory */ - // CHECK: cudaStat1 = hipMalloc((void**)&d_ds0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_ds0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_dl0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_dl0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_d0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_d0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - 
assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_du0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_du0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_dw0, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_dw0, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_ds, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_ds, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_dl, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_dl, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_d, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_d, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_du, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_du, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_dw, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_dw, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_B, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_B, sizeof(float)*n*batchSize); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_X, sizeof(float)*n*batchSize); - cudaStat1 = cudaMalloc((void**)&d_X, sizeof(float)*n*batchSize); - // CHECK: 
assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - /* step 3: prepare data in device, interleaved format */ - // CHECK: cudaStat1 = hipMemcpy(d_ds0, ds, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_ds0, ds, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_dl0, dl, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_d0, d, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_d0, d, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_du0, du, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_du0, du, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_dw0, dw, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_dw0, dw, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_B, B, sizeof(float)*n*batchSize, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_B, B, sizeof(float)*n*batchSize, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - /* convert ds to interleaved format - * ds = transpose(ds0) */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - 
CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of ds */ - n, /* number of columns of ds */ - &h_one, - d_ds0, /* ds0 is n-by-batchSize */ - n, /* leading dimension of ds0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_ds, /* ds is batchSize-by-n */ - batchSize); /* leading dimension of ds */ - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - /* convert dl to interleaved format - * dl = transpose(dl0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of dl */ - n, /* number of columns of dl */ - &h_one, - d_dl0, /* dl0 is n-by-batchSize */ - n, /* leading dimension of dl0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_dl, /* dl is batchSize-by-n */ - batchSize /* leading dimension of dl */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* convert d to interleaved format - * d = transpose(d0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of d */ - n, /* number of columns of d */ - &h_one, - d_d0, /* d0 is n-by-batchSize */ - n, /* leading dimension of d0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_d, /* d is batchSize-by-n */ - batchSize /* leading dimension of d */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* convert du to interleaved format - * du = transpose(du0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* 
number of rows of du */ - n, /* number of columns of du */ - &h_one, - d_du0, /* du0 is n-by-batchSize */ - n, /* leading dimension of du0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_du, /* du is batchSize-by-n */ - batchSize /* leading dimension of du */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - /* convert dw to interleaved format - * dw = transpose(dw0) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of dw */ - n, /* number of columns of dw */ - &h_one, - d_dw0, /* dw0 is n-by-batchSize */ - n, /* leading dimension of dw0 */ - &h_zero, - NULL, - n, /* don't cae */ - d_dw, /* dw is batchSize-by-n */ - batchSize /* leading dimension of dw */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* convert B to interleaved format - * X = transpose(B) - */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - batchSize, /* number of rows of X */ - n, /* number of columns of X */ - &h_one, - d_B, /* B is n-by-batchSize */ - n, /* leading dimension of B */ - &h_zero, - NULL, - n, /* don't cae */ - d_X, /* X is batchSize-by-n */ - batchSize /* leading dimension of X */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - - /* step 4: prepare workspace */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseSgpsvInterleavedBatch_bufferSizeExt( - status = cusparseSgpsvInterleavedBatch_bufferSizeExt( - cusparseH, - algo, - n, - d_ds, - d_dl, - d_d, - d_du, - d_dw, - d_X, - batchSize, - &lworkInBytes); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - 
assert(CUSPARSE_STATUS_SUCCESS == status); - - printf("lworkInBytes = %lld \n", (long long)lworkInBytes); - // CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); - cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - /* step 5: solve Aj*xj = bj */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseSgpsvInterleavedBatch( - status = cusparseSgpsvInterleavedBatch( - cusparseH, - algo, - n, - d_ds, - d_dl, - d_d, - d_du, - d_dw, - d_X, - batchSize, - d_work); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 6: convert X back to aggregate format */ - /* B = transpose(X) */ - // CHECK: cublasStat = hipblasSgeam( - // CHECK: HIPBLAS_OP_T, - // CHECK: HIPBLAS_OP_T, - cublasStat = cublasSgeam( - cublasH, - CUBLAS_OP_T, /* transa */ - CUBLAS_OP_T, /* transb, don't care */ - n, /* number of rows of B */ - batchSize, /* number of columns of B */ - &h_one, - d_X, /* X is batchSize-by-n */ - batchSize, /* leading dimension of X */ - &h_zero, - NULL, - n, /* don't cae */ - d_B, /* B is n-by-batchSize */ - n /* leading dimension of B */ - ); - // CHECK: assert(HIPBLAS_STATUS_SUCCESS == cublasStat); - assert(CUBLAS_STATUS_SUCCESS == cublasStat); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - /* step 7: residual evaluation */ - // CHECK: cudaStat1 = hipMemcpy(X, d_B, sizeof(float)*n*batchSize, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(X, d_B, sizeof(float)*n*batchSize, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - printf("==== x1 = inv(A1)*b1 \n"); - for (int j = 0; j < n; j++) { - printf("x1[%d] = %f\n", j, 
X[j]); - } - - float r1_nrminf; - residaul_eval( - n, - ds, - dl, - d, - du, - dw, - B, - X, - &r1_nrminf - ); - printf("|b1 - A1*x1| = %E\n", r1_nrminf); - printf("\n==== x2 = inv(A2)*b2 \n"); - for (int j = 0; j < n; j++) { - printf("x2[%d] = %f\n", j, X[n + j]); - } - - float r2_nrminf; - residaul_eval( - n, - ds + n, - dl + n, - d + n, - du + n, - dw + n, - B + n, - X + n, - &r2_nrminf - ); - printf("|b2 - A2*x2| = %E\n", r2_nrminf); - - /* free resources */ - // CHECK: if (d_ds0) hipFree(d_ds0); - if (d_ds0) cudaFree(d_ds0); - // CHECK: if (d_dl0) hipFree(d_dl0); - if (d_dl0) cudaFree(d_dl0); - // CHECK: if (d_d0) hipFree(d_d0); - if (d_d0) cudaFree(d_d0); - // CHECK: if (d_du0) hipFree(d_du0); - if (d_du0) cudaFree(d_du0); - // CHECK: if (d_dw0) hipFree(d_dw0); - if (d_dw0) cudaFree(d_dw0); - // CHECK: if (d_ds) hipFree(d_ds); - if (d_ds) cudaFree(d_ds); - // CHECK: if (d_dl) hipFree(d_dl); - if (d_dl) cudaFree(d_dl); - // CHECK: if (d_d) hipFree(d_d); - if (d_d) cudaFree(d_d); - // CHECK: if (d_du) hipFree(d_du); - if (d_du) cudaFree(d_du); - // CHECK: if (d_dw) hipFree(d_dw); - if (d_dw) cudaFree(d_dw); - // CHECK: if (d_B) hipFree(d_B); - if (d_B) cudaFree(d_B); - // CHECK: if (d_X) hipFree(d_X); - if (d_X) cudaFree(d_X); - // CHECK: if (cusparseH) hipsparseDestroy(cusparseH); - if (cusparseH) cusparseDestroy(cusparseH); - // CHECK: if (cublasH) hipblasDestroy(cublasH); - if (cublasH) cublasDestroy(cublasH); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_11.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_11.cu deleted file mode 100644 index 868f3be69e..0000000000 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_11.cu +++ /dev/null @@ -1,327 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -#include -#include -#include -// 
CHECK: #include -#include -// CHECK: #include -#include - -// NOTE: CUDA 10.0 - -/* compute | b - A*x|_inf */ -void residaul_eval( - int n, - // CHECK: const hipsparseMatDescr_t descrA, - const cusparseMatDescr_t descrA, - const float *csrVal, - const int *csrRowPtr, - const int *csrColInd, - const float *b, - const float *x, - float *r_nrminf_ptr) -{ - // CHECK: const int base = (hipsparseGetMatIndexBase(descrA) != HIPSPARSE_INDEX_BASE_ONE) ? 0 : 1; - const int base = (cusparseGetMatIndexBase(descrA) != CUSPARSE_INDEX_BASE_ONE) ? 0 : 1; - // CHECK: const int lower = (HIPSPARSE_FILL_MODE_LOWER == hipsparseGetMatFillMode(descrA)) ? 1 : 0; - const int lower = (CUSPARSE_FILL_MODE_LOWER == cusparseGetMatFillMode(descrA)) ? 1 : 0; - // CHECK: const int unit = (HIPSPARSE_DIAG_TYPE_UNIT == hipsparseGetMatDiagType(descrA)) ? 1 : 0; - const int unit = (CUSPARSE_DIAG_TYPE_UNIT == cusparseGetMatDiagType(descrA)) ? 1 : 0; - - float r_nrminf = 0; - for (int row = 0; row < n; row++) { - const int start = csrRowPtr[row] - base; - const int end = csrRowPtr[row + 1] - base; - float dot = 0; - for (int colidx = start; colidx < end; colidx++) { - const int col = csrColInd[colidx] - base; - float Aij = csrVal[colidx]; - float xj = x[col]; - if ((row == col) && unit) { - Aij = 1.0; - } - int valid = (row >= col) && lower || - (row <= col) && !lower; - if (valid) { - dot += Aij * xj; - } - } - float ri = b[row] - dot; - r_nrminf = (r_nrminf > fabs(ri)) ? 
r_nrminf : fabs(ri); - } - *r_nrminf_ptr = r_nrminf; -} - -int main(int argc, char*argv[]) -{ - // CHECK: hipsparseHandle_t handle = NULL; - cusparseHandle_t handle = NULL; - // CHECK: hipStream_t stream = NULL; - cudaStream_t stream = NULL; - // CHECK: hipsparseMatDescr_t descrA = NULL; - cusparseMatDescr_t descrA = NULL; - // NOTE: CUDA 10.0 - // TODO: csrsm2Info_t info = NULL; - csrsm2Info_t info = NULL; - // CHECK: hipsparseStatus_t status = HIPSPARSE_STATUS_SUCCESS; - cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; - // CHECK: hipError_t cudaStat1 = hipSuccess; - cudaError_t cudaStat1 = cudaSuccess; - const int nrhs = 2; - const int n = 4; - const int nnzA = 9; - // CHECK: const hipsparseSolvePolicy_t policy = HIPSPARSE_SOLVE_POLICY_NO_LEVEL; - const cusparseSolvePolicy_t policy = CUSPARSE_SOLVE_POLICY_NO_LEVEL; - const float h_one = 1.0; - /* - * | 1 0 2 -3 | - * | 0 4 0 0 | - * A = | 5 0 6 7 | - * | 0 8 0 9 | - * - * Regard A as a lower triangle matrix L with non-unit diagonal. 
- * | 1 5 | | 1 5 | - * Given B = | 2 6 |, X = L \ B = | 0.5 1.5 | - * | 3 7 | | -0.3333 -3 | - * | 4 8 | | 0 -0.4444 | - */ - const int csrRowPtrA[n + 1] = { 1, 4, 5, 8, 10 }; - const int csrColIndA[nnzA] = { 1, 3, 4, 2, 1, 3, 4, 2, 4 }; - const float csrValA[nnzA] = { 1, 2, -3, 4, 5, 6, 7, 8, 9 }; - const float B[n*nrhs] = { 1,2,3,4,5,6,7,8 }; - float X[n*nrhs]; - - int *d_csrRowPtrA = NULL; - int *d_csrColIndA = NULL; - float *d_csrValA = NULL; - float *d_B = NULL; - - size_t lworkInBytes = 0; - char *d_work = NULL; - - const int algo = 0; /* non-block version */ - - printf("example of csrsm2 \n"); - - /* step 1: create cusparse handle, bind a stream */ - // CHECK: cudaStat1 = hipStreamCreateWithFlags(&stream, hipStreamNonBlocking); - cudaStat1 = cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: status = hipsparseCreate(&handle); - status = cusparseCreate(&handle); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - status = cusparseSetStream(handle, stream); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - // NOTE: CUDA 10.0 - // TODO: status = hipsparseCreateCsrsm2Info(&info); - status = cusparseCreateCsrsm2Info(&info); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - /* step 2: configuration of matrix A */ - status = cusparseCreateMatDescr(&descrA); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - /* A is base-1*/ - // CHECK: hipsparseSetMatIndexBase(descrA, HIPSPARSE_INDEX_BASE_ONE); - cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ONE); - // CHECK: hipsparseSetMatType(descrA, HIPSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL); - /* A is lower triangle */ - // CHECK: hipsparseSetMatFillMode(descrA, 
HIPSPARSE_FILL_MODE_LOWER); - cusparseSetMatFillMode(descrA, CUSPARSE_FILL_MODE_LOWER); - /* A has non unit diagonal */ - // CHECK: hipsparseSetMatDiagType(descrA, HIPSPARSE_DIAG_TYPE_NON_UNIT); - cusparseSetMatDiagType(descrA, CUSPARSE_DIAG_TYPE_NON_UNIT); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrRowPtrA, sizeof(int)*(n + 1)); - cudaStat1 = cudaMalloc((void**)&d_csrRowPtrA, sizeof(int)*(n + 1)); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrColIndA, sizeof(int)*nnzA); - cudaStat1 = cudaMalloc((void**)&d_csrColIndA, sizeof(int)*nnzA); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_csrValA, sizeof(float)*nnzA); - cudaStat1 = cudaMalloc((void**)&d_csrValA, sizeof(float)*nnzA); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMalloc((void**)&d_B, sizeof(float)*n*nrhs); - cudaStat1 = cudaMalloc((void**)&d_B, sizeof(float)*n*nrhs); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int)*(n + 1), hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrRowPtrA, csrRowPtrA, sizeof(int)*(n + 1), cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrColIndA, csrColIndA, sizeof(int)*nnzA, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrColIndA, csrColIndA, sizeof(int)*nnzA, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 = hipMemcpy(d_csrValA, csrValA, sizeof(float)*nnzA, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_csrValA, csrValA, sizeof(float)*nnzA, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: cudaStat1 
= hipMemcpy(d_B, B, sizeof(float)*n*nrhs, hipMemcpyHostToDevice); - cudaStat1 = cudaMemcpy(d_B, B, sizeof(float)*n*nrhs, cudaMemcpyHostToDevice); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 3: query workspace */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseScsrsm2_bufferSizeExt( - // CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE, - // CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE, - status = cusparseScsrsm2_bufferSizeExt( - handle, - algo, - CUSPARSE_OPERATION_NON_TRANSPOSE, /* transA */ - CUSPARSE_OPERATION_NON_TRANSPOSE, /* transB */ - n, - nrhs, - nnzA, - &h_one, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - d_B, - n, /* ldb */ - info, - policy, - &lworkInBytes); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - - printf("lworkInBytes = %lld \n", (long long)lworkInBytes); - // CHECK: if (NULL != d_work) { hipFree(d_work); } - if (NULL != d_work) { cudaFree(d_work); } - // CHECK: cudaStat1 = hipMalloc((void**)&d_work, lworkInBytes); - cudaStat1 = cudaMalloc((void**)&d_work, lworkInBytes); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 4: analysis */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseScsrsm2_analysis( - // CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE, - // CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE, - status = cusparseScsrsm2_analysis( - handle, - algo, - CUSPARSE_OPERATION_NON_TRANSPOSE, /* transA */ - CUSPARSE_OPERATION_NON_TRANSPOSE, /* transB */ - n, - nrhs, - nnzA, - &h_one, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - d_B, - n, /* ldb */ - info, - policy, - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - /* step 5: solve L * X = B */ - // NOTE: CUDA 10.0 - // TODO: status = hipsparseScsrsm2_solve( - // CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE, - // CHECK: HIPSPARSE_OPERATION_NON_TRANSPOSE, - status = cusparseScsrsm2_solve( 
- handle, - algo, - CUSPARSE_OPERATION_NON_TRANSPOSE, /* transA */ - CUSPARSE_OPERATION_NON_TRANSPOSE, /* transB */ - n, - nrhs, - nnzA, - &h_one, - descrA, - d_csrValA, - d_csrRowPtrA, - d_csrColIndA, - d_B, - n, /* ldb */ - info, - policy, - d_work); - // CHECK: assert(HIPSPARSE_STATUS_SUCCESS == status); - assert(CUSPARSE_STATUS_SUCCESS == status); - // CHECK: cudaStat1 = hipDeviceSynchronize(); - cudaStat1 = cudaDeviceSynchronize(); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - - /* step 6:measure residual B - A*X */ - // CHECK: cudaStat1 = hipMemcpy(X, d_B, sizeof(float)*n*nrhs, hipMemcpyDeviceToHost); - cudaStat1 = cudaMemcpy(X, d_B, sizeof(float)*n*nrhs, cudaMemcpyDeviceToHost); - // CHECK: assert(hipSuccess == cudaStat1); - assert(cudaSuccess == cudaStat1); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - printf("==== x1 = inv(A)*b1 \n"); - for (int j = 0; j < n; j++) { - printf("x1[%d] = %f\n", j, X[j]); - } - float r1_nrminf; - residaul_eval( - n, - descrA, - csrValA, - csrRowPtrA, - csrColIndA, - B, - X, - &r1_nrminf - ); - printf("|b1 - A*x1| = %E\n", r1_nrminf); - - printf("==== x2 = inv(A)*b2 \n"); - for (int j = 0; j < n; j++) { - printf("x2[%d] = %f\n", j, X[n + j]); - } - float r2_nrminf; - residaul_eval( - n, - descrA, - csrValA, - csrRowPtrA, - csrColIndA, - B + n, - X + n, - &r2_nrminf - ); - printf("|b2 - A*x2| = %E\n", r2_nrminf); - - /* free resources */ - // CHECK: if (d_csrRowPtrA) hipFree(d_csrRowPtrA); - if (d_csrRowPtrA) cudaFree(d_csrRowPtrA); - // CHECK: if (d_csrColIndA) hipFree(d_csrColIndA); - if (d_csrColIndA) cudaFree(d_csrColIndA); - // CHECK: if (d_csrValA) hipFree(d_csrValA); - if (d_csrValA) cudaFree(d_csrValA); - // CHECK: if (d_B) hipFree(d_B); - if (d_B) cudaFree(d_B); - // CHECK: if (handle) hipsparseDestroy(handle); - if (handle) cusparseDestroy(handle); - // CHECK: if (stream) hipStreamDestroy(stream); - if (stream) cudaStreamDestroy(stream); - // CHECK: if (descrA) 
hipsparseDestroyMatDescr(descrA); - if (descrA) cusparseDestroyMatDescr(descrA); - // NOTE: CUDA 10.0 - // TODO: if (info) hipsparseDestroyCsrsm2Info(info); - if (info) cusparseDestroyCsrsm2Info(info); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - - return 0; -} diff --git a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu index c6d62c0007..e6a2178053 100644 --- a/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu +++ b/tests/hipify-clang/unit_tests/libraries/cuSPARSE/cuSPARSE_12.cu @@ -385,8 +385,7 @@ double compute_BSR(BCRSArrays& bcsr, double *x , double *y){ cudaEventCreate(&startTime); cudaEventCreate(&stopTime); cudaEventRecord(startTime, bcsr.streamId); - // NOTE: cusparseDbsrmv and CUSPARSE_DIRECTION_COLUMN (of type cusparseDirection_t) are yet unsupported by HIP - // CHECK: cusparseDbsrmv(bcsr.cusparseHandle, CUSPARSE_DIRECTION_COLUMN, HIPSPARSE_OPERATION_NON_TRANSPOSE, + // CHECK: cusparseDbsrmv(bcsr.cusparseHandle, HIPSPARSE_DIRECTION_COLUMN, HIPSPARSE_OPERATION_NON_TRANSPOSE, cusparseDbsrmv(bcsr.cusparseHandle, CUSPARSE_DIRECTION_COLUMN, CUSPARSE_OPERATION_NON_TRANSPOSE, bcsr.nbBlockRow, bcsr.m, bcsr.nbBlocks, &alpha, descr, bcsr.cu_bsrValC, bcsr.cu_bsrRowPtrC, bcsr.cu_bsrColIndC, bcsr.blockSize, diff --git a/tests/hipify-clang/unit_tests/namespace/ns_kernel_launch.cu b/tests/hipify-clang/unit_tests/namespace/ns_kernel_launch.cu deleted file mode 100644 index 0d9fa1cf3f..0000000000 --- a/tests/hipify-clang/unit_tests/namespace/ns_kernel_launch.cu +++ /dev/null @@ -1,28 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// CHECK: #include -#include - -__global__ void test_0() { - int a = 10; -} - -namespace first { - __global__ void test_1() { - int b = 20; - } - namespace second { - __global__ void test_2() { - int c = 30; - } - } -} - -int main() { - // CHECK: hipLaunchKernelGGL(::test_0, dim3(1), dim3(1), 0, 0); - ::test_0<<<1, 
1>>>(); - // CHECK: hipLaunchKernelGGL(first::test_1, dim3(1), dim3(1), 0, 0); - first::test_1<<<1, 1>>>(); - // CHECK: hipLaunchKernelGGL(first::second::test_2, dim3(1), dim3(1), 0, 0); - first::second::test_2<<<1, 1>>>(); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals.cu b/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals.cu deleted file mode 100644 index 51bfeb6017..0000000000 --- a/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals.cu +++ /dev/null @@ -1,30 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args "--skip-excluded-preprocessor-conditional-blocks" %clang_args -// CHECK: #include - -#include - -__global__ void axpy_kernel(float a, float* x, float* y) { - y[threadIdx.x] = a * x[threadIdx.x]; -} - -void axpy(float a, float* x, float* y) { - -#ifdef SOME_MACRO - // CHECK: axpy_kernel <<<1, 1>>> (a, y, x); - axpy_kernel <<<1, 1>>> (a, y, x); -#endif - -#ifndef SOME_MACRO - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(2), 0, 0, a, y, x); - axpy_kernel <<<1, 2>>> (a, y, x); -#endif - -#ifdef SOME_MACRO - // CHECK: axpy_kernel <<<1, 3>>> (a, y, x); - axpy_kernel <<<1, 3>>> (a, y, x); -#else - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(4), 0, 0, a, x, y); - axpy_kernel <<<1, 4>>> (a, x, y); -#endif - -} \ No newline at end of file diff --git a/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_01.cu b/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_01.cu deleted file mode 100644 index 310d896054..0000000000 --- a/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_01.cu +++ /dev/null @@ -1,52 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args "--skip-excluded-preprocessor-conditional-blocks" %clang_args -// CHECK: #include - -__global__ void axpy_kernel(float a, float* x, float* y) { - y[threadIdx.x] = a * x[threadIdx.x]; -} - -void axpy(float a, float* x, float* y) { -float* y_new = nullptr; -#ifdef SOME_MACRO - y_new = x; - // CHECK: axpy_kernel <<<1, 
1>>> (a, y_new, x); - axpy_kernel <<<1, 1>>> (a, y_new, x); -#endif - -#ifndef SOME_MACRO - y_new = y; - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(2), 0, 0, a, y_new, x); - axpy_kernel <<<1, 2>>> (a, y_new, x); -#endif - -#ifdef SOME_MACRO - // CHECK: axpy_kernel <<<1, 3>>> (a, y, x); - axpy_kernel <<<1, 3>>> (a, y, x); -#else - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(4), 0, 0, a, x, y); - axpy_kernel <<<1, 4>>> (a, x, y); -#endif - -#ifdef SOME_MACRO - // CHECK: axpy_kernel <<<1, 5>>> (a, y, x); - axpy_kernel <<<1, 5>>> (a, y, x); -#elif defined SOME_MACRO_1 - // CHECK: axpy_kernel <<<1, 6>>> (a, x, y); - axpy_kernel <<<1, 6>>> (a, x, y); -#else - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(7), 0, 0, a, x, y); - axpy_kernel <<<1, 7>>> (a, x, y); -#endif - -#ifndef SOME_MACRO - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(8), 0, 0, a, y, x); - axpy_kernel <<<1, 8>>> (a, y, x); -#elif !defined(SOME_MACRO_1) - // CHECK: axpy_kernel <<<1, 9>>> (a, x, y); - axpy_kernel <<<1, 9>>> (a, x, y); -#else - // CHECK: axpy_kernel <<<1, 10>>> (a, x, y); - axpy_kernel <<<1, 10>>> (a, x, y); -#endif - -} \ No newline at end of file diff --git a/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_01_LLVM_10.cu b/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_01_LLVM_10.cu deleted file mode 100644 index a5c7c41745..0000000000 --- a/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_01_LLVM_10.cu +++ /dev/null @@ -1,52 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// CHECK: #include - -__global__ void axpy_kernel(float a, float* x, float* y) { - y[threadIdx.x] = a * x[threadIdx.x]; -} - -void axpy(float a, float* x, float* y) { -float* y_new = nullptr; -#ifdef SOME_MACRO - y_new = x; - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(1), 0, 0, a, y_new, x); - axpy_kernel <<<1, 1>>> (a, y_new, x); -#endif - -#ifndef SOME_MACRO - y_new = y; - // CHECK: 
hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(2), 0, 0, a, y_new, x); - axpy_kernel <<<1, 2>>> (a, y_new, x); -#endif - -#ifdef SOME_MACRO - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(3), 0, 0, a, y, x); - axpy_kernel <<<1, 3>>> (a, y, x); -#else - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(4), 0, 0, a, x, y); - axpy_kernel <<<1, 4>>> (a, x, y); -#endif - -#ifdef SOME_MACRO - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(5), 0, 0, a, y, x); - axpy_kernel <<<1, 5>>> (a, y, x); -#elif defined SOME_MACRO_1 - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(6), 0, 0, a, x, y); - axpy_kernel <<<1, 6>>> (a, x, y); -#else - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(7), 0, 0, a, x, y); - axpy_kernel <<<1, 7>>> (a, x, y); -#endif - -#ifndef SOME_MACRO - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(8), 0, 0, a, y, x); - axpy_kernel <<<1, 8>>> (a, y, x); -#elif !defined(SOME_MACRO_1) - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(9), 0, 0, a, x, y); - axpy_kernel <<<1, 9>>> (a, x, y); -#else - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(10), 0, 0, a, x, y); - axpy_kernel <<<1, 10>>> (a, x, y); -#endif - -} \ No newline at end of file diff --git a/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_LLVM_10.cu b/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_LLVM_10.cu deleted file mode 100644 index 06ce48ebef..0000000000 --- a/tests/hipify-clang/unit_tests/pp/pp_if_else_conditionals_LLVM_10.cu +++ /dev/null @@ -1,30 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// CHECK: #include - -#include - -__global__ void axpy_kernel(float a, float* x, float* y) { - y[threadIdx.x] = a * x[threadIdx.x]; -} - -void axpy(float a, float* x, float* y) { - -#ifdef SOME_MACRO - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(1), 0, 0, a, y, x); - axpy_kernel <<<1, 1>>> (a, y, x); -#endif - -#ifndef SOME_MACRO - // CHECK: hipLaunchKernelGGL(axpy_kernel, 
dim3(1), dim3(2), 0, 0, a, y, x); - axpy_kernel <<<1, 2>>> (a, y, x); -#endif - -#ifdef SOME_MACRO - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(3), 0, 0, a, y, x); - axpy_kernel <<<1, 3>>> (a, y, x); -#else - // CHECK: hipLaunchKernelGGL(axpy_kernel, dim3(1), dim3(4), 0, 0, a, x, y); - axpy_kernel <<<1, 4>>> (a, x, y); -#endif - -} \ No newline at end of file diff --git a/tests/hipify-clang/unit_tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp b/tests/hipify-clang/unit_tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp deleted file mode 100644 index 2bf196c0cd..0000000000 --- a/tests/hipify-clang/unit_tests/samples/2_Cookbook/0_MatrixTranspose/MatrixTranspose.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include - -// CHECK: #include -#include - -#define WIDTH 1024 - -#define NUM (WIDTH * WIDTH) - -#define THREADS_PER_BLOCK_X 4 -#define THREADS_PER_BLOCK_Y 4 -#define THREADS_PER_BLOCK_Z 1 - -// Device (Kernel) function, it must be void -__global__ void matrixTranspose(float* out, float* in, const int width) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - out[y * width + x] = in[x * width + y]; -} - -// CPU implementation of matrix transpose -void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { - for (unsigned int j = 0; j < width; j++) { - for (unsigned int i = 0; i < width; i++) { - output[i * width + j] = input[j * width + i]; - } - } -} - -int main() { - float* Matrix; - float* TransposeMatrix; - float* cpuTransposeMatrix; - - float* gpuMatrix; - float* gpuTransposeMatrix; - - // CHECK: hipDeviceProp_t devProp; - cudaDeviceProp devProp; - // CHECK: hipGetDeviceProperties(&devProp, 0); - cudaGetDeviceProperties(&devProp, 0); - - std::cout << "Device name " << devProp.name << std::endl; - - int i; - int errors; - - Matrix = (float*)malloc(NUM * sizeof(float)); - TransposeMatrix = (float*)malloc(NUM * sizeof(float)); - cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); - - // initialize the input data - for (i = 0; i < NUM; i++) { - Matrix[i] = (float)i * 10.0f; - } - - // allocate the memory on the device side - // CHECK: hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - cudaMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - // CHECK: hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); - cudaMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); - - // Memory transfer from host to device - // CHECK: hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); - cudaMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), cudaMemcpyHostToDevice); - - // Lauching kernel from host - - dim3 dimGrid(WIDTH / THREADS_PER_BLOCK_X, WIDTH / 
THREADS_PER_BLOCK_Y); - dim3 dimBlock(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y); - // CHECK: hipLaunchKernelGGL(matrixTranspose, dim3(dimGrid), dim3(dimBlock), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); - matrixTranspose <<>>(gpuTransposeMatrix, gpuMatrix, WIDTH); - - // Memory transfer from device to host - // CHECK: hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); - cudaMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), cudaMemcpyDeviceToHost); - - // CPU MatrixTranspose computation - matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); - - // verify the results - errors = 0; - double eps = 1.0E-6; - for (i = 0; i < NUM; i++) { - if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - } else { - printf("PASSED!\n"); - } - - // free the resources on device side - // CHECK: hipFree(gpuMatrix); - cudaFree(gpuMatrix); - // CHECK: hipFree(gpuTransposeMatrix); - cudaFree(gpuTransposeMatrix); - - // free the resources on host side - free(Matrix); - free(TransposeMatrix); - free(cpuTransposeMatrix); - - return errors; -} diff --git a/tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp b/tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp deleted file mode 100644 index e7953dc862..0000000000 --- a/tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/texture2dDrv.cpp +++ /dev/null @@ -1,169 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -// CHECK: #include -#include -#include -#include -#include - -#define fileName "tex2dKernel.code" -// CHECK: texture tex; -texture tex; -bool testResult = false; - -// CHECK: hipError_t status = cmd; -// CHECK: if (status != hipSuccess) { -// CHECK: std::cout << "error: #" << status << " (" << hipGetErrorString(status) -#define CUDACHECK(cmd) \ - { \ - cudaError_t status = cmd; \ - if (status != cudaSuccess) { \ - std::cout << "error: #" << status << " (" << cudaGetErrorString(status) \ - << ") at line:" << __LINE__ << ": " << #cmd << std::endl; \ - abort(); \ - } \ - } - -bool runTest(int argc, char** argv) { - unsigned int width = 256; - unsigned int height = 256; - unsigned int size = width * height * sizeof(float); - float* hData = (float*)malloc(size); - memset(hData, 0, size); - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - hData[i * width + j] = i * width + j; - } - } - // CHECK: hipModule_t Module; - CUmodule Module; - // CHECK: hipModuleLoad(&Module, fileName); - cuModuleLoad(&Module, fileName); - - // CHECK: hipArray * array; - CUarray array; - // CHECK: HIP_ARRAY_DESCRIPTOR desc; - CUDA_ARRAY_DESCRIPTOR desc; - // CHECK: desc.Format = HIP_AD_FORMAT_FLOAT; - desc.Format = CU_AD_FORMAT_FLOAT; - desc.NumChannels = 1; - desc.Width = width; - desc.Height = height; - // CHECK: hipArrayCreate(&array, &desc); - cuArrayCreate(&array, &desc); - - // CHECK: hip_Memcpy2D copyParam; - CUDA_MEMCPY2D copyParam; - memset(©Param, 0, sizeof(copyParam)); - // CHECK: copyParam.dstMemoryType = hipMemoryTypeArray; - copyParam.dstMemoryType = CU_MEMORYTYPE_ARRAY; - copyParam.dstArray = array; - // CHECK: copyParam.srcMemoryType = hipMemoryTypeHost; - copyParam.srcMemoryType = CU_MEMORYTYPE_HOST; - copyParam.srcHost = hData; - copyParam.srcPitch = width * sizeof(float); - copyParam.WidthInBytes = copyParam.srcPitch; - copyParam.Height = height; - // CHECK: hipMemcpyParam2D(©Param); - cuMemcpy2D(©Param); - - // CHECK: textureReference* texref; - 
CUtexref_st* texref; - // CHECK: hipModuleGetTexRef(&texref, Module, "tex"); - cuModuleGetTexRef(&texref, Module, "tex"); - // CHECK: hipTexRefSetAddressMode(texref, 0, hipAddressModeWrap); - cuTexRefSetAddressMode(texref, 0, CU_TR_ADDRESS_MODE_WRAP); - // CHECK: hipTexRefSetAddressMode(texref, 1, hipAddressModeWrap); - cuTexRefSetAddressMode(texref, 1, CU_TR_ADDRESS_MODE_WRAP); - // CHECK: hipTexRefSetFilterMode(texref, hipFilterModePoint); - cuTexRefSetFilterMode(texref, CU_TR_FILTER_MODE_POINT); - // CHECK: hipTexRefSetFlags(texref, 0); - cuTexRefSetFlags(texref, 0); - // CHECK: hipTexRefSetFormat(texref, HIP_AD_FORMAT_FLOAT, 1); - cuTexRefSetFormat(texref, CU_AD_FORMAT_FLOAT, 1); - // CHECK: hipTexRefSetArray(texref, array, HIP_TRSA_OVERRIDE_FORMAT); - cuTexRefSetArray(texref, array, CU_TRSA_OVERRIDE_FORMAT); - - float* dData = NULL; - // CHECK: hipMalloc((void**)&dData, size); - cudaMalloc((void**)&dData, size); - - struct { - void* _Ad; - unsigned int _Bd; - unsigned int _Cd; - } args; - args._Ad = (void*) dData; - args._Bd = width; - args._Cd = height; - - size_t sizeTemp = sizeof(args); - - // CHECK: void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, - // CHECK: &sizeTemp, HIP_LAUNCH_PARAM_END}; - void* config[] = {CU_LAUNCH_PARAM_BUFFER_POINTER, &args, CU_LAUNCH_PARAM_BUFFER_SIZE, - &sizeTemp, CU_LAUNCH_PARAM_END}; - - // CHECK: hipFunction_t Function; - CUfunction Function; - // CHECK: hipModuleGetFunction(&Function, Module, "tex2dKernel"); - cuModuleGetFunction(&Function, Module, "tex2dKernel"); - - int temp1 = width / 16; - int temp2 = height / 16; - // CHECK: hipModuleLaunchKernel(Function, 16, 16, 1, temp1, temp2, 1, 0, 0, NULL, (void**)&config); - cuLaunchKernel(Function, 16, 16, 1, temp1, temp2, 1, 0, 0, NULL, (void**)&config); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - float* hOutputData = (float*)malloc(size); - memset(hOutputData, 0, size); - // CHECK: hipMemcpy(hOutputData, dData, 
size, hipMemcpyDeviceToHost); - cudaMemcpy(hOutputData, dData, size, cudaMemcpyDeviceToHost); - - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - if (hData[i * width + j] != hOutputData[i * width + j]) { - printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j], - hOutputData[i * width + j]); - testResult = false; - break; - } - } - } - // CHECK: hipFree(dData); - cudaFree(dData); - // CHECK: hipFreeArray(hipArray_t(array)); - cudaFreeArray(cudaArray_t(array)); - return true; -} - -int main(int argc, char** argv) { - // CHECK: hipInit(0); - cuInit(0); - testResult = runTest(argc, argv); - printf("%s ...\n", testResult ? "PASSED" : "FAILED"); - exit(testResult ? EXIT_SUCCESS : EXIT_FAILURE); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/samples/2_Cookbook/13_occupancy/occupancy.cpp b/tests/hipify-clang/unit_tests/samples/2_Cookbook/13_occupancy/occupancy.cpp deleted file mode 100644 index d4277e133c..0000000000 --- a/tests/hipify-clang/unit_tests/samples/2_Cookbook/13_occupancy/occupancy.cpp +++ /dev/null @@ -1,198 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR -IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -// CHECK: #include "hip/hip_runtime.h" -#include "cuda_runtime.h" -#include -#define NUM 1000000 - -// CHECK: if (status != hipSuccess) { -#define CUDA_CHECK(status) \ - if (status != cudaSuccess) { \ - std::cout << "Got Status: " << status << " at Line: " << __LINE__ << std::endl; \ - exit(0); \ - } - -// Device (Kernel) function -__global__ void multiply(float* C, float* A, float* B, int N) { - int tx = blockDim.x*blockIdx.x+threadIdx.x; - if (tx < N) { - C[tx] = A[tx] * B[tx]; - } -} - -// CPU implementation -void multiplyCPU(float* C, float* A, float* B, int N) { - for(unsigned int i=0; i>> (C, A, B, NUM); - - // Record the stop event - // CHECK: CUDA_CHECK(hipEventRecord(stop, NULL)); - CUDA_CHECK(cudaEventRecord(stop, NULL)); - // CHECK: CUDA_CHECK(hipEventSynchronize(stop)); - CUDA_CHECK(cudaEventSynchronize(stop)); - - // CHECK: CUDA_CHECK(hipEventElapsedTime(&eventMs, start, stop)); - CUDA_CHECK(cudaEventElapsedTime(&eventMs, start, stop)); - printf("kernel Execution time = %6.3fms\n", eventMs); - - // Calculate Occupancy - int numBlock = 0; - // CHECK: CUDA_CHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, multiply, blockSize, 0)); - CUDA_CHECK(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, multiply, blockSize, 0)); - - if(devProp.maxThreadsPerMultiProcessor) { - std::cout << "Theoretical Occupancy is " << (double)numBlock* blockSize/devProp.maxThreadsPerMultiProcessor * 100 << "%" << std::endl; - } -} - -int main() { - float *A, *B, *C0, *C1, *cpuC; - float 
*Ad, *Bd, *C0d, *C1d; - int errors=0; - - // Initialize the input data - A = (float*)malloc(NUM * sizeof(float)); - B = (float*)malloc(NUM * sizeof(float)); - C0 = (float*)malloc(NUM * sizeof(float)); - C1 = (float*)malloc(NUM * sizeof(float)); - cpuC = (float*)malloc(NUM * sizeof(float)); - - for(int i=0; i< NUM; i++) { - A[i] = i; - B[i] = i; - } - - // Allocate the memory on the device side - // CHECK: CUDA_CHECK(hipMalloc((void**)&Ad, NUM * sizeof(float))); - CUDA_CHECK(cudaMalloc((void**)&Ad, NUM * sizeof(float))); - // CHECK: CUDA_CHECK(hipMalloc((void**)&Bd, NUM * sizeof(float))); - CUDA_CHECK(cudaMalloc((void**)&Bd, NUM * sizeof(float))); - // CHECK: CUDA_CHECK(hipMalloc((void**)&C0d, NUM * sizeof(float))); - CUDA_CHECK(cudaMalloc((void**)&C0d, NUM * sizeof(float))); - // CHECK: CUDA_CHECK(hipMalloc((void**)&C1d, NUM * sizeof(float))); - CUDA_CHECK(cudaMalloc((void**)&C1d, NUM * sizeof(float))); - - // Memory transfer from host to device - // CHECK: CUDA_CHECK(hipMemcpy(Ad,A,NUM * sizeof(float), hipMemcpyHostToDevice)); - CUDA_CHECK(cudaMemcpy(Ad,A,NUM * sizeof(float), cudaMemcpyHostToDevice)); - // CHECK: CUDA_CHECK(hipMemcpy(Bd,B,NUM * sizeof(float), hipMemcpyHostToDevice)); - CUDA_CHECK(cudaMemcpy(Bd,B,NUM * sizeof(float), cudaMemcpyHostToDevice)); - - // Kernel launch with manual/default block size - launchKernel(C0d, Ad, Bd, 1); - - // Kernel launch with the block size suggested by cudaOccupancyMaxPotentialBlockSize - launchKernel(C1d, Ad, Bd, 0); - - // Memory transfer from device to host - // CHECK: CUDA_CHECK(hipMemcpy(C0,C0d, NUM * sizeof(float), hipMemcpyDeviceToHost)); - CUDA_CHECK(cudaMemcpy(C0,C0d, NUM * sizeof(float), cudaMemcpyDeviceToHost)); - // CHECK: CUDA_CHECK(hipMemcpy(C1,C1d, NUM * sizeof(float), hipMemcpyDeviceToHost)); - CUDA_CHECK(cudaMemcpy(C1,C1d, NUM * sizeof(float), cudaMemcpyDeviceToHost)); - - // CPU computation - multiplyCPU(cpuC, A, B, NUM); - - // Verify the results - double eps = 1.0E-6; - - for (int i = 0; i < NUM; i++) { 
- if (std::abs(C0[i] - cpuC[i]) > eps) { - errors++; - } - } - - if (errors != 0) { - printf("\nManual Test FAILED: %d errors\n", errors); - errors=0; - } else { - printf("\nManual Test PASSED!\n"); - } - - for (int i = 0; i < NUM; i++) { - if (std::abs(C1[i] - cpuC[i]) > eps) { - errors++; - } - } - - if (errors != 0) { - printf("\n Automatic Test FAILED: %d errors\n", errors); - } else { - printf("\nAutomatic Test PASSED!\n"); - } - - // CHECK: CUDA_CHECK(hipFree(Ad)); - CUDA_CHECK(cudaFree(Ad)); - // CHECK: CUDA_CHECK(hipFree(Bd)); - CUDA_CHECK(cudaFree(Bd)); - // CHECK: CUDA_CHECK(hipFree(C0d)); - CUDA_CHECK(cudaFree(C0d)); - // CHECK: CUDA_CHECK(hipFree(C1d)); - CUDA_CHECK(cudaFree(C1d)); - - free(A); - free(B); - free(C0); - free(C1); - free(cpuC); -} diff --git a/tests/hipify-clang/unit_tests/samples/2_Cookbook/1_hipEvent/hipEvent.cpp b/tests/hipify-clang/unit_tests/samples/2_Cookbook/1_hipEvent/hipEvent.cpp deleted file mode 100644 index 323be01ff1..0000000000 --- a/tests/hipify-clang/unit_tests/samples/2_Cookbook/1_hipEvent/hipEvent.cpp +++ /dev/null @@ -1,181 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include - -// CHECK: #include -#include - -#define WIDTH 1024 - -#define NUM (WIDTH * WIDTH) - -#define THREADS_PER_BLOCK_X 4 -#define THREADS_PER_BLOCK_Y 4 -#define THREADS_PER_BLOCK_Z 1 - -// Device (Kernel) function, it must be void -__global__ void matrixTranspose(float* out, float* in, const int width) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - out[y * width + x] = in[x * width + y]; -} - -// CPU implementation of matrix transpose -void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { - for (unsigned int j = 0; j < width; j++) { - for (unsigned int i = 0; i < width; i++) { - output[i * width + j] = input[j * width + i]; - } - } -} - -int main() { - float* Matrix; - float* TransposeMatrix; - float* cpuTransposeMatrix; - - float* gpuMatrix; - float* gpuTransposeMatrix; - - // CHECK: hipDeviceProp_t devProp; - cudaDeviceProp devProp; - // CHECK: hipGetDeviceProperties(&devProp, 0); - cudaGetDeviceProperties(&devProp, 0); - - std::cout << "Device name " << devProp.name << std::endl; - - // CHECK: hipEvent_t start, stop; - cudaEvent_t start, stop; - // CHECK: hipEventCreate(&start); - cudaEventCreate(&start); - // CHECK: hipEventCreate(&stop); - cudaEventCreate(&stop); - float eventMs = 1.0f; - - int i; - int errors; - - Matrix = (float*)malloc(NUM * sizeof(float)); - TransposeMatrix = (float*)malloc(NUM * sizeof(float)); - cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); - - // initialize the 
input data - for (i = 0; i < NUM; i++) { - Matrix[i] = (float)i * 10.0f; - } - - // allocate the memory on the device side - // CHECK: hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - cudaMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - // CHECK: hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); - cudaMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); - - // Record the start event - // CHECK: hipEventRecord(start, NULL); - cudaEventRecord(start, NULL); - - // Memory transfer from host to device - // CHECK: hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); - cudaMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), cudaMemcpyHostToDevice); - - // Record the stop event - // CHECK: hipEventRecord(stop, NULL); - cudaEventRecord(stop, NULL); - // CHECK: hipEventSynchronize(stop); - cudaEventSynchronize(stop); - // CHECK: hipEventElapsedTime(&eventMs, start, stop); - cudaEventElapsedTime(&eventMs, start, stop); - - printf("hipMemcpyHostToDevice time taken = %6.3fms\n", eventMs); - - // Record the start event - // CHECK: hipEventRecord(start, NULL); - cudaEventRecord(start, NULL); - - // Lauching kernel from host - - dim3 dimGrid(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y); - dim3 dimBlock(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y); - // CHECK: hipLaunchKernelGGL(matrixTranspose, dim3(dimGrid), dim3(dimBlock), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); - matrixTranspose <<>>(gpuTransposeMatrix, gpuMatrix, WIDTH); - - // Record the stop event - // CHECK: hipEventRecord(stop, NULL); - cudaEventRecord(stop, NULL); - // CHECK: hipEventSynchronize(stop); - cudaEventSynchronize(stop); - - // CHECK: hipEventElapsedTime(&eventMs, start, stop); - cudaEventElapsedTime(&eventMs, start, stop); - - printf("kernel Execution time = %6.3fms\n", eventMs); - - // Record the start event - // CHECK: hipEventRecord(start, NULL); - cudaEventRecord(start, NULL); - - // Memory transfer from device to host - // CHECK: 
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); - cudaMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), cudaMemcpyDeviceToHost); - - // Record the stop event - // CHECK: hipEventRecord(stop, NULL); - cudaEventRecord(stop, NULL); - // CHECK: hipEventSynchronize(stop); - cudaEventSynchronize(stop); - // CHECK: hipEventElapsedTime(&eventMs, start, stop); - cudaEventElapsedTime(&eventMs, start, stop); - - printf("hipMemcpyDeviceToHost time taken = %6.3fms\n", eventMs); - - // CPU MatrixTranspose computation - matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); - - // verify the results - errors = 0; - double eps = 1.0E-6; - for (i = 0; i < NUM; i++) { - if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - } else { - printf("PASSED!\n"); - } - - // free the resources on device side - // CHECK: hipFree(gpuMatrix); - cudaFree(gpuMatrix); - // CHECK: hipFree(gpuTransposeMatrix); - cudaFree(gpuTransposeMatrix); - - // free the resources on host side - free(Matrix); - free(TransposeMatrix); - free(cpuTransposeMatrix); - - return errors; -} diff --git a/tests/hipify-clang/unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp b/tests/hipify-clang/unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp deleted file mode 100644 index 4a1b28001f..0000000000 --- a/tests/hipify-clang/unit_tests/samples/2_Cookbook/2_Profiler/Profiler.cpp +++ /dev/null @@ -1,250 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -// NOTE: Profiler API is under development. -// NOTE: This is NOT WORKING example. -// TODO: Get rid of HIP_SCOPED_MARKER, HIP_BEGIN_MARKER, HIP_END_MARKER, declared in hip/hip_profile.h or -// TODO: find out a way to hipify it in particular place (signatures are to obtain). 
- -#include - -// CHECK: #include -#include -// CHECK: #include -#include - -#define WIDTH 1024 - -#define NUM (WIDTH * WIDTH) - -#define THREADS_PER_BLOCK_X 4 -#define THREADS_PER_BLOCK_Y 4 -#define THREADS_PER_BLOCK_Z 1 - -#define ITERATIONS 10 - -// Cmdline parms to control start and stop triggers -int startTriggerIteration = -1; -int stopTriggerIteration = -1; - -// Device (Kernel) function, it must be void -__global__ void matrixTranspose(float* out, float* in, const int width) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - out[y * width + x] = in[x * width + y]; -} - -// CPU implementation of matrix transpose -void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { - for (unsigned int j = 0; j < width; j++) { - for (unsigned int i = 0; i < width; i++) { - output[i * width + j] = input[j * width + i]; - } - } -} - -// Use a separate function to demonstrate how to use function name as part of scoped marker: -void runGPU(float* Matrix, float* TransposeMatrix, float* gpuMatrix, float* gpuTransposeMatrix) { - // __func__ is a standard C++ macro which expands to the name of the function, in this case - // "runGPU" -// TODO: Find out signatures to generate the following: -// HIP_SCOPED_MARKER(__func__, "MyGroup"); - - for (int i = 0; i < ITERATIONS; i++) { - if (i == startTriggerIteration) { - // CHECK: hipProfilerStart(); - cudaProfilerStart(); - } - if (i == stopTriggerIteration) { - // CHECK: hipProfilerStop(); - cudaProfilerStop(); - } - - float eventMs = 0.0f; - - // CHECK: hipEvent_t start, stop; - cudaEvent_t start, stop; - // CHECK: hipEventCreate(&start); - cudaEventCreate(&start); - // CHECK: hipEventCreate(&stop); - cudaEventCreate(&stop); - - // Record the start event - // CHECK: hipEventRecord(start, NULL); - cudaEventRecord(start, NULL); - - // Memory transfer from host to device - // CHECK: hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); 
- cudaMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), cudaMemcpyHostToDevice); - - // Record the stop event - // CHECK: hipEventRecord(stop, NULL); - cudaEventRecord(stop, NULL); - // CHECK: hipEventSynchronize(stop); - cudaEventSynchronize(stop); - - // CHECK: hipEventElapsedTime(&eventMs, start, stop); - cudaEventElapsedTime(&eventMs, start, stop); - - // CHECK: printf("hipMemcpyHostToDevice time taken = %6.3fms\n", eventMs); - printf("cudaMemcpyHostToDevice time taken = %6.3fms\n", eventMs); - - // Record the start event - // CHECK: hipEventRecord(start, NULL); - cudaEventRecord(start, NULL); - - // Lauching kernel from host - dim3 dimGrid(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y); - dim3 dimBlock(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y); - // CHECK: hipLaunchKernelGGL(matrixTranspose, dim3(dimGrid), dim3(dimBlock), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); - matrixTranspose <<>> (gpuTransposeMatrix, gpuMatrix, WIDTH); - - // Record the stop event - // CHECK: hipEventRecord(stop, NULL); - cudaEventRecord(stop, NULL); - // CHECK: hipEventSynchronize(stop); - cudaEventSynchronize(stop); - // CHECK: hipEventElapsedTime(&eventMs, start, stop); - cudaEventElapsedTime(&eventMs, start, stop); - - printf("kernel Execution time = %6.3fms\n", eventMs); - - // Record the start event - // CHECK: hipEventRecord(start, NULL); - cudaEventRecord(start, NULL); - - // Memory transfer from device to host - // CHECK: hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); - cudaMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), cudaMemcpyDeviceToHost); - - // Record the stop event - // CHECK: hipEventRecord(stop, NULL); - cudaEventRecord(stop, NULL); - // CHECK: hipEventSynchronize(stop); - cudaEventSynchronize(stop); - - // CHECK: hipEventElapsedTime(&eventMs, start, stop); - cudaEventElapsedTime(&eventMs, start, stop); - - // CHECK: printf("hipMemcpyDeviceToHost time taken = %6.3fms\n", eventMs); - 
printf("cudaMemcpyDeviceToHost time taken = %6.3fms\n", eventMs); - } -}; - -int main(int argc, char* argv[]) { - if (argc >= 2) { - startTriggerIteration = atoi(argv[1]); - printf("info : will start tracing at iteration:%d\n", startTriggerIteration); - } - if (argc >= 3) { - stopTriggerIteration = atoi(argv[2]); - printf("info : will stop tracing at iteration:%d\n", stopTriggerIteration); - } - - float* Matrix; - float* TransposeMatrix; - float* cpuTransposeMatrix; - - float* gpuMatrix; - float* gpuTransposeMatrix; - - // CHECK: hipDeviceProp_t devProp; - cudaDeviceProp devProp; - // CHECK: hipGetDeviceProperties(&devProp, 0); - cudaGetDeviceProperties(&devProp, 0); - - std::cout << "Device name " << devProp.name << std::endl; - - { - // Show example of how to create a "scoped marker". - // The scoped marker records the time spent inside the { scope } of the marker - the begin - // timestamp is at the beginning of the code scope, and the end is recorded when the SCOPE - // exits. This can be viewed in CodeXL timeline relative to other GPU and CPU events. This - // marker captures the time spent in setup including host allocation, initialization, and - // device memory allocation. -// TODO: Find out signatures to generate the following: -// HIP_SCOPED_MARKER("Setup", "MyGroup"); - - Matrix = (float*)malloc(NUM * sizeof(float)); - TransposeMatrix = (float*)malloc(NUM * sizeof(float)); - cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); - - // initialize the input data - for (int i = 0; i < NUM; i++) { - Matrix[i] = (float)i * 10.0f; - } - - // allocate the memory on the device side - // CHECK: hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - cudaMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - // CHECK: hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); - cudaMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); - - // FYI, the scoped-marker will be destroyed here when the scope exits, and will record its - // "end" timestamp. 
- } - - runGPU(Matrix, TransposeMatrix, gpuMatrix, gpuTransposeMatrix); - - // show how to use explicit begin/end markers: - // We begin the timed region with HIP_BEGIN_MARKER, passing in the markerName and group: - // The region will stop when HIP_END_MARKER is called - // This is another way to mark begin/end - as an alternative to scoped markers. -// TODO: Find out signatures to generate the following: -// HIP_BEGIN_MARKER("Check&TearDown", "MyGroup"); - - int errors = 0; - - // CPU MatrixTranspose computation - matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); - - // verify the results - double eps = 1.0E-6; - for (int i = 0; i < NUM; i++) { - if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - } else { - printf("PASSED!\n"); - } - - // free the resources on device side - // CHECK: hipFree(gpuMatrix); - cudaFree(gpuMatrix); - // CHECK: hipFree(gpuTransposeMatrix); - cudaFree(gpuTransposeMatrix); - - // free the resources on host side - free(Matrix); - free(TransposeMatrix); - free(cpuTransposeMatrix); - - // This ends the last marker started in this thread, in this case "Check&TearDown" -// TODO: Find out signatures to generate the following: -// HIP_END_MARKER(); - - return errors; -} diff --git a/tests/hipify-clang/unit_tests/samples/2_Cookbook/7_streams/stream.cpp b/tests/hipify-clang/unit_tests/samples/2_Cookbook/7_streams/stream.cpp deleted file mode 100644 index b7fadbf936..0000000000 --- a/tests/hipify-clang/unit_tests/samples/2_Cookbook/7_streams/stream.cpp +++ /dev/null @@ -1,153 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -// CHECK: #include -#include - -#define WIDTH 32 - -#define NUM (WIDTH * WIDTH) - -#define THREADS_PER_BLOCK_X 4 -#define THREADS_PER_BLOCK_Y 4 -#define THREADS_PER_BLOCK_Z 1 - -using namespace std; - -__global__ void matrixTranspose_static_shared(float* out, float* in, - const int width) { - // CHECK-NOT: HIP_DYNAMIC_SHARED(float, sharedMem); - // CHECK: __shared__ float sharedMem[WIDTH * WIDTH]; - __shared__ float sharedMem[WIDTH * WIDTH]; - - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - - sharedMem[y * width + x] = in[x * width + y]; - - __syncthreads(); - - out[y * width + x] = sharedMem[y * width + x]; -} - -__global__ void matrixTranspose_dynamic_shared(float* out, float* in, - const int width) { - // declare dynamic shared memory - // CHECK-NOT: extern __shared__ - // CHECK: HIP_DYNAMIC_SHARED(int, sharedMem) - extern __shared__ int sharedMem[]; - - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - - sharedMem[y * width + x] = in[x * width + y]; - - __syncthreads(); - - out[y * width + x] = sharedMem[y * width + x]; -} - -void MultipleStream(float** data, float* randArray, float** gpuTransposeMatrix, - float** TransposeMatrix, int width) { - const int num_streams = 2; - // CHECK: hipStream_t streams[num_streams]; - cudaStream_t streams[num_streams]; - - // CHECK: for (int i = 0; i < num_streams; i++) hipStreamCreate(&streams[i]); - for (int i = 0; i < num_streams; i++) cudaStreamCreate(&streams[i]); - - for (int i = 0; i < num_streams; i++) { - // CHECK: hipMalloc((void**)&data[i], NUM * sizeof(float)); - cudaMalloc((void**)&data[i], NUM * sizeof(float)); - // CHECK: hipMemcpyAsync(data[i], randArray, NUM * sizeof(float), hipMemcpyHostToDevice, streams[i]); - cudaMemcpyAsync(data[i], randArray, NUM * sizeof(float), cudaMemcpyHostToDevice, streams[i]); - } - - dim3 dimGrid(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y); - dim3 
dimBlock(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y); - // CHECK: hipLaunchKernelGGL(matrixTranspose_static_shared, dim3(dimGrid), dim3(dimBlock), 0, streams[0], gpuTransposeMatrix[0], data[0], width); - matrixTranspose_static_shared <<>>(gpuTransposeMatrix[0], data[0], width); - // CHECK: hipLaunchKernelGGL(matrixTranspose_dynamic_shared, dim3(dimGrid), dim3(dimBlock), 0, streams[1], gpuTransposeMatrix[1], data[1], width); - matrixTranspose_dynamic_shared <<>>(gpuTransposeMatrix[1], data[1], width); - - for (int i = 0; i < num_streams; i++) - // CHECK: hipMemcpyAsync(TransposeMatrix[i], gpuTransposeMatrix[i], NUM * sizeof(float), hipMemcpyDeviceToHost, streams[i]); - cudaMemcpyAsync(TransposeMatrix[i], gpuTransposeMatrix[i], NUM * sizeof(float), cudaMemcpyDeviceToHost, streams[i]); -} - -int main() { - // CHECK: hipSetDevice(0); - cudaSetDevice(0); - - float *data[2], *TransposeMatrix[2], *gpuTransposeMatrix[2], *randArray; - - int width = WIDTH; - - randArray = (float*)malloc(NUM * sizeof(float)); - - TransposeMatrix[0] = (float*)malloc(NUM * sizeof(float)); - TransposeMatrix[1] = (float*)malloc(NUM * sizeof(float)); - - // CHECK: hipMalloc((void**)&gpuTransposeMatrix[0], NUM * sizeof(float)); - cudaMalloc((void**)&gpuTransposeMatrix[0], NUM * sizeof(float)); - // CHECK: hipMalloc((void**)&gpuTransposeMatrix[1], NUM * sizeof(float)); - cudaMalloc((void**)&gpuTransposeMatrix[1], NUM * sizeof(float)); - - for (int i = 0; i < NUM; i++) { - randArray[i] = (float)i * 1.0f; - } - - MultipleStream(data, randArray, gpuTransposeMatrix, TransposeMatrix, width); - - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - // verify the results - int errors = 0; - double eps = 1.0E-6; - for (int i = 0; i < NUM; i++) { - if (std::abs(TransposeMatrix[0][i] - TransposeMatrix[1][i]) > eps) { - printf("%d stream0: %f stream1 %f\n", i, TransposeMatrix[0][i], TransposeMatrix[1][i]); - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - } else { - 
printf("stream PASSED!\n"); - } - - free(randArray); - for (int i = 0; i < 2; i++) { - // CHECK: hipFree(data[i]); - cudaFree(data[i]); - // CHECK: hipFree(gpuTransposeMatrix[i]); - cudaFree(gpuTransposeMatrix[i]); - free(TransposeMatrix[i]); - } - - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/samples/2_Cookbook/8_peer2peer/peer2peer.cpp b/tests/hipify-clang/unit_tests/samples/2_Cookbook/8_peer2peer/peer2peer.cpp deleted file mode 100644 index be2ec7399d..0000000000 --- a/tests/hipify-clang/unit_tests/samples/2_Cookbook/8_peer2peer/peer2peer.cpp +++ /dev/null @@ -1,225 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -/* -Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#include -// CHECK: #include -#include -#include -#define WIDTH 32 - -#define NUM (WIDTH * WIDTH) - -#define THREADS_PER_BLOCK_X 4 -#define THREADS_PER_BLOCK_Y 4 -#define THREADS_PER_BLOCK_Z 1 - -using namespace std; - -#define KNRM "\x1B[0m" -#define KRED "\x1B[31m" - -#define failed(...) \ - printf("%serror: ", KRED); \ - printf(__VA_ARGS__); \ - printf("\n"); \ - printf("error: TEST FAILED\n%s", KNRM); \ - abort(); - -// CHECK: hipError_t localError = error; -// CHECK: if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled) && -// CHECK: (localError != hipErrorPeerAccessNotEnabled )) { -// CHECK: printf("%serror: '%s'(%d) from %s at %s:%d%s\n", KRED, hipGetErrorString(localError), -#define CUDACHECK(error) \ - { \ - cudaError_t localError = error; \ - if ((localError != cudaSuccess) && (localError != cudaErrorPeerAccessAlreadyEnabled) && \ - (localError != cudaErrorPeerAccessNotEnabled )) { \ - printf("%serror: '%s'(%d) from %s at %s:%d%s\n", KRED, cudaGetErrorString(localError), \ - localError, #error, __FILE__, __LINE__, KNRM); \ - failed("API returned error code."); \ - } \ - } - -void checkPeer2PeerSupport() { - int gpuCount; - int canAccessPeer; - // CHECK: CUDACHECK(hipGetDeviceCount(&gpuCount)); - CUDACHECK(cudaGetDeviceCount(&gpuCount)); - for (int currentGpu = 0; currentGpu < gpuCount; currentGpu++) { - // CHECK: CUDACHECK(hipSetDevice(currentGpu)); - CUDACHECK(cudaSetDevice(currentGpu)); - for (int peerGpu = 0; peerGpu < currentGpu; peerGpu++) { - if (currentGpu != peerGpu) { - // CHECK: CUDACHECK(hipDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu)); - CUDACHECK(cudaDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu)); - printf("currentGpu#%d canAccessPeer: peerGpu#%d=%d\n", currentGpu, peerGpu, - canAccessPeer); - } - // CHECK: CUDACHECK(hipSetDevice(peerGpu)); - CUDACHECK(cudaSetDevice(peerGpu)); - // CHECK: CUDACHECK(hipDeviceReset()); - CUDACHECK(cudaDeviceReset()); - } - // CHECK: 
CUDACHECK(hipSetDevice(currentGpu)); - CUDACHECK(cudaSetDevice(currentGpu)); - // CHECK: CUDACHECK(hipDeviceReset()); - CUDACHECK(cudaDeviceReset()); - } -} - -void enablePeer2Peer(int currentGpu, int peerGpu) { - int canAccessPeer; - // Must be on a multi-gpu system: - assert(currentGpu != peerGpu); - // CHECK: CUDACHECK(hipSetDevice(currentGpu)); - CUDACHECK(cudaSetDevice(currentGpu)); - // CHECK: hipDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu); - cudaDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu); - if (canAccessPeer == 1) { - // CHECK: CUDACHECK(hipDeviceEnablePeerAccess(peerGpu, 0)); - CUDACHECK(cudaDeviceEnablePeerAccess(peerGpu, 0)); - } else - printf("peer2peer transfer not possible between the selected gpu devices"); -} - -void disablePeer2Peer(int currentGpu, int peerGpu) { - int canAccessPeer; - // Must be on a multi-gpu system: - assert(currentGpu != peerGpu); - // CHECK: CUDACHECK(hipSetDevice(currentGpu)); - CUDACHECK(cudaSetDevice(currentGpu)); - // CHECK: hipDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu); - cudaDeviceCanAccessPeer(&canAccessPeer, currentGpu, peerGpu); - if (canAccessPeer == 1) { - // CHECK: CUDACHECK(hipDeviceDisablePeerAccess(peerGpu)); - CUDACHECK(cudaDeviceDisablePeerAccess(peerGpu)); - } else - printf("peer2peer disable not required"); -} - -__global__ void matrixTranspose_static_shared(float* out, float* in, - const int width) { - // CHECK-NOT: HIP_DYNAMIC_SHARED(float, sharedMem); - // CHECK: __shared__ float sharedMem[WIDTH * WIDTH]; - __shared__ float sharedMem[WIDTH * WIDTH]; - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - sharedMem[y * width + x] = in[x * width + y]; - __syncthreads(); - out[y * width + x] = sharedMem[y * width + x]; -} - -__global__ void matrixTranspose_dynamic_shared(float* out, float* in, - const int width) { - // declare dynamic shared memory - // CHECK-NOT: extern __shared__ - // CHECK: HIP_DYNAMIC_SHARED(float, 
sharedMem) - extern __shared__ float sharedMem[]; - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - sharedMem[y * width + x] = in[x * width + y]; - __syncthreads(); - out[y * width + x] = sharedMem[y * width + x]; -} - -int main() { - checkPeer2PeerSupport(); - int gpuCount; - int currentGpu, peerGpu; - // CHECK: CUDACHECK(hipGetDeviceCount(&gpuCount)); - CUDACHECK(cudaGetDeviceCount(&gpuCount)); - if (gpuCount < 2) { - printf("Peer2Peer application requires atleast 2 gpu devices"); - return 0; - } - currentGpu = 0; - peerGpu = (currentGpu + 1); - printf("currentGpu=%d peerGpu=%d (Total no. of gpu = %d)\n", currentGpu, peerGpu, gpuCount); - float *data[2], *TransposeMatrix[2], *gpuTransposeMatrix[2], *randArray; - int width = WIDTH; - randArray = (float*)malloc(NUM * sizeof(float)); - for (int i = 0; i < NUM; i++) { - randArray[i] = (float)i * 1.0f; - } - enablePeer2Peer(currentGpu, peerGpu); - // CHECK: CUDACHECK(hipSetDevice(currentGpu)); - CUDACHECK(cudaSetDevice(currentGpu)); - TransposeMatrix[0] = (float*)malloc(NUM * sizeof(float)); - // CHECK: hipMalloc((void**)&gpuTransposeMatrix[0], NUM * sizeof(float)); - cudaMalloc((void**)&gpuTransposeMatrix[0], NUM * sizeof(float)); - // CHECK: hipMalloc((void**)&data[0], NUM * sizeof(float)); - cudaMalloc((void**)&data[0], NUM * sizeof(float)); - // CHECK: hipMemcpy(data[0], randArray, NUM * sizeof(float), hipMemcpyHostToDevice); - cudaMemcpy(data[0], randArray, NUM * sizeof(float), cudaMemcpyHostToDevice); - dim3 dimGrid(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y); - dim3 dimBlock(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y); - // CHECK: hipLaunchKernelGGL(matrixTranspose_static_shared, dim3(dimGrid), dim3(dimBlock), 0, 0, gpuTransposeMatrix[0], data[0], width); - matrixTranspose_static_shared <<>>(gpuTransposeMatrix[0], data[0], width); - // CHECK: CUDACHECK(hipSetDevice(peerGpu)); - CUDACHECK(cudaSetDevice(peerGpu)); - TransposeMatrix[1] = 
(float*)malloc(NUM * sizeof(float)); - // CHECK: hipMalloc((void**)&gpuTransposeMatrix[1], NUM * sizeof(float)); - cudaMalloc((void**)&gpuTransposeMatrix[1], NUM * sizeof(float)); - // CHECK: hipMalloc((void**)&data[1], NUM * sizeof(float)); - cudaMalloc((void**)&data[1], NUM * sizeof(float)); - // CHECK: hipMemcpy(data[1], gpuTransposeMatrix[0], NUM * sizeof(float), hipMemcpyDeviceToDevice); - cudaMemcpy(data[1], gpuTransposeMatrix[0], NUM * sizeof(float), cudaMemcpyDeviceToDevice); - // CHECK: hipLaunchKernelGGL(matrixTranspose_dynamic_shared, dim3(dimGrid), dim3(dimBlock), sizeof(float) * WIDTH * WIDTH, 0, gpuTransposeMatrix[1], data[1], width); - matrixTranspose_dynamic_shared <<>>(gpuTransposeMatrix[1], data[1], width); - // CHECK: hipMemcpy(TransposeMatrix[1], gpuTransposeMatrix[1], NUM * sizeof(float), hipMemcpyDeviceToHost); - cudaMemcpy(TransposeMatrix[1], gpuTransposeMatrix[1], NUM * sizeof(float), cudaMemcpyDeviceToHost); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - disablePeer2Peer(currentGpu, peerGpu); - // verify the results - int errors = 0; - double eps = 1.0E-6; - for (int i = 0; i < NUM; i++) { - if (std::abs(randArray[i] - TransposeMatrix[1][i]) > eps) { - printf("%d cpu: %f gpu peered data %f\n", i, randArray[i], TransposeMatrix[1][i]); - errors++; - } - } - if (errors != 0) { - printf("FAILED: %d errors\n", errors); - } else { - printf("Peer2Peer PASSED!\n"); - } - free(randArray); - for (int i = 0; i < 2; i++) { - // CHECK: hipFree(data[i]); - cudaFree(data[i]); - // CHECK: hipFree(gpuTransposeMatrix[i]); - cudaFree(gpuTransposeMatrix[i]); - free(TransposeMatrix[i]); - } - // CHECK: CUDACHECK(hipSetDevice(peerGpu)); - CUDACHECK(cudaSetDevice(peerGpu)); - // CHECK: CUDACHECK(hipDeviceReset()); - CUDACHECK(cudaDeviceReset()); - // CHECK: CUDACHECK(hipSetDevice(currentGpu)); - CUDACHECK(cudaSetDevice(currentGpu)); - // CHECK: CUDACHECK(hipDeviceReset()); - CUDACHECK(cudaDeviceReset()); - return 0; -} diff --git 
a/tests/hipify-clang/unit_tests/samples/MallocManaged.cpp b/tests/hipify-clang/unit_tests/samples/MallocManaged.cpp deleted file mode 100644 index d9122160b1..0000000000 --- a/tests/hipify-clang/unit_tests/samples/MallocManaged.cpp +++ /dev/null @@ -1,45 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args -// CHECK: #include -#include - -__global__ -void add(int n, float *x, float *y) -{ - int index = blockIdx.x * blockDim.x + threadIdx.x; - int stride = blockDim.x * gridDim.x; - for (int i = index; i < n; i += stride) - y[i] = x[i] + y[i]; -} - -int main(int argc, char *argv[]) -{ - int numElements = 10; - bool testResult = true; - float *A, *B; - // CHECK: hipMallocManaged(&A, numElements * sizeof(float)); - cudaMallocManaged(&A, numElements * sizeof(float)); - // CHECK: hipMallocManaged(&B, numElements * sizeof(float)); - cudaMallocManaged(&B, numElements * sizeof(float)); - for (int i = 0; i < numElements; i++) { - A[i] = 1.0f; - B[i] = 2.0f; - } - int blockSize = 256; - int numBlocks = (numElements + blockSize - 1) / blockSize; - dim3 dimGrid(numBlocks, 1, 1); - dim3 dimBlock(blockSize, 1, 1); - // CHECK: hipLaunchKernelGGL(add, dim3(dimGrid), dim3(dimBlock), 0, 0, numElements, A, B); - add<<>>(numElements, A, B); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - float maxError = 0.0f; - for (int i = 0; i < numElements; i++) - maxError = fmax(maxError, fabs(B[i]-3.0f)); - // CHECK: hipFree(A); - cudaFree(A); - // CHECK: hipFree(B); - cudaFree(B); - if(maxError == 0.0f) - return 0; - return -1; -} diff --git a/tests/hipify-clang/unit_tests/samples/allocators.cu b/tests/hipify-clang/unit_tests/samples/allocators.cu deleted file mode 100644 index 4d33e315bf..0000000000 --- a/tests/hipify-clang/unit_tests/samples/allocators.cu +++ /dev/null @@ -1,53 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -#pragma once -// CHECK: #include -#include -#include - -/** - * Allocate GPU memory for `count` elements of type 
`T`. - */ -template -static T* gpuMalloc(size_t count) { - T* ret = nullptr; - // CHECK: hipMalloc(&ret, count * sizeof(T)); - cudaMalloc(&ret, count * sizeof(T)); - return ret; -} - -template -__global__ void add(int n, T* x, T* y) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - int stride = blockDim.x * gridDim.x; - for (int i = index; i < n; i += stride) - y[i] = x[i] + y[i]; -} - -int main(int argc, char* argv[]) { - size_t numElements = 50; - float *A = gpuMalloc(numElements); - float* B = gpuMalloc(numElements); - for (int i = 0; i < numElements; ++i) { - A[i] = 1.0f; - B[i] = 2.0f; - } - int blockSize = 512; - int numBlocks = (numElements + blockSize - 1) / blockSize; - dim3 dimGrid(numBlocks, 1, 1); - dim3 dimBlock(blockSize, 1, 1); - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(add), dim3(dimGrid), dim3(dimBlock), 0, 0, numElements, A, B); - add<<>>(numElements, A, B); - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - float maxError = 0.0f; - for (int i = 0; i < numElements; ++i) - maxError = fmax(maxError, fabs(B[i] - 3.0f)); - // CHECK: hipFree(A); - cudaFree(A); - // CHECK: hipFree(B); - cudaFree(B); - if (maxError == 0.0f) - return 0; - return -1; - } diff --git a/tests/hipify-clang/unit_tests/samples/axpy.cu b/tests/hipify-clang/unit_tests/samples/axpy.cu deleted file mode 100644 index c09c372967..0000000000 --- a/tests/hipify-clang/unit_tests/samples/axpy.cu +++ /dev/null @@ -1,97 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -#include - -// CHECK: #include -#include - -#define TOKEN_PASTE(X, Y) X ## Y -#define ARG_LIST_AS_MACRO a, device_x, device_y -#define KERNEL_CALL_AS_MACRO axpy<<<1, kDataLen>>> -#define KERNEL_NAME_MACRO axpy - -// CHECK: #define COMPLETE_LAUNCH hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y) -#define COMPLETE_LAUNCH axpy<<<1, kDataLen>>>(a, device_x, device_y) - - -template -__global__ void axpy(T a, T *x, T *y) { - y[threadIdx.x] 
= a * x[threadIdx.x]; -} - -__global__ void empty() { -} - -int main(int argc, char* argv[]) { - const int kDataLen = 4; - - float a = 2.0f; - float host_x[kDataLen] = {1.0f, 2.0f, 3.0f, 4.0f}; - float host_y[kDataLen]; - - // Copy input data to device. - float* device_x; - float* device_y; - - // CHECK: hipMalloc(&device_x, kDataLen * sizeof(float)); - cudaMalloc(&device_x, kDataLen * sizeof(float)); - -#ifdef HERRING - // CHECK: hipMalloc(&device_y, kDataLen * sizeof(float)); - cudaMalloc(&device_y, kDataLen * sizeof(float)); -#else - // CHECK: hipMalloc(&device_y, kDataLen * sizeof(double)); - cudaMalloc(&device_y, kDataLen * sizeof(double)); -#endif - - // CHECK: hipMemcpy(device_x, host_x, kDataLen * sizeof(float), hipMemcpyHostToDevice); - cudaMemcpy(device_x, host_x, kDataLen * sizeof(float), cudaMemcpyHostToDevice); - - // Launch the kernel in numerous different strange ways to exercise the prerocessor. - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y); - axpy<<<1, kDataLen>>>(a, device_x, device_y); - - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, device_x, device_y); - axpy<<<1, kDataLen>>>(a, device_x, device_y); - - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, a, TOKEN_PASTE(device, _x), device_y); - axpy<<<1, kDataLen>>>(a, TOKEN_PASTE(device, _x), device_y); - - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); - axpy<<<1, kDataLen>>>(ARG_LIST_AS_MACRO); - - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(KERNEL_NAME_MACRO), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); - KERNEL_NAME_MACRO<<<1, kDataLen>>>(ARG_LIST_AS_MACRO); - - // CHECK: hipLaunchKernelGGL(HIP_KERNEL_NAME(axpy), dim3(1), dim3(kDataLen), 0, 0, ARG_LIST_AS_MACRO); - KERNEL_CALL_AS_MACRO(ARG_LIST_AS_MACRO); - - // CHECK: hipLaunchKernelGGL(empty, dim3(1), dim3(kDataLen), 0, 0); - empty<<<1, 
kDataLen>>> ( ); - - // CHECK: hipLaunchKernelGGL(empty, dim3(1), dim3(kDataLen), 0, 0); - empty<<<1, kDataLen, 0>>>(); - - // CHECK: hipLaunchKernelGGL(empty, dim3(1), dim3(kDataLen), 0, 0); - empty<<<1, kDataLen, 0, 0>>>(); - - // CHECK: COMPLETE_LAUNCH; - COMPLETE_LAUNCH; - - - // Copy output data to host. - // CHECK: hipDeviceSynchronize(); - cudaDeviceSynchronize(); - - // CHECK: hipMemcpy(host_y, device_y, kDataLen * sizeof(float), hipMemcpyDeviceToHost); - cudaMemcpy(host_y, device_y, kDataLen * sizeof(float), cudaMemcpyDeviceToHost); - - // Print the results. - for (int i = 0; i < kDataLen; ++i) { - std::cout << "y[" << i << "] = " << host_y[i] << "\n"; - } - - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - return 0; -} diff --git a/tests/hipify-clang/unit_tests/samples/coalescing.cu b/tests/hipify-clang/unit_tests/samples/coalescing.cu deleted file mode 100644 index ec4645d673..0000000000 --- a/tests/hipify-clang/unit_tests/samples/coalescing.cu +++ /dev/null @@ -1,117 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// To measure effects of memory coalescing. Coalescing.cu -// B. 
Wilkinson Jan 30, 2011 - -#include -#include -#include -// CHECK: #include -#include - -#define BlockSize 16 // Size of blocks, 32 x 32 threads, fixed, used globally - -__global__ void gpu_Comput (int *h, int N, int T) { - -// Array loaded with global thread ID that acesses that location - - int col = threadIdx.x + blockDim.x * blockIdx.x; - int row = threadIdx.y + blockDim.y * blockIdx.y; - - int threadID = col + row * N; - int index = row + col * N; // sequentially down each row - - for (int t = 0; t < T; t++) // loop to repeat to reduce other time effects - h[index] = threadID; // load array with flattened global thread ID -} - -void printArray(int *h, int N) { - - printf("Results of computation, every N/8 numbers, eight numbers\n"); - - for (int row = 0; row < N; row += N/8) { - for (int col = 0; col < N; col += N/8) - printf("%6d ", h[col + row * N]); - printf("\n"); - } -} - -int main(int argc, char *argv[]) { - - int T = 100; // number of iterations, entered at keyboard - int B = 1; // number of blocks, entered at keyboard - char key; - - int *h, *dev_h; // ptr to array holding numbers on host and device - // CHECK: hipEvent_t start, stop; - cudaEvent_t start, stop; // cuda events to measure time - float elapsed_time_ms1; - // CHECK: hipEventCreate( &start ); - // CHECK: hipEventCreate( &stop ); - cudaEventCreate( &start ); - cudaEventCreate( &stop ); - -/* ------------------------- Keyboard input -----------------------------------*/ - -do { // loop to repeat complete program - - printf("Grid Structure 2-D grid, 2-D blocks\n"); - printf("Blocks fixed at 16 x 16 threads, 512 threads, max for compute cap. 
1.x\n"); - printf("Enter number of blocks in grid, each dimension, currently %d\n",B); - scanf("%d",&B); - printf("Enter number of iterations, currently %d\n",T); - scanf("%d",&T); - - int N = B * BlockSize; // size of data array, given input data - - printf("Array size (and total grid-block size) %d x %d\n", N, N); - - dim3 Block(BlockSize, BlockSize); //Block structure, 32 x 32 max - dim3 Grid(B, B); //Grid structure, B x B - -/* ------------------------- Allocate Memory-----------------------------------*/ - - int size = N * N * sizeof(int); // number of bytes in total in array - h = (int*) malloc(size); // Array on host - // CHECK: hipMalloc((void**)&dev_h, size); - cudaMalloc((void**)&dev_h, size); // allocate device memory - -/* ------------------------- GPU Computation -----------------------------------*/ - - // CHECK: hipEventRecord( start, 0 ); - cudaEventRecord( start, 0 ); - // CHECK: hipLaunchKernelGGL(gpu_Comput, dim3(Grid), dim3(Block), 0, 0, dev_h, N, T); - gpu_Comput<<< Grid, Block >>>(dev_h, N, T); - // CHECK: hipEventRecord( stop, 0 ); - // CHECK: hipEventSynchronize( stop ); - // CHECK: hipEventElapsedTime( &elapsed_time_ms1, start, stop ); - cudaEventRecord( stop, 0 ); // instrument code to measue end time - cudaEventSynchronize( stop ); // wait for all work done by threads - cudaEventElapsedTime( &elapsed_time_ms1, start, stop ); - // CHECK: hipMemcpy(h,dev_h, size ,hipMemcpyDeviceToHost); - cudaMemcpy(h,dev_h, size ,cudaMemcpyDeviceToHost); //Get results to check - - printArray(h,N); - printf("\nTime to calculate results on GPU: %f ms.\n", elapsed_time_ms1); - -/* -------------------------REPEAT PROGRAM INPUT-----------------------------------*/ - - printf("\nEnter c to repeat, return to terminate\n"); - - scanf("%c",&key); - scanf("%c",&key); - -} while (key == 'c'); // loop of complete program - -/* -------------- clean up ---------------------------------------*/ - -free(h); - // CHECK: hipFree(dev_h); - cudaFree(dev_h); - // CHECK: 
hipEventDestroy(start); - // CHECK: hipEventDestroy(stop); - cudaEventDestroy(start); - cudaEventDestroy(stop); - - return 0; -} diff --git a/tests/hipify-clang/unit_tests/samples/cudaRegister.cu b/tests/hipify-clang/unit_tests/samples/cudaRegister.cu deleted file mode 100644 index 2cc754300a..0000000000 --- a/tests/hipify-clang/unit_tests/samples/cudaRegister.cu +++ /dev/null @@ -1,106 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -/* -Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#include -#include -#include -#include -#include - -#define LEN 1024 -#define SIZE LEN * sizeof(float) -#define ITER 1024*1024 - -// CHECK: if(status != hipSuccess) { -#define check(msg, status){ \ -if(status != cudaSuccess) { \ - printf("%s failed. 
\n", #msg); \ -} \ -} - -__global__ void Inc1(float *Ad, float *Bd){ - int tx = threadIdx.x + blockIdx.x * blockDim.x; - if(tx < 1 ){ - for(int i=0;i>>(Ad, Bd); - A[0] = -(ITER*1.0f); - std::cout<<"Same cache line before completion: \t"<< A[0]<>>(Ad, Bd); - A[0] = -(ITER*1.0f); - std::cout<<"Diff cache line before completion: \t"< -// CHECK: #include -#include - -__global__ void dynamicReverse(int *d, int n) -{ - // Dynamic shared memory - // CHECK-NOT: extern __shared__ - // CHECK: HIP_DYNAMIC_SHARED(int, s); - extern __shared__ int s[]; - int t = threadIdx.x; - int tr = n-t-1; - s[t] = d[t]; - __syncthreads(); - d[t] = s[tr]; -} - -int main(void) -{ - const int n = 64; - int a[n], r[n], d[n]; - - for (int i = 0; i < n; i++) { - a[i] = i; - r[i] = n-i-1; - d[i] = 0; - } - - int *d_d; - // CHECK: hipMalloc(&d_d, n * sizeof(int)); - cudaMalloc(&d_d, n * sizeof(int)); - // run version with dynamic shared memory - // CHECK: hipMemcpy(d_d, a, n*sizeof(int), hipMemcpyHostToDevice); - cudaMemcpy(d_d, a, n*sizeof(int), cudaMemcpyHostToDevice); - // CHECK: hipLaunchKernelGGL(dynamicReverse, dim3(1), dim3(n), n*sizeof(int), 0, d_d, n); - dynamicReverse<<<1,n,n*sizeof(int)>>>(d_d, n); - // CHECK: hipMemcpy(d, d_d, n*sizeof(int), hipMemcpyDeviceToHost); - cudaMemcpy(d, d_d, n*sizeof(int), cudaMemcpyDeviceToHost); - for (int i = 0; i < n; i++) - if (d[i] != r[i]) printf("Error: d[%d]!=r[%d] (%d, %d)n", i, i, d[i], r[i]); -} diff --git a/tests/hipify-clang/unit_tests/samples/intro.cu b/tests/hipify-clang/unit_tests/samples/intro.cu deleted file mode 100644 index 5ae5479aa9..0000000000 --- a/tests/hipify-clang/unit_tests/samples/intro.cu +++ /dev/null @@ -1,174 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -#include -#include -#include -// CHECK: #include -#include - -#define K_THREADS 64 -#define K_INDEX() ((gridDim.x * blockIdx.y + blockIdx.x) * blockDim.x + threadIdx.x) -#define RND() ((rand() & 0x7FFF) / float(0x8000)) -#define ERRORCHECK() 
cErrorCheck(__FILE__, __LINE__) - -// CHECK: hipEvent_t t##_start, t##_end; \ -// CHECK: hipEventCreate(&t##_start); \ -// CHECK: hipEventCreate(&t##_end); -#define TIMER_CREATE(t) \ - cudaEvent_t t##_start, t##_end; \ - cudaEventCreate(&t##_start); \ - cudaEventCreate(&t##_end); - -// CHECK: hipEventRecord(t##_start); \ -// CHECK: hipEventSynchronize(t##_start); -#define TIMER_START(t) \ - cudaEventRecord(t##_start); \ - cudaEventSynchronize(t##_start); \ - -// CHECK: hipEventRecord(t##_start); \ -// CHECK: hipEventSynchronize(t##_start); \ -// CHECK: hipEventRecord(t##_end); \ -// CHECK: hipEventSynchronize(t##_end); \ -// CHECK: hipEventElapsedTime(&t, t##_start, t##_end); -#define TIMER_END(t) \ - cudaEventRecord(t##_start); \ - cudaEventSynchronize(t##_start); \ - cudaEventRecord(t##_end); \ - cudaEventSynchronize(t##_end); \ - cudaEventElapsedTime(&t, t##_start, t##_end); - - -inline void cErrorCheck(const char *file, int line) { -// CHECK: hipDeviceSynchronize(); -// CHECK: hipError_t err = hipGetLastError(); -// CHECK: if (err != hipSuccess) { -// CHECK: printf("Error: %s\n", hipGetErrorString(err)); - cudaThreadSynchronize(); - cudaError_t err = cudaGetLastError(); - if (err != cudaSuccess) { - printf("Error: %s\n", cudaGetErrorString(err)); - printf(" @ %s: %d\n", file, line); - exit(-1); - } -} - -inline dim3 K_GRID(int n, int threads = K_THREADS) { - int blocks = (int)ceilf(sqrtf((float)n/threads)); - dim3 grid(blocks, blocks); - return grid; -} - -typedef struct data { - int n; - float4 *r, *v, *f; -} data; - -data cpu, gpu; - -#define N 20 - -__global__ void repulsion(data gpu); -__global__ void integration(data gpu); - - -int main() { - printf("Cuda Test 1\n"); - - int count = 0; - // CHECK: hipGetDeviceCount(&count); - cudaGetDeviceCount(&count); - printf(" %d CUDA devices found\n", count); - if(!count) { - ::exit(EXIT_FAILURE); - } - // CHECK: hipFree(0); - cudaFree(0); - - cpu.n = N; - - cpu.r = (float4*)malloc(N * sizeof(float4)); - cpu.v = 
(float4*)malloc(N * sizeof(float4)); - cpu.f = (float4*)malloc(N * sizeof(float4)); - - for(int i = 0; i < N; ++i) { - cpu.v[i] = make_float4(0,0,0,0); - cpu.r[i] = make_float4(RND(), RND(), RND(), 0); - cpu.f[i] = make_float4(0,0.01,0,0); - } - - gpu = cpu; - // CHECK: hipMalloc(&gpu.r, N * sizeof(float4)); - // CHECK: hipMalloc(&gpu.v, N * sizeof(float4)); - // CHECK: hipMalloc(&gpu.f, N * sizeof(float4)); - cudaMalloc(&gpu.r, N * sizeof(float4)); - cudaMalloc(&gpu.v, N * sizeof(float4)); - cudaMalloc(&gpu.f, N * sizeof(float4)); - // CHECK: hipMemcpy(gpu.r, cpu.r, cpu.n * sizeof(float4), hipMemcpyHostToDevice); - // CHECK: hipMemcpy(gpu.v, cpu.v, cpu.n * sizeof(float4), hipMemcpyHostToDevice); - // CHECK: hipMemcpy(gpu.f, cpu.f, cpu.n * sizeof(float4), hipMemcpyHostToDevice); - cudaMemcpy(gpu.r, cpu.r, cpu.n * sizeof(float4), cudaMemcpyHostToDevice); - cudaMemcpy(gpu.v, cpu.v, cpu.n * sizeof(float4), cudaMemcpyHostToDevice); - cudaMemcpy(gpu.f, cpu.f, cpu.n * sizeof(float4), cudaMemcpyHostToDevice); - - ERRORCHECK(); - float rep; - TIMER_CREATE(rep); - TIMER_START(rep); - // CHECK: hipLaunchKernelGGL(integration, dim3(K_GRID(cpu.n)), dim3(K_THREADS), 0, 0, gpu); - integration <<< K_GRID(cpu.n), K_THREADS >>>(gpu); - - TIMER_END(rep); - printf("Took: %f ms\n", rep); - ERRORCHECK(); - // CHECK: hipMemcpy(cpu.r, gpu.r, cpu.n * sizeof(float4), hipMemcpyDeviceToHost); - // CHECK: hipMemcpy(cpu.v, gpu.v, cpu.n * sizeof(float4), hipMemcpyDeviceToHost); - // CHECK: hipMemcpy(cpu.f, gpu.f, cpu.n * sizeof(float4), hipMemcpyDeviceToHost); - cudaMemcpy(cpu.r, gpu.r, cpu.n * sizeof(float4), cudaMemcpyDeviceToHost); - cudaMemcpy(cpu.v, gpu.v, cpu.n * sizeof(float4), cudaMemcpyDeviceToHost); - cudaMemcpy(cpu.f, gpu.f, cpu.n * sizeof(float4), cudaMemcpyDeviceToHost); - // CHECK: hipHostFree(cpu.r); - // CHECK: hipHostFree(cpu.v); - // CHECK: hipHostFree(cpu.f); - cudaFreeHost(cpu.r); - cudaFreeHost(cpu.v); - cudaFreeHost(cpu.f); - // CHECK: hipFree(gpu.r); - // CHECK: 
hipFree(gpu.v); - // CHECK: hipFree(gpu.f); - cudaFree(gpu.r); - cudaFree(gpu.v); - cudaFree(gpu.f); - // CHECK: hipDeviceReset(); - cudaDeviceReset(); - - printf("Results: \n"); - for(int i = 0; i < N; ++i) { - printf("%f, %f, %f \n", cpu.r[i].x, cpu.r[i].y, cpu.r[i].z); - } - - printf("Ready...\n"); - return 0; -} - -__global__ void repulsion(data gpu) { - int idx = K_INDEX(); - if(idx < N) { - gpu.r[idx].x = 1; - gpu.r[idx].y = 1; - gpu.r[idx].z = 1; - } -} - -#define MULT4(v, s) v.x *= s; v.y *= s; v.z *= s; v.w *= s; -#define ADD4(v1, v2) v1.x += v2.x; v1.y += v2.y; v1.z += v2.z; v1.w += v2.w; - -__global__ void integration(data gpu) { - int i = K_INDEX(); - if(i < N) { - MULT4(gpu.f[i], 0.01); - MULT4(gpu.v[i], 0.01); - ADD4(gpu.v[i], gpu.f[i]); - ADD4(gpu.r[i], gpu.v[i]); - gpu.f[i] = make_float4(0,0,0,0); - } -} diff --git a/tests/hipify-clang/unit_tests/samples/square.cu b/tests/hipify-clang/unit_tests/samples/square.cu deleted file mode 100644 index b415c15495..0000000000 --- a/tests/hipify-clang/unit_tests/samples/square.cu +++ /dev/null @@ -1,112 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -/* -Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ -#include -#include - -#define CHECK(cmd) \ -{\ - cudaError_t error = cmd;\ - if (error != cudaSuccess) { \ - fprintf(stderr, "error: '%s'(%d) at %s:%d\n", cudaGetErrorString(error), error,__FILE__, __LINE__); \ - exit(EXIT_FAILURE);\ - }\ -} - - -/* - * Square each element in the array A and write to array C. - */ -template -__global__ void -vector_square(T *C_d, const T *A_d, size_t N) -{ - size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); - size_t stride = blockDim.x * gridDim.x; - - for (size_t i=offset; i>> (C_d, A_d, N); - - printf ("info: copy Device2Host\n"); - // CHECK: CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); - CHECK ( cudaMemcpy(C_h, C_d, Nbytes, cudaMemcpyDeviceToHost)); - - printf ("info: check result\n"); - for (size_t i=0; i -// CHECK: #include -#include - -__global__ void staticReverse(int *d, int n) -{ - // CHECK-NOT: HIP_DYNAMIC_SHARED(int, s); - // CHECK: __shared__ int s[64]; - __shared__ int s[64]; - int t = threadIdx.x; - int tr = n-t-1; - s[t] = d[t]; - // Will not conttinue until all threads completed. 
- __syncthreads(); - d[t] = s[tr]; -} - -int main(void) -{ - const int n = 64; - int a[n], r[n], d[n]; - - for (int i = 0; i < n; i++) { - a[i] = i; - r[i] = n-i-1; - d[i] = 0; - } - - int *d_d; - // CHECK: hipMalloc(&d_d, n * sizeof(int)); - cudaMalloc(&d_d, n * sizeof(int)); - // run version with static shared memory - // CHECK: hipMemcpy(d_d, a, n*sizeof(int), hipMemcpyHostToDevice); - cudaMemcpy(d_d, a, n*sizeof(int), cudaMemcpyHostToDevice); - // CHECK: hipLaunchKernelGGL(staticReverse, dim3(1), dim3(n), 0, 0, d_d, n); - staticReverse<<<1,n>>>(d_d, n); - // CHECK: hipMemcpy(d, d_d, n*sizeof(int), hipMemcpyDeviceToHost); - cudaMemcpy(d, d_d, n*sizeof(int), cudaMemcpyDeviceToHost); - for (int i = 0; i < n; i++) - if (d[i] != r[i]) printf("Error: d[%d]!=r[%d] (%d, %d)n", i, i, d[i], r[i]); -} diff --git a/tests/hipify-clang/unit_tests/samples/vec_add.cu b/tests/hipify-clang/unit_tests/samples/vec_add.cu deleted file mode 100644 index a6d8950e0b..0000000000 --- a/tests/hipify-clang/unit_tests/samples/vec_add.cu +++ /dev/null @@ -1,89 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args - -// Kernel definition -__global__ void vecAdd(float* A, float* B, float* C) -{ - int i = threadIdx.x; - A[i] = 0; - B[i] = i; - C[i] = A[i] + B[i]; -} -// CHECK: #include -#include -#define SIZE 10 -#define KERNELINVOKES 5000000 -int vecadd(int gpudevice, int rank) -{ - int devcheck(int, int); - devcheck(gpudevice, rank); - float A[SIZE], B[SIZE], C[SIZE]; - // Kernel invocation - float *devPtrA; - float *devPtrB; - float *devPtrC; - int memsize = SIZE * sizeof(float); - // CHECK: hipMalloc((void**)&devPtrA, memsize); - // CHECK: hipMalloc((void**)&devPtrB, memsize); - // CHECK: hipMalloc((void**)&devPtrC, memsize); - cudaMalloc((void**)&devPtrA, memsize); - cudaMalloc((void**)&devPtrB, memsize); - cudaMalloc((void**)&devPtrC, memsize); - // CHECK: hipMemcpy(devPtrA, A, memsize, hipMemcpyHostToDevice); - // CHECK: hipMemcpy(devPtrB, B, memsize, 
hipMemcpyHostToDevice); - cudaMemcpy(devPtrA, A, memsize, cudaMemcpyHostToDevice); - cudaMemcpy(devPtrB, B, memsize, cudaMemcpyHostToDevice); - for (int i = 0; i>>(devPtrA, devPtrB, devPtrC); - } - // CHECK: hipMemcpy(C, devPtrC, memsize, hipMemcpyDeviceToHost); - cudaMemcpy(C, devPtrC, memsize, cudaMemcpyDeviceToHost); - // calculate only up to gpudevice to show the unique output - // of each rank's kernel launch - for (int i = 0; i= device_count) - { - printf("gpudevice >= device_count ... exiting\n"); - exit(1); - } - // CHECK: hipError_t cudareturn; - // CHECK: hipDeviceProp_t deviceProp; - // CHECK: hipGetDeviceProperties(&deviceProp, gpudevice); - cudaError_t cudareturn; - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, gpudevice); - if (deviceProp.warpSize <= 1) - { - printf("rank %d: warning, CUDA Device Emulation (CPU) detected, exiting\n", rank); - exit(1); - } - // CHECK: cudareturn = hipSetDevice(gpudevice); - cudareturn = cudaSetDevice(gpudevice); - // CHECK: if (cudareturn == hipErrorInvalidDevice) - if (cudareturn == cudaErrorInvalidDevice) - { - // CHECK: perror("hipSetDevice returned hipErrorInvalidDevice"); - perror("cudaSetDevice returned cudaErrorInvalidDevice"); - } - else - { - // CHECK: hipGetDevice(&device); - cudaGetDevice(&device); - printf("rank %d: cudaGetDevice()=%d\n", rank, device); - } -} diff --git a/tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp b/tests/src/Negative/memory/hipMemcpyFromSymbol.cpp similarity index 62% rename from tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp rename to tests/src/Negative/memory/hipMemcpyFromSymbol.cpp index d5dffd0b09..10f8c51a6d 100644 --- a/tests/hipify-clang/unit_tests/samples/2_Cookbook/11_texture_driver/tex2dKernel.cpp +++ b/tests/src/Negative/memory/hipMemcpyFromSymbol.cpp @@ -1,17 +1,13 @@ -// RUN: %run_test hipify "%s" "%t" %hipify_args %clang_args /* -Copyright (c) 2015-present Advanced Micro Devices, 
Inc. All rights reserved. - +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -21,16 +17,30 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -// CHECK: #include -#include -// CHECK-NOT: #include -#include +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * TEST: %t + * HIT_END + */ -// CHECK: extern texture tex; -extern texture tex; +#include "test_common.h" +#define SIZE 1024 -extern "C" __global__ void tex2dKernel(float* outputData, int width, int height) { - int x = blockDim.x * blockIdx.x + threadIdx.x; - int y = blockDim.y * blockIdx.y + threadIdx.y; - outputData[y * width + x] = tex2D(tex, x, y); +int main(){ + + void *Sd; + hipError_t e; + char S[SIZE]="This is not a device symbol"; + + HIPCHECK(hipMalloc(&Sd,SIZE)); + + e = hipMemcpyFromSymbol(S, HIP_SYMBOL(Sd), SIZE, 0, hipMemcpyDeviceToHost); + HIPASSERT(e==hipErrorInvalidSymbol); + + e = hipMemcpyFromSymbol(S, NULL, SIZE, 0, hipMemcpyDeviceToHost); + HIPASSERT(e==hipErrorInvalidSymbol); + + HIPCHECK(hipFree(Sd)); + + passed(); } diff --git a/hipify-clang/src/CUDA2HIP_CAFFE2_API_types.cpp b/tests/src/Negative/memory/hipMemcpyFromSymbolAsync.cpp similarity index 59% rename from hipify-clang/src/CUDA2HIP_CAFFE2_API_types.cpp rename to tests/src/Negative/memory/hipMemcpyFromSymbolAsync.cpp index 4791cffeee..fa341c6cea 100644 --- a/hipify-clang/src/CUDA2HIP_CAFFE2_API_types.cpp +++ b/tests/src/Negative/memory/hipMemcpyFromSymbolAsync.cpp @@ -1,16 +1,13 @@ /* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -20,15 +17,33 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "CUDA2HIP.h" +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * TEST: %t + * HIT_END + */ -// Map of all types -const std::map CUDA_CAFFE2_TYPE_NAME_MAP{ +#include "test_common.h" +#define SIZE 1024 - // 5. Defines - {"REGISTER_CUDA_OPERATOR", {"REGISTER_HIP_OPERATOR", "", CONV_DEFINE, API_CAFFE2}}, - {"REGISTER_CUDA_OPERATOR_CREATOR", {"REGISTER_HIP_OPERATOR_CREATOR", "", CONV_DEFINE, API_CAFFE2}}, +int main(){ - // 6. 
Classes - {"CUDAContext", {"HIPContext", "", CONV_TYPE, API_CAFFE2}}, -}; + void *Sd; + hipError_t e; + char S[SIZE]="This is not a device symbol"; + + HIPCHECK(hipMalloc(&Sd,SIZE)); + + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + e = hipMemcpyFromSymbolAsync(S, HIP_SYMBOL(Sd), SIZE, 0, hipMemcpyDeviceToHost, stream); + HIPASSERT(e==hipErrorInvalidSymbol); + + e = hipMemcpyFromSymbolAsync(S, NULL, SIZE, 0, hipMemcpyDeviceToHost, stream); + HIPASSERT(e==hipErrorInvalidSymbol); + + HIPCHECK(hipFree(Sd)); + + passed(); +} diff --git a/hipify-clang/src/CUDA2HIP_CUB_API_types.cpp b/tests/src/Negative/memory/hipMemcpyToSymbol.cpp similarity index 62% rename from hipify-clang/src/CUDA2HIP_CUB_API_types.cpp rename to tests/src/Negative/memory/hipMemcpyToSymbol.cpp index 0ef1912b54..8626c2c34f 100644 --- a/hipify-clang/src/CUDA2HIP_CUB_API_types.cpp +++ b/tests/src/Negative/memory/hipMemcpyToSymbol.cpp @@ -1,16 +1,13 @@ /* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -20,9 +17,30 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "CUDA2HIP.h" +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * TEST: %t + * HIT_END + */ -// Maps the names of CUDA CUB API types to the corresponding HIP types -const std::map CUDA_CUB_TYPE_NAME_MAP{ - {"cub", {"hipcub", "", CONV_TYPE, API_CUB}}, -}; +#include "test_common.h" +#define SIZE 1024 + +int main(){ + + void *Sd; + hipError_t e; + char S[SIZE]="This is not a device symbol"; + + HIPCHECK(hipMalloc(&Sd,SIZE)); + + e = hipMemcpyToSymbol(HIP_SYMBOL(Sd), S, SIZE, 0, hipMemcpyHostToDevice); + HIPASSERT(e==hipErrorInvalidSymbol); + + e = hipMemcpyToSymbol(NULL, S, SIZE, 0, hipMemcpyHostToDevice); + HIPASSERT(e==hipErrorInvalidSymbol); + + HIPCHECK(hipFree(Sd)); + + passed(); +} diff --git a/tests/src/Negative/memory/hipMemcpyToSymbolAsync.cpp b/tests/src/Negative/memory/hipMemcpyToSymbolAsync.cpp new file mode 100644 index 0000000000..832e4336be --- /dev/null +++ b/tests/src/Negative/memory/hipMemcpyToSymbolAsync.cpp @@ -0,0 +1,49 @@ +/* +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * TEST: %t + * HIT_END + */ + +#include "test_common.h" +#define SIZE 100 + +int main(){ + + void *Sd; + hipError_t e; + char S[SIZE]="This is not a device symbol"; + + HIPCHECK(hipMalloc(&Sd,SIZE)); + + hipStream_t stream; + HIPCHECK(hipStreamCreate(&stream)); + + e = hipMemcpyToSymbolAsync(HIP_SYMBOL(Sd), S, SIZE, 0, hipMemcpyHostToDevice, stream); + HIPASSERT(e==hipErrorInvalidSymbol); + + e = hipMemcpyToSymbolAsync(NULL, S, SIZE, 0, hipMemcpyHostToDevice, stream); + HIPASSERT(e==hipErrorInvalidSymbol); + + HIPCHECK(hipFree(Sd)); + + passed(); +} diff --git a/hipify-clang/src/CUDA2HIP_Scripting.h b/tests/src/Negative/memory/hipMemory.cpp similarity index 63% rename from hipify-clang/src/CUDA2HIP_Scripting.h rename to tests/src/Negative/memory/hipMemory.cpp index 76b103735b..a71ee948f5 100644 --- a/hipify-clang/src/CUDA2HIP_Scripting.h +++ b/tests/src/Negative/memory/hipMemory.cpp @@ -1,16 +1,13 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - +/* +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -20,22 +17,27 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#pragma once +/* HIT_START + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM vdi + * TEST: %t + * HIT_END + */ -extern std::set DeviceSymbolFunctions0; -extern std::set DeviceSymbolFunctions1; -extern std::set ReinterpretFunctions0; -extern std::set ReinterpretFunctions1; +#include "test_common.h" +#define SIZE 100 -extern std::string sHIP_SYMBOL; -extern std::string s_reinterpret_cast; +int main(){ + hipError_t e; + char str[SIZE]="Hi, I am Ellesemere. What is ur name?"; -namespace perl { + e = hipMemcpy(0, str, SIZE, hipMemcpyHostToDevice); + HIPASSERT(e==hipErrorInvalidValue); + + e = hipMemcpy(NULL, str, SIZE, hipMemcpyHostToDevice); + HIPASSERT(e==hipErrorInvalidValue); - bool generate(bool Generate = true); -} - -namespace python { - - bool generate(bool Generate = true); + e = hipMemset(0,99,80); + HIPASSERT(e==hipErrorInvalidValue); + + passed(); } diff --git a/hipify-clang/src/CUDA2HIP_CAFFE2_API_functions.cpp b/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp similarity index 69% rename from hipify-clang/src/CUDA2HIP_CAFFE2_API_functions.cpp rename to tests/src/Negative/stream/hipStreamCreateWithFlags.cpp index 63860de262..6f0662b82d 100644 --- a/hipify-clang/src/CUDA2HIP_CAFFE2_API_functions.cpp +++ b/tests/src/Negative/stream/hipStreamCreateWithFlags.cpp @@ -1,16 +1,13 @@ /* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -20,9 +17,24 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#include "CUDA2HIP.h" +/* HIT_START + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM vdi + * TEST: %t + * HIT_END + */ -// Maps the names of CUDA SPARSE API functions to the corresponding HIP functions -const std::map CUDA_CAFFE2_FUNCTION_MAP{ - {"cuda_stream", {"hip_stream", "", CONV_LIB_FUNC, API_CAFFE2}}, -}; \ No newline at end of file +#include "test_common.h" + +int main(){ + + hipError_t e; + hipStream_t stream; + + e = hipStreamCreateWithFlags(&stream, -1); + HIPASSERT(e==hipErrorInvalidValue); + + e = hipStreamCreateWithFlags(&stream, 2); + HIPASSERT(e==hipErrorInvalidValue); + + passed(); +} diff --git a/tests/src/Performance/memory/hipPerfMemMallocCpyFree.cpp b/tests/src/Performance/memory/hipPerfMemMallocCpyFree.cpp new file mode 100644 index 0000000000..f059a564a1 --- /dev/null +++ b/tests/src/Performance/memory/hipPerfMemMallocCpyFree.cpp @@ -0,0 +1,114 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "test_common.h" +#include +#include + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * TEST: %t + * HIT_END + */ + +#define NUM_SIZE 19 //size up to 16M +#define NUM_ITER 500 //Total GPU memory up to 16M*500=8G + +void valSet(int* A, int val, size_t size) { + size_t len = size / sizeof(int); + for (int i = 0; i < len; i++) { + A[i] = val; + } +} + +void setup(size_t *size, const int num, int **pA) { + std::cout << "size: "; + for (int i = 0; i < num; i++) { + size[i] = 1 << (i + 6); + std::cout << size[i] << " "; + } + std::cout << std::endl; + *pA = (int*)malloc(size[num - 1]); + valSet(*pA, 1, size[num - 1]); +} + +void testInit(size_t size, int *A) { + int *Ad; + clock_t start = clock(); + hipMalloc(&Ad, size); //hip::init() will be called + clock_t end = clock(); + double uS = (end - start) * 1000000. 
/ CLOCKS_PER_SEC; + std::cout << "Initial" << std::endl; + std::cout << "hipMalloc(" << size << ") cost " << uS << "us" << std::endl; + + start = clock(); + hipMemcpy(Ad, A, size, hipMemcpyHostToDevice); + hipDeviceSynchronize(); + end = clock(); + uS = (end - start) * 1000000. / CLOCKS_PER_SEC; + std::cout << "hipMemcpy(" << size << ") cost " << uS << "us" << std::endl; + + start = clock(); + hipFree(Ad); + end = clock(); + uS = (end - start) * 1000000. / CLOCKS_PER_SEC; + std::cout << "hipFree(" << size << ") cost " << uS << "us" << std::endl; +} + +int main() { + double uS; + clock_t start, end; + size_t size[NUM_SIZE] = { 0 }; + int *Ad[NUM_ITER] = { nullptr }; + int *A; + + setup(size, NUM_SIZE, &A); + testInit(size[0], A); + + for (int i = 0; i < NUM_SIZE; i++) { + std::cout << size[i] << std::endl; + start = clock(); + for (int j = 0; j < NUM_ITER; j++) { + HIPCHECK(hipMalloc(&Ad[j], size[i])); + } + end = clock(); + uS = (end - start) * 1000000. / (NUM_ITER * CLOCKS_PER_SEC); + std::cout << "hipMalloc(" << size[i] << ") cost " << uS << "us" << std::endl; + + start = clock(); + for (int j = 0; j < NUM_ITER; j++) { + HIPCHECK(hipMemcpy(Ad[j], A, size[i], hipMemcpyHostToDevice)); + } + hipDeviceSynchronize(); + end = clock(); + uS = (end - start) * 1000000. / (NUM_ITER * CLOCKS_PER_SEC); + std::cout << "hipMemcpy(" << size[i] << ") cost " << uS << "us" << std::endl; + + start = clock(); + for (int j = 0; j < NUM_ITER; j++) { + HIPCHECK(hipFree(Ad[j])); + Ad[j] = nullptr; + } + end = clock(); + double uS = (end - start) * 1000000. 
/ (NUM_ITER * CLOCKS_PER_SEC); + std::cout << "hipFree(" << size[i] << ") cost " << uS << "us" << std::endl; + } + free(A); + passed(); +} diff --git a/tests/src/Performance/perfDispatch/hipPerfBufferCopyRectSpeed.cpp b/tests/src/Performance/perfDispatch/hipPerfBufferCopyRectSpeed.cpp new file mode 100644 index 0000000000..71d8ebbe0a --- /dev/null +++ b/tests/src/Performance/perfDispatch/hipPerfBufferCopyRectSpeed.cpp @@ -0,0 +1,281 @@ +#include +#include +#include +#include + +#include "timer.h" +#include "test_common.h" + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc + * TEST: %t + * HIT_END + */ + +// Quiet pesky warnings +#ifdef WIN_OS +#define SNPRINTF sprintf_s +#else +#define SNPRINTF snprintf +#endif + +#define NUM_SIZES 8 +//4KB, 8KB, 64KB, 256KB, 1 MB, 4MB, 16 MB, 16MB+10 +static const unsigned int Sizes[NUM_SIZES] = {4096, 8192, 65536, 262144, 1048576, 4194304, 16777216, 16777216+10}; + +static const unsigned int Iterations[2] = {1, 1000}; + +#define BUF_TYPES 4 +// 16 ways to combine 4 different buffer types +#define NUM_SUBTESTS (BUF_TYPES*BUF_TYPES) + +#define CHECK_RESULT(test, msg) \ + if ((test)) \ + { \ + printf("\n%s\n", msg); \ + abort(); \ + } + +void setData(void *ptr, unsigned int size, char value) +{ + char *ptr2 = (char *)ptr; + for (unsigned int i = 0; i < size ; i++) + { + ptr2[i] = value; + } +} + +void checkData(void *ptr, unsigned int size, char value) +{ + char *ptr2 = (char *)ptr; + for (unsigned int i = 0; i < size; i++) + { + if (ptr2[i] != value) + { + printf("Data validation failed at %d! 
Got 0x%08x\n", i, ptr2[i]); + printf("Expected 0x%08x\n", value); + CHECK_RESULT(true, "Data validation failed!"); + break; + } + } +} + + +int main(int argc, char* argv[]) { + HipTest::parseStandardArguments(argc, argv, true); + + hipError_t err = hipSuccess; + hipDeviceProp_t props = {0}; + err = hipGetDeviceProperties(&props, p_gpuDevice); /* keep the status so the check below is not dead code */ + CHECK_RESULT(err != hipSuccess, "hipGetDeviceProperties failed" ); + printf("Set device to %d : %s\n", p_gpuDevice, props.name); + printf("Legend: unp - unpinned(malloc), hM - hipMalloc(device)\n"); + printf(" hHR - hipHostRegister(pinned), hHM - hipHostMalloc(prePinned)\n"); + err = hipSetDevice(p_gpuDevice); + CHECK_RESULT(err != hipSuccess, "hipSetDevice failed" ); + + unsigned int bufSize_; + bool hostMalloc[2] = {false}; + bool hostRegister[2] = {false}; + bool unpinnedMalloc[2] = {false}; + unsigned int numIter; + void *memptr[2] = {NULL}; + void *alignedmemptr[2] = {NULL}; + void* srcBuffer = NULL; + void* dstBuffer = NULL; + + int numTests = (p_tests == -1) ? (NUM_SIZES*NUM_SUBTESTS*2 - 1) : p_tests; + int test = (p_tests == -1) ?
0 : p_tests; + + for(;test <= numTests; test++) + { + unsigned int srcTest = (test / NUM_SIZES) % BUF_TYPES; + unsigned int dstTest = (test / (NUM_SIZES*BUF_TYPES)) % BUF_TYPES; + bufSize_ = Sizes[test % NUM_SIZES]; + hostMalloc[0] = hostMalloc[1] = false; + hostRegister[0] = hostRegister[1] = false; + unpinnedMalloc[0] = unpinnedMalloc[1] = false; + srcBuffer = dstBuffer = 0; + memptr[0] = memptr[1] = NULL; + alignedmemptr[0] = alignedmemptr[1] = NULL; + + size_t width = static_cast(sqrt(static_cast(bufSize_))); + + if (srcTest == 3) + { + hostRegister[0] = true; + } + else if (srcTest == 2) + { + hostMalloc[0] = true; + } + else if (srcTest == 1) + { + unpinnedMalloc[0] = true; + } + + if (dstTest == 1) + { + unpinnedMalloc[1] = true; + } + else if (dstTest == 2) + { + hostMalloc[1] = true; + } + else if (dstTest == 3) + { + hostRegister[1] = true; + } + + numIter = Iterations[test / (NUM_SIZES * NUM_SUBTESTS)]; + + if (hostMalloc[0]) + { + err = hipHostMalloc((void**)&srcBuffer, bufSize_, 0); + setData(srcBuffer, bufSize_, 0xd0); + CHECK_RESULT(err != hipSuccess, "hipHostMalloc failed"); + } + else if (hostRegister[0]) + { + memptr[0] = malloc(bufSize_ + 4096); + alignedmemptr[0] = (void*)(((size_t)memptr[0] + 4095) & ~4095); + srcBuffer = alignedmemptr[0]; + setData(srcBuffer, bufSize_, 0xd0); + err = hipHostRegister(srcBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostRegister failed"); + } + else if (unpinnedMalloc[0]) + { + memptr[0] = malloc(bufSize_ + 4096); + alignedmemptr[0] = (void*)(((size_t)memptr[0] + 4095) & ~4095); + srcBuffer = alignedmemptr[0]; + setData(srcBuffer, bufSize_, 0xd0); + } + else + { + err = hipMalloc(&srcBuffer, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMalloc failed"); + err = hipMemset(srcBuffer, 0xd0, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMemset failed"); + } + + if (hostMalloc[1]) + { + err = hipHostMalloc((void**)&dstBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostMalloc failed"); 
+ } + else if (hostRegister[1]) + { + memptr[1] = malloc(bufSize_ + 4096); + alignedmemptr[1] = (void*)(((size_t)memptr[1] + 4095) & ~4095); + dstBuffer = alignedmemptr[1]; + err = hipHostRegister(dstBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostRegister failed"); + } + else if (unpinnedMalloc[1]) + { + memptr[1] = malloc(bufSize_ + 4096); + alignedmemptr[1] = (void*)(((size_t)memptr[1] + 4095) & ~4095); + dstBuffer = alignedmemptr[1]; + } + else + { + err = hipMalloc(&dstBuffer, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMalloc failed"); + } + + CPerfCounter timer; + + //warm up + err = hipMemcpy2D(dstBuffer, width, srcBuffer, width, width, width, hipMemcpyDefault); + CHECK_RESULT(err, "hipMemcpy2D failed"); + + timer.Reset(); + timer.Start(); + for (unsigned int i = 0; i < numIter; i++) + { + err = hipMemcpy2DAsync(dstBuffer, width, srcBuffer, width, width, width, hipMemcpyDefault, NULL); + CHECK_RESULT(err, "hipMemcpyAsync2D failed"); + } + err = hipDeviceSynchronize(); + CHECK_RESULT(err, "hipDeviceSynchronize failed"); + timer.Stop(); + double sec = timer.GetElapsedTime(); + + // Buffer copy bandwidth in GB/s + double perf = ((double)bufSize_*numIter*(double)(1e-09)) / sec; + + const char *strSrc = NULL; + const char *strDst = NULL; + if (hostMalloc[0]) + strSrc = "hHM"; + else if (hostRegister[0]) + strSrc = "hHR"; + else if (unpinnedMalloc[0]) + strSrc = "unp"; + else + strSrc = "hM"; + + if (hostMalloc[1]) + strDst = "hHM"; + else if (hostRegister[1]) + strDst = "hHR"; + else if (unpinnedMalloc[1]) + strDst = "unp"; + else + strDst = "hM"; + // Double results when src and dst are both on device + if ((!hostMalloc[0] && !hostRegister[0] && !unpinnedMalloc[0]) && + (!hostMalloc[1] && !hostRegister[1] && !unpinnedMalloc[1])) + perf *= 2.0; + // Double results when src and dst are both in sysmem + if ((hostMalloc[0] || hostRegister[0] || unpinnedMalloc[0]) && + (hostMalloc[1] || hostRegister[1] || unpinnedMalloc[1])) + perf *= 2.0; + + 
char buf[256]; + SNPRINTF(buf, sizeof(buf), "HIPPerfBufferCopyRectSpeed[%d]\t(%8d bytes)\ts:%s d:%s\ti:%4d\t(GB/s) perf\t%f", + test, bufSize_, strSrc, strDst, numIter, (float)perf); + printf("%s\n", buf); + + //Free src + if (hostMalloc[0]) + { + hipHostFree(srcBuffer); + } + else if (hostRegister[0]) + { + hipHostUnregister(srcBuffer); + free(memptr[0]); + } + else if (unpinnedMalloc[0]) + { + free(memptr[0]); + } + else + { + hipFree(srcBuffer); + } + + //Free dst + if (hostMalloc[1]) + { + hipHostFree(dstBuffer); + } + else if (hostRegister[1]) + { + hipHostUnregister(dstBuffer); + free(memptr[1]); + } + else if (unpinnedMalloc[1]) + { + free(memptr[1]); + } + else + { + hipFree(dstBuffer); + } + } + + passed(); +} diff --git a/tests/src/Performance/perfDispatch/hipPerfBufferCopySpeed.cpp b/tests/src/Performance/perfDispatch/hipPerfBufferCopySpeed.cpp new file mode 100644 index 0000000000..239d47b347 --- /dev/null +++ b/tests/src/Performance/perfDispatch/hipPerfBufferCopySpeed.cpp @@ -0,0 +1,287 @@ +#include +#include +#include +#include + +#include "timer.h" +#include "test_common.h" + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc + * TEST: %t + * HIT_END + */ + +// Quiet pesky warnings +#ifdef WIN_OS +#define SNPRINTF sprintf_s +#else +#define SNPRINTF snprintf +#endif + +#define NUM_SIZES 8 +//4KB, 8KB, 64KB, 256KB, 1 MB, 4MB, 16 MB, 16MB+10 +static const unsigned int Sizes[NUM_SIZES] = {4096, 8192, 65536, 262144, 1048576, 4194304, 16777216, 16777216+10}; + +static const unsigned int Iterations[2] = {1, 1000}; + +#define BUF_TYPES 4 +// 16 ways to combine 4 different buffer types +#define NUM_SUBTESTS (BUF_TYPES*BUF_TYPES) + +#define CHECK_RESULT(test, msg) \ + if ((test)) \ + { \ + printf("\n%s\n", msg); \ + abort(); \ + } + +void setData(void *ptr, unsigned int size, char value) +{ + char *ptr2 = (char *)ptr; + for (unsigned int i = 0; i < size ; i++) + { + ptr2[i] = value; + } +} + +void checkData(void *ptr, 
unsigned int size, char value) +{ + char *ptr2 = (char *)ptr; + for (unsigned int i = 0; i < size; i++) + { + if (ptr2[i] != value) + { + printf("Data validation failed at %d! Got 0x%08x\n", i, ptr2[i]); + printf("Expected 0x%08x\n", value); + CHECK_RESULT(true, "Data validation failed!"); + break; + } + } +} + + +int main(int argc, char* argv[]) { + HipTest::parseStandardArguments(argc, argv, true); + + hipError_t err = hipSuccess; + hipDeviceProp_t props = {0}; + err = hipGetDeviceProperties(&props, p_gpuDevice); /* keep the status so the check below is not dead code */ + CHECK_RESULT(err != hipSuccess, "hipGetDeviceProperties failed" ); + printf("Set device to %d : %s\n", p_gpuDevice, props.name); + printf("Legend: unp - unpinned(malloc), hM - hipMalloc(device)\n"); + printf(" hHR - hipHostRegister(pinned), hHM - hipHostMalloc(prePinned)\n"); + err = hipSetDevice(p_gpuDevice); + CHECK_RESULT(err != hipSuccess, "hipSetDevice failed" ); + + unsigned int bufSize_; + bool hostMalloc[2] = {false}; + bool hostRegister[2] = {false}; + bool unpinnedMalloc[2] = {false}; + unsigned int numIter; + void *memptr[2] = {NULL}; + void *alignedmemptr[2] = {NULL}; + void* srcBuffer = NULL; + void* dstBuffer = NULL; + + int numTests = (p_tests == -1) ? (NUM_SIZES*NUM_SUBTESTS*2 - 1) : p_tests; + int test = (p_tests == -1) ?
0 : p_tests; + + for(;test <= numTests; test++) + { + unsigned int srcTest = (test / NUM_SIZES) % BUF_TYPES; + unsigned int dstTest = (test / (NUM_SIZES*BUF_TYPES)) % BUF_TYPES; + bufSize_ = Sizes[test % NUM_SIZES]; + hostMalloc[0] = hostMalloc[1] = false; + hostRegister[0] = hostRegister[1] = false; + unpinnedMalloc[0] = unpinnedMalloc[1] = false; + srcBuffer = dstBuffer = 0; + memptr[0] = memptr[1] = NULL; + alignedmemptr[0] = alignedmemptr[1] = NULL; + + if (srcTest == 3) + { + hostRegister[0] = true; + } + else if (srcTest == 2) + { + hostMalloc[0] = true; + } + else if (srcTest == 1) + { + unpinnedMalloc[0] = true; + } + + if (dstTest == 1) + { + unpinnedMalloc[1] = true; + } + else if (dstTest == 2) + { + hostMalloc[1] = true; + } + else if (dstTest == 3) + { + hostRegister[1] = true; + } + + numIter = Iterations[test / (NUM_SIZES * NUM_SUBTESTS)]; + + if (hostMalloc[0]) + { + err = hipHostMalloc((void**)&srcBuffer, bufSize_, 0); + setData(srcBuffer, bufSize_, 0xd0); + CHECK_RESULT(err != hipSuccess, "hipHostMalloc failed"); + } + else if (hostRegister[0]) + { + memptr[0] = malloc(bufSize_ + 4096); + alignedmemptr[0] = (void*)(((size_t)memptr[0] + 4095) & ~4095); + srcBuffer = alignedmemptr[0]; + setData(srcBuffer, bufSize_, 0xd0); + err = hipHostRegister(srcBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostRegister failed"); + } + else if (unpinnedMalloc[0]) + { + memptr[0] = malloc(bufSize_ + 4096); + alignedmemptr[0] = (void*)(((size_t)memptr[0] + 4095) & ~4095); + srcBuffer = alignedmemptr[0]; + setData(srcBuffer, bufSize_, 0xd0); + } + else + { + err = hipMalloc(&srcBuffer, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMalloc failed"); + err = hipMemset(srcBuffer, 0xd0, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMemset failed"); + } + + if (hostMalloc[1]) + { + err = hipHostMalloc((void**)&dstBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostMalloc failed"); + } + else if (hostRegister[1]) + { + memptr[1] = 
malloc(bufSize_ + 4096); + alignedmemptr[1] = (void*)(((size_t)memptr[1] + 4095) & ~4095); + dstBuffer = alignedmemptr[1]; + err = hipHostRegister(dstBuffer, bufSize_, 0); + CHECK_RESULT(err != hipSuccess, "hipHostRegister failed"); + } + else if (unpinnedMalloc[1]) + { + memptr[1] = malloc(bufSize_ + 4096); + alignedmemptr[1] = (void*)(((size_t)memptr[1] + 4095) & ~4095); + dstBuffer = alignedmemptr[1]; + } + else + { + err = hipMalloc(&dstBuffer, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMalloc failed"); + } + + CPerfCounter timer; + + //warm up + err = hipMemcpy(dstBuffer, srcBuffer, bufSize_, hipMemcpyDefault); + CHECK_RESULT(err, "hipMemcpy failed"); + + timer.Reset(); + timer.Start(); + for (unsigned int i = 0; i < numIter; i++) + { + err = hipMemcpyAsync(dstBuffer, srcBuffer, bufSize_, hipMemcpyDefault, NULL); + CHECK_RESULT(err, "hipMemcpyAsync failed"); + } + err = hipDeviceSynchronize(); + CHECK_RESULT(err, "hipDeviceSynchronize failed"); + timer.Stop(); + double sec = timer.GetElapsedTime(); + + // Buffer copy bandwidth in GB/s + double perf = ((double)bufSize_*numIter*(double)(1e-09)) / sec; + + const char *strSrc = NULL; + const char *strDst = NULL; + if (hostMalloc[0]) + strSrc = "hHM"; + else if (hostRegister[0]) + strSrc = "hHR"; + else if (unpinnedMalloc[0]) + strSrc = "unp"; + else + strSrc = "hM"; + + if (hostMalloc[1]) + strDst = "hHM"; + else if (hostRegister[1]) + strDst = "hHR"; + else if (unpinnedMalloc[1]) + strDst = "unp"; + else + strDst = "hM"; + // Double results when src and dst are both on device + if ((!hostMalloc[0] && !hostRegister[0] && !unpinnedMalloc[0]) && + (!hostMalloc[1] && !hostRegister[1] && !unpinnedMalloc[1])) + perf *= 2.0; + // Double results when src and dst are both in sysmem + if ((hostMalloc[0] || hostRegister[0] || unpinnedMalloc[0]) && + (hostMalloc[1] || hostRegister[1] || unpinnedMalloc[1])) + perf *= 2.0; + + char buf[256]; + SNPRINTF(buf, sizeof(buf), "HIPPerfBufferCopySpeed[%d]\t(%8d bytes)\ts:%s 
d:%s\ti:%4d\t(GB/s) perf\t%f", + test, bufSize_, strSrc, strDst, numIter, (float)perf); + printf("%s\n", buf); + + // Verification + void* temp = malloc(bufSize_ + 4096); + void* chkBuf = (void*)(((size_t)temp + 4095) & ~4095); + err = hipMemcpy(chkBuf, dstBuffer, bufSize_, hipMemcpyDefault); + CHECK_RESULT(err, "hipMemcpy failed"); + checkData(chkBuf, bufSize_, 0xd0); + free(temp); + + //Free src + if (hostMalloc[0]) + { + hipHostFree(srcBuffer); + } + else if (hostRegister[0]) + { + hipHostUnregister(srcBuffer); + free(memptr[0]); + } + else if (unpinnedMalloc[0]) + { + free(memptr[0]); + } + else + { + hipFree(srcBuffer); + } + + //Free dst + if (hostMalloc[1]) + { + hipHostFree(dstBuffer); + } + else if (hostRegister[1]) + { + hipHostUnregister(dstBuffer); + free(memptr[1]); + } + else if (unpinnedMalloc[1]) + { + free(memptr[1]); + } + else + { + hipFree(dstBuffer); + } + } + + passed(); +} diff --git a/tests/src/Performance/perfDispatch/hipPerfDispatchSpeed.cpp b/tests/src/Performance/perfDispatch/hipPerfDispatchSpeed.cpp new file mode 100644 index 0000000000..3d14c7b95f --- /dev/null +++ b/tests/src/Performance/perfDispatch/hipPerfDispatchSpeed.cpp @@ -0,0 +1,210 @@ +#include +#include +#include +#include + +#include "timer.h" +#include "test_common.h" + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp timer.cpp EXCLUDE_HIP_PLATFORM nvcc + * TEST: %t + * HIT_END + */ + +// Quiet pesky warnings +#ifdef WIN_OS +#define SNPRINTF sprintf_s +#else +#define SNPRINTF snprintf +#endif + +#define CHAR_BUF_SIZE 512 + +#define CHECK_RESULT(test, msg) \ + if ((test)) \ + { \ + printf("\n%s\n", msg); \ + abort(); \ + } + +typedef struct { + unsigned int iterations; + int flushEvery; +} testStruct; + +testStruct testList[] = +{ + { 1, -1}, + { 1, -1}, + { 10, 1}, + { 10, -1}, + { 100, 1}, + { 100, 10}, + { 100, -1}, + { 1000, 1}, + { 1000, 10}, + { 1000, 100}, + { 1000, -1}, + { 10000, 1}, + { 10000, 10}, + { 10000, 100}, + { 10000, 1000}, + { 10000, -1}, + { 100000, 
1}, + { 100000, 10}, + { 100000, 100}, + { 100000, 1000}, + { 100000, 10000}, + { 100000, -1}, +}; + +unsigned int mapTestList[] = {1, 1, 10, 100, 1000, 10000, 100000}; + +__global__ void _dispatchSpeed(float *outBuf) +{ + int i = (blockIdx.x * blockDim.x + threadIdx.x); + if (i < 0) + outBuf[i] = 0.0f; +}; + + +int main(int argc, char* argv[]) { + HipTest::parseStandardArguments(argc, argv, true); + + hipError_t err = hipSuccess; + hipDeviceProp_t props = {0}; + err = hipGetDeviceProperties(&props, p_gpuDevice); /* keep the status so the check below is not dead code */ + CHECK_RESULT(err != hipSuccess, "hipGetDeviceProperties failed" ); + printf("Set device to %d : %s\n", p_gpuDevice, props.name); + + unsigned int testListSize = sizeof(testList) / sizeof(testStruct); + int numTests = (p_tests == -1) ? (2*2*testListSize - 1) : p_tests; + int test = (p_tests == -1) ? 0 : p_tests; + + float* srcBuffer = NULL; + unsigned int bufSize_ = 64*sizeof(float); + err = hipMalloc(&srcBuffer, bufSize_); + CHECK_RESULT(err != hipSuccess, "hipMalloc failed"); + + for(;test <= numTests; test++) + { + int openTest = test % testListSize; + bool sleep = false; + bool doWarmup = false; + + if ((test / testListSize) % 2) + { + doWarmup = true; + } + if (test >= (testListSize * 2)) + { + sleep = true; + } + + int threads = (bufSize_ / sizeof(float)); + int threads_per_block = 64; + int blocks = (threads + threads_per_block - 1) / threads_per_block; /* ceiling division; quotient + remainder over-counts blocks */ + hipEvent_t start, stop; + + // NULL stream check: + err = hipEventCreate(&start); + CHECK_RESULT(err != hipSuccess, "hipEventCreate failed"); + err = hipEventCreate(&stop); + CHECK_RESULT(err != hipSuccess, "hipEventCreate failed"); + + if (doWarmup) + { + hipLaunchKernelGGL(_dispatchSpeed, dim3(blocks), dim3(threads_per_block), 0, hipStream_t(0), srcBuffer); + err = hipDeviceSynchronize(); + CHECK_RESULT(err != hipSuccess, "hipDeviceSynchronize failed"); + } + + CPerfCounter timer; + + timer.Reset(); + timer.Start(); + for (unsigned int i = 0; i < testList[openTest].iterations; i++) + { + hipEventRecord(start, NULL); + hipLaunchKernelGGL(_dispatchSpeed,
dim3(blocks), dim3(threads_per_block), 0, hipStream_t(0), srcBuffer); + hipEventRecord(stop, NULL); + + if ((testList[openTest].flushEvery > 0) && + (((i + 1) % testList[openTest].flushEvery) == 0)) + { + if (sleep) + { + err = hipDeviceSynchronize(); + CHECK_RESULT(err != hipSuccess, "hipDeviceSynchronize failed"); + } + else + { + do { + err = hipEventQuery(stop); + } while (err == hipErrorNotReady); + } + } + } + if (sleep) + { + err = hipDeviceSynchronize(); + CHECK_RESULT(err != hipSuccess, "hipDeviceSynchronize failed"); + } + else + { + do { + err = hipEventQuery(stop); + } while (err == hipErrorNotReady); + } + timer.Stop(); + + hipEventDestroy(start); + hipEventDestroy(stop); + double sec = timer.GetElapsedTime(); + + // microseconds per launch + double perf = (1000000.f*sec/testList[openTest].iterations); + const char *waitType; + const char *extraChar; + const char *n; + const char *warmup; + if (sleep) + { + waitType = "sleep"; + extraChar = ""; + n = ""; + } + else + { + waitType = "spin"; + n = "n"; + extraChar = " "; + } + if (doWarmup) + { + warmup = "warmup"; + } + else + { + warmup = ""; + } + + + char buf[256]; + if (testList[openTest].flushEvery > 0) + { + SNPRINTF(buf, sizeof(buf), "HIPPerfDispatchSpeed[%3d] %7d dispatches %s%sing every %5d %6s (us/disp) %3f", test, testList[openTest].iterations, + waitType, n, testList[openTest].flushEvery, warmup, (float)perf); + } + else + { + SNPRINTF(buf, sizeof(buf), "HIPPerfDispatchSpeed[%3d] %7d dispatches (%s%s) %6s (us/disp) %3f", test, testList[openTest].iterations, + waitType, extraChar, warmup, (float)perf); + } + printf("%s\n", buf); + } + + hipFree(srcBuffer); + passed(); +} diff --git a/tests/src/Performance/perfDispatch/timer.cpp b/tests/src/Performance/perfDispatch/timer.cpp new file mode 100644 index 0000000000..ea9c6ea1d9 --- /dev/null +++ b/tests/src/Performance/perfDispatch/timer.cpp @@ -0,0 +1,116 @@ +#include "timer.h" + +#include + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#define 
VC_EXTRALEAN +#include +#pragma comment(lib, "user32") +#endif + +#ifdef __linux__ +#include +#define NANOSECONDS_PER_SEC 1000000000 +#endif + +CPerfCounter::CPerfCounter() : _clocks(0), _start(0) +{ + +#ifdef _WIN32 + + QueryPerformanceFrequency((LARGE_INTEGER *)&_freq); + +#endif + +#ifdef __linux__ + _freq = NANOSECONDS_PER_SEC; +#endif + +} + +CPerfCounter::~CPerfCounter() +{ + // EMPTY! +} + +void +CPerfCounter::Start(void) +{ + +#ifdef _WIN32 + + if( _start ) + { + MessageBox(NULL, "Bad Perf Counter Start", "Error", MB_OK); + exit(0); + } + QueryPerformanceCounter((LARGE_INTEGER *)&_start); + +#endif +#ifdef __linux__ + + struct timespec s; + clock_gettime(CLOCK_MONOTONIC, &s); + _start = (i64)s.tv_sec * NANOSECONDS_PER_SEC + (i64)s.tv_nsec ; + +#endif + +} + +void +CPerfCounter::Stop(void) +{ + i64 n; + +#ifdef _WIN32 + + if( !_start ) + { + MessageBox(NULL, "Bad Perf Counter Stop", "Error", MB_OK); + exit(0); + } + + QueryPerformanceCounter((LARGE_INTEGER *)&n); + +#endif +#ifdef __linux__ + + struct timespec s; + clock_gettime(CLOCK_MONOTONIC, &s); + n = (i64)s.tv_sec * NANOSECONDS_PER_SEC + (i64)s.tv_nsec ; + +#endif + + n -= _start; + _start = 0; + _clocks += n; +} + +void +CPerfCounter::Reset(void) +{ + +#ifdef _WIN32 + if( _start ) + { + MessageBox(NULL, "Bad Perf Counter Reset", "Error", MB_OK); + exit(0); + } +#endif + _clocks = 0; +} + +double +CPerfCounter::GetElapsedTime(void) +{ +#ifdef _WIN32 + if( _start ) { + MessageBox(NULL, "Trying to get time while still running.", "Error", MB_OK); + exit(0); + } +#endif + + return (double)_clocks / (double)_freq; + +} diff --git a/tests/src/Performance/perfDispatch/timer.h b/tests/src/Performance/perfDispatch/timer.h new file mode 100644 index 0000000000..28bfeff74b --- /dev/null +++ b/tests/src/Performance/perfDispatch/timer.h @@ -0,0 +1,28 @@ +#ifndef _TIMER_H_ +#define _TIMER_H_ + +#ifdef _WIN32 +typedef __int64 i64 ; +#endif +#ifdef __linux__ +typedef long long i64; +#endif + +class CPerfCounter { + 
+public: + CPerfCounter(); + ~CPerfCounter(); + void Start(void); + void Stop(void); + void Reset(void); + double GetElapsedTime(void); + +private: + + i64 _freq; + i64 _clocks; + i64 _start; +}; + +#endif // _TIMER_H_ diff --git a/tests/src/deviceLib/hipBfloat16.cpp b/tests/src/deviceLib/hipBfloat16.cpp new file mode 100644 index 0000000000..306d995ffe --- /dev/null +++ b/tests/src/deviceLib/hipBfloat16.cpp @@ -0,0 +1,137 @@ +/* +Copyright (c) 2015-2019 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS -std=c++11 + * TEST: %t + * HIT_END + */ +#include "test_common.h" +#include +#include +#include +#include +#include + +#define SIZE 100 +using namespace std; + +static random_device dev; +static mt19937 rng(dev()); + +inline float getRandomFloat(long min = 10, long max = LONG_MAX) { + uniform_real_distribution gen(min, max); + return gen(rng); +} + +__host__ __device__ bool testRelativeAccuracy(float a, hip_bfloat16 b) { + float c = float(b); + // float relative error should be less than 1/(2^7) since bfloat16 + // has 7 bits mantissa. + if(fabs(c - a) / a <= 1.0 / 128){ + return true; + } + return false; +} + +__host__ __device__ void testOperations(float &fa, float &fb) { + + hip_bfloat16 bf_a(fa); + hip_bfloat16 bf_b(fb); + float fc = float(bf_a); + float fd = float(bf_b); + + assert(testRelativeAccuracy(fa, bf_a)); + assert(testRelativeAccuracy(fb, bf_b)); + + assert(testRelativeAccuracy(fc + fd, bf_a + bf_b)); + //when checked as above for add, operation sub fails on GPU + assert(hip_bfloat16(fc - fd) == (bf_a - bf_b)); + assert(testRelativeAccuracy(fc * fd, bf_a * bf_b)); + assert(testRelativeAccuracy(fc / fd, bf_a / bf_b)); + + hip_bfloat16 bf_opNegate = -bf_a; + assert(bf_opNegate == -bf_a); + + hip_bfloat16 bf_x; + bf_x = bf_a; + bf_x++; + bf_x--; + ++bf_x; + --bf_x; + //hip_bfloat16 is converted to float and then inc/decremented, hence check with reduced precision + assert(testRelativeAccuracy(bf_x,bf_a)); + + bf_x = bf_a; + bf_x += bf_b; + assert(bf_x == (bf_a + bf_b)); + bf_x = bf_a; + bf_x -= bf_b; + assert(bf_x == (bf_a - bf_b)); + bf_x = bf_a; + bf_x *= bf_b; + assert(bf_x == (bf_a * bf_b)); + bf_x = bf_a; + bf_x /= bf_b; + assert(bf_x == (bf_a / bf_b)); + + hip_bfloat16 bf_rounded = hip_bfloat16::round_to_bfloat16(fa); + if (isnan(bf_rounded)) { + assert(isnan(bf_rounded) || isinf(bf_rounded)); + } +} + +__global__ void testOperationsGPU(float* d_a, float* d_b) +{ + int id = 
threadIdx.x; + if (id >= SIZE) return; /* valid indices are 0..SIZE-1; assumes d_a/d_b hold SIZE floats like the host arrays */ + float &a = d_a[id]; + float &b = d_b[id]; + testOperations(a, b); +} + +int main(){ + float *h_fa, *h_fb; + float *d_fa, *d_fb; + + h_fa = new float[SIZE]; + h_fb = new float[SIZE]; + for (int i = 0; i < SIZE; i++) { + h_fa[i] = getRandomFloat(); + h_fb[i] = getRandomFloat(); + testOperations(h_fa[i], h_fb[i]); + } + cout<<"Host bfloat16 Operations Successful!!"< +__global__ void kernel_simple(F f, T *out) { + *out = f(); +} + +template +void check_simple(F f, T expected, const char* file, unsigned line) { + auto memsize = sizeof(T); + T *outputCPU = (T *) malloc(memsize); + T *outputGPU = nullptr; + hipMalloc((void**)&outputGPU, memsize); + hipLaunchKernelGGL(kernel_simple, 1, 1, 0, 0, f, outputGPU); + hipMemcpy(outputCPU, outputGPU, memsize, hipMemcpyDeviceToHost); + if (*outputCPU != expected) { + failed("%s line %u : check failed (output = %lf, expected = %lf)\n", + file, line, (double)(*outputCPU), (double)expected); + } + hipFree(outputGPU); + free(outputCPU); +} +#define CHECK_SIMPLE(lambda, expected) \ + check_simple(lambda, expected, __FILE__, __LINE__); + +void test_fp16() { + CHECK_SIMPLE([]__device__(){ return max<__fp16>(1.0f, 2.0f); }, 2.0f); + CHECK_SIMPLE([]__device__(){ return min<__fp16>(1.0f, 2.0f); }, 1.0f); +} + int main(int argc, char* argv[]) { HipTest::parseStandardArguments(argc, argv, true); @@ -165,5 +193,7 @@ int main(int argc, char* argv[]) { // check_lgamma_double(); + test_fp16(); + passed(); } diff --git a/tests/src/deviceLib/hipTestHalf.cpp b/tests/src/deviceLib/hipTestHalf.cpp index 751d44e242..64a9f7fa63 100644 --- a/tests/src/deviceLib/hipTestHalf.cpp +++ b/tests/src/deviceLib/hipTestHalf.cpp @@ -96,6 +96,18 @@ void kernel_hisinf(__half* input, int* output) { output[tx] = __hisinf(input[tx]); } +__global__ void testHalfAbs(float* p) { + auto a = __float2half(*p); + a = __habs(a); + *p = __half2float(a); +} + +__global__ void testHalf2Abs(float2* p) { + auto a = __float22half2_rn(*p); + a =
__habs2(a); + *p = __half22float2(a); +} + #endif @@ -237,6 +249,31 @@ void checkFunctional() { return; } +void checkHalfAbs() { + { + float *p; + hipMalloc(&p, sizeof(float)); + float pp = -2.1f; + hipMemcpy(p, &pp, sizeof(float), hipMemcpyDefault); + hipLaunchKernelGGL(testHalfAbs, 1, 1, 0, 0, p); + hipMemcpy(&pp, p, sizeof(float), hipMemcpyDefault); + hipFree(p); + if(pp < 0.0f) { failed("Half Abs failed"); } + } + { + float2 *p; + hipMalloc(&p, sizeof(float2)); + float2 pp; + pp.x = -2.1f; + pp.y = -1.1f; + hipMemcpy(p, &pp, sizeof(float2), hipMemcpyDefault); + hipLaunchKernelGGL(testHalf2Abs, 1, 1, 0, 0, p); + hipMemcpy(&pp, p, sizeof(float2), hipMemcpyDefault); + hipFree(p); + if(pp.x < 0.0f || pp.y < 0.0f) { failed("Half2 Abs Test Failed"); } + } +} + int main() { bool* result{nullptr}; hipMemAllocHost((void**)&result, sizeof(result)); @@ -260,5 +297,7 @@ int main() { // run some functional checks checkFunctional(); + checkHalfAbs(); + passed(); } diff --git a/tests/src/deviceLib/hip_floatnTM.cpp b/tests/src/deviceLib/hip_floatnTM.cpp new file mode 100644 index 0000000000..921933636f --- /dev/null +++ b/tests/src/deviceLib/hip_floatnTM.cpp @@ -0,0 +1,239 @@ +/* +Copyright (c) 2015-2019 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc HIPCC_OPTIONS -std=c++14 + * TEST: %t + * HIT_END + */ + +#include +#include +#include +#include "test_common.h" + +static std::random_device dev; +static std::mt19937 rng(dev()); + +template +__host__ __device__ inline constexpr int count() { + return sizeof(T) / sizeof(M); +} + +inline float getRandomFloat(float min = 10, float max = 100) { + std::uniform_real_distribution gen(min, max); + return gen(rng); +} + +template +void fillMatrix(T* a, int size) { + for (int i = 0; i < size; i++) { + T t; + t.x = getRandomFloat(); + if constexpr (count() >= 2) t.y = getRandomFloat(); + if constexpr (count() >= 3) t.z = getRandomFloat(); + if constexpr (count() >= 4) t.w = getRandomFloat(); + + a[i] = t; + } +} + +// Test operations +template +__host__ __device__ void testOperations(T& a, T& b) { + a.x += b.x; + a.x++; + b.x++; + if constexpr (count() >= 2) { + a.y = b.x; + a.x = b.y; + } + if constexpr (count() >= 3) { + if (a.x > 0) b.x /= a.x; + a.x *= b.z; + a.y--; + } + if constexpr (count() >= 4) { + b.w = a.x; + a.w += (-b.y); + } +} + +template +__global__ void testOperationsGPU(T* d_a, T* d_b, int size) { + int id = threadIdx.x; + if (id > size) return; + T &a = d_a[id]; + T &b = d_b[id]; + + testOperations(a, b); +} + + +template +void dcopy(T* a, T* b, int size) { + for (int i = 0; i < size; i++) { + a[i] = b[i]; + } +} + +template +bool isEqual(T* a, T* b, int size) { + for (int i = 0; i < size; i++) { + if (a[i] != 
b[i]) { + return false; + } + } + return true; +} + +// Main function that tests type +// T = what you want to test +// D = pack of 1 i.e. float1 int1 +template +void testType(int msize) { + T *fa, *fb, *fc, *h_fa, *h_fb; + fa = new T[msize]; + fb = new T[msize]; + fc = new T[msize]; + h_fa = new T[msize]; + h_fb = new T[msize]; + + T *d_fa, *d_fb; + + constexpr int c = count(); + + if (c <= 0 || c >= 5) { + failed("Invalid Size\n"); + } + + fillMatrix(fa, msize); + dcopy(fb, fa, msize); + dcopy(h_fa, fa, msize); + dcopy(h_fb, fa, msize); + for (int i = 0; i < msize; i++) testOperations(h_fa[i], h_fb[i]); + + hipMalloc(&d_fa, sizeof(T) * msize); + hipMalloc(&d_fb, sizeof(T) * msize); + + hipMemcpy(d_fa, fa, sizeof(T) * msize, hipMemcpyHostToDevice); + hipMemcpy(d_fb, fb, sizeof(T) * msize, hipMemcpyHostToDevice); + + auto kernel = testOperationsGPU; + hipLaunchKernelGGL(kernel, 1, msize, 0, 0, d_fa, d_fb, msize); + + hipMemcpy(fc, d_fa, sizeof(T) * msize, hipMemcpyDeviceToHost); + + bool pass = true; + if (!isEqual(h_fa, fc, msize)) { + pass = false; + } + + delete[] fa; + delete[] fb; + delete[] fc; + delete[] h_fa; + delete[] h_fb; + hipFree(d_fa); + hipFree(d_fb); + + if (!pass) { + failed("Failed"); + } +} + +int main() { + const int msize = 100; + // double + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // floats + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // ints + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // chars + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // long + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // longlong + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // short + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // uints + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // uchars + 
testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // ulong + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // ulonglong + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + // ushort + testType(msize); + testType(msize); + testType(msize); + testType(msize); + + passed(); +} diff --git a/tests/src/hiprtc/hiprtcGetLoweredName.cpp b/tests/src/hiprtc/hiprtcGetLoweredName.cpp index e7b88d26d2..a63e13af64 100644 --- a/tests/src/hiprtc/hiprtcGetLoweredName.cpp +++ b/tests/src/hiprtc/hiprtcGetLoweredName.cpp @@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ diff --git a/tests/src/hiprtc/saxpy.cpp b/tests/src/hiprtc/saxpy.cpp index d063578757..a08c1c2399 100755 --- a/tests/src/hiprtc/saxpy.cpp +++ b/tests/src/hiprtc/saxpy.cpp @@ -20,7 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../test_common.cpp LINK_OPTIONS hiprtc EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ diff --git a/tests/src/hostcall/hipHostcallFuncCall.cpp b/tests/src/hostcall/hipHostcallFuncCall.cpp index d849e69788..15ac3e1f4e 100644 --- a/tests/src/hostcall/hipHostcallFuncCall.cpp +++ b/tests/src/hostcall/hipHostcallFuncCall.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. 
*/ /* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc * HIT_END */ diff --git a/tests/src/hostcall/hipHostcallPrintThings.cpp b/tests/src/hostcall/hipHostcallPrintThings.cpp index 8e3ebdae78..3d1a0c4e07 100644 --- a/tests/src/hostcall/hipHostcallPrintThings.cpp +++ b/tests/src/hostcall/hipHostcallPrintThings.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc * HIT_END */ diff --git a/tests/src/kernel/hipLaunchParm.cpp b/tests/src/kernel/hipLaunchParm.cpp index 23742d7d6c..797bc84ced 100644 --- a/tests/src/kernel/hipLaunchParm.cpp +++ b/tests/src/kernel/hipLaunchParm.cpp @@ -58,12 +58,10 @@ static const int BLOCK_DIM_SIZE = 512; // allocate memory on device and host for result validation static bool *result_d, *result_h; -static hipError_t hipMallocError = hipMalloc((void**)&result_d, - BLOCK_DIM_SIZE*sizeof(bool)); -static hipError_t hipHostMallocError = hipHostMalloc((void**)&result_h, - BLOCK_DIM_SIZE*sizeof(bool)); -static hipError_t hipMemsetError = hipMemset(result_d, - false, BLOCK_DIM_SIZE); + +static hipError_t hipMallocError = hipErrorUnknown; +static hipError_t hipHostMallocError = hipErrorUnknown; +static hipError_t hipMemsetError = hipErrorUnknown; static void ResultValidation() { hipMemcpy(result_h, result_d, BLOCK_DIM_SIZE*sizeof(bool), @@ -600,6 +598,10 @@ __global__ void vAdd(float* a) {} int main() { + hipMallocError = hipMalloc((void**)&result_d, BLOCK_DIM_SIZE*sizeof(bool)); + hipHostMallocError = hipHostMalloc((void**)&result_h, BLOCK_DIM_SIZE*sizeof(bool)); + hipMemsetError = hipMemset(result_d, false, BLOCK_DIM_SIZE); + // Validating memory & initial value, for result_d, result_h HIPASSERT(hipMallocError == 
hipSuccess); HIPASSERT(hipHostMallocError == hipSuccess); diff --git a/tests/src/printf/hipPrintfAltForms.cpp b/tests/src/printf/hipPrintfAltForms.cpp index dc0b325bba..062b688ed8 100644 --- a/tests/src/printf/hipPrintfAltForms.cpp +++ b/tests/src/printf/hipPrintfAltForms.cpp @@ -21,8 +21,8 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc - * TEST: %t EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc * HIT_END */ diff --git a/tests/src/printf/hipPrintfBasic.cpp b/tests/src/printf/hipPrintfBasic.cpp index e51373c251..db2ee269db 100644 --- a/tests/src/printf/hipPrintfBasic.cpp +++ b/tests/src/printf/hipPrintfBasic.cpp @@ -21,8 +21,8 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc - * TEST: %t EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc * HIT_END */ @@ -219,6 +219,42 @@ static void test_series(int *retval, uint num_blocks, uint threads_per_block) { HIPASSERT(linecount[msg_short] == num_threads); } +__global__ void kernel_divergent_loop() { + DECLARE_DATA(); + + const uint tid = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; + int result = 0; + + for (int i = 0; i <= tid; ++i) { + printf("%d\n", i); + } +} + +static void test_divergent_loop(uint num_blocks, uint threads_per_block) { + CaptureStream captured(stdout); + + uint num_threads = num_blocks * threads_per_block; + + hipLaunchKernelGGL(kernel_divergent_loop, dim3(num_blocks), dim3(threads_per_block), + 0, 0); + hipStreamSynchronize(0); + auto 
CapturedData = captured.getCapturedData(); + + std::map count; + while (true) { + int i; + CapturedData >> i; + if (CapturedData.fail()) + break; + count[i]++; + } + + HIPASSERT(count.size() == num_threads); + for (int i = 0; i != num_threads; ++i) { + HIPASSERT(count[i] == num_threads - i); + } +} + int main() { uint num_blocks = 1; uint threads_per_block = 64; @@ -233,6 +269,7 @@ int main() { test_divergent0(retval, num_blocks, threads_per_block); test_divergent1(retval, num_blocks, threads_per_block); test_series(retval, num_blocks, threads_per_block); + test_divergent_loop(num_blocks, threads_per_block); passed(); } diff --git a/tests/src/printf/hipPrintfFlags.cpp b/tests/src/printf/hipPrintfFlags.cpp index 5093c5e1c9..e9feb2e5e8 100644 --- a/tests/src/printf/hipPrintfFlags.cpp +++ b/tests/src/printf/hipPrintfFlags.cpp @@ -21,8 +21,8 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc - * TEST: %t EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc * HIT_END */ diff --git a/tests/src/printf/hipPrintfManyDevices.cpp b/tests/src/printf/hipPrintfManyDevices.cpp index 23751e26ea..44c8ed5741 100644 --- a/tests/src/printf/hipPrintfManyDevices.cpp +++ b/tests/src/printf/hipPrintfManyDevices.cpp @@ -21,8 +21,8 @@ THE SOFTWARE. 
*/ /* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc - * TEST: %t EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc * HIT_END */ diff --git a/tests/src/printf/hipPrintfManyWaves.cpp b/tests/src/printf/hipPrintfManyWaves.cpp index c15e695424..dae6dde307 100644 --- a/tests/src/printf/hipPrintfManyWaves.cpp +++ b/tests/src/printf/hipPrintfManyWaves.cpp @@ -21,8 +21,8 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc - * TEST: %t EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc * HIT_END */ diff --git a/tests/src/printf/hipPrintfSpecifiers.cpp b/tests/src/printf/hipPrintfSpecifiers.cpp index 03507658b9..009c76a968 100644 --- a/tests/src/printf/hipPrintfSpecifiers.cpp +++ b/tests/src/printf/hipPrintfSpecifiers.cpp @@ -21,8 +21,8 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc - * TEST: %t EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc * HIT_END */ diff --git a/tests/src/printf/hipPrintfStar.cpp b/tests/src/printf/hipPrintfStar.cpp index 5e97d6eae5..e4d48e692f 100644 --- a/tests/src/printf/hipPrintfStar.cpp +++ b/tests/src/printf/hipPrintfStar.cpp @@ -21,8 +21,8 @@ THE SOFTWARE. 
*/ /* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc - * TEST: %t EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc * HIT_END */ diff --git a/tests/src/printf/hipPrintfWidthPrecision.cpp b/tests/src/printf/hipPrintfWidthPrecision.cpp index db003377a9..5bf6b65724 100644 --- a/tests/src/printf/hipPrintfWidthPrecision.cpp +++ b/tests/src/printf/hipPrintfWidthPrecision.cpp @@ -21,8 +21,8 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc - * TEST: %t EXCLUDE_HIP_PLATFORM NVCC EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc + * TEST: %t EXCLUDE_HIP_PLATFORM nvcc EXCLUDE_HIP_RUNTIME HCC EXCLUDE_HIP_COMPILER hcc * HIT_END */ diff --git a/tests/src/runtimeApi/event/hipEventElapsedTime.cpp b/tests/src/runtimeApi/event/hipEventElapsedTime.cpp new file mode 100644 index 0000000000..61afaa93d3 --- /dev/null +++ b/tests/src/runtimeApi/event/hipEventElapsedTime.cpp @@ -0,0 +1,103 @@ +/* +Copyright (c) 2020-Present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM vdi + * TEST: %t + * HIT_END + */ + +#include "test_common.h" + +void NegativeTests(){ + + // Null pointers + { + hipEvent_t start,end; + float tms = 1.0f; + HIPASSERT(hipEventElapsedTime(nullptr,start,end) == hipErrorInvalidValue); +#ifndef __HIP_PLATFORM_NVCC__ + // On NVCC platform API throws seg fault hence skipping + HIPASSERT(hipEventElapsedTime(&tms,nullptr,end) == hipErrorInvalidHandle); + HIPASSERT(hipEventElapsedTime(&tms,start,nullptr) == hipErrorInvalidHandle); +#endif + } + + // Event created using disabled timing + { + float timeElapsed = 1.0f; + hipEvent_t start, stop; + HIPCHECK(hipEventCreateWithFlags(&start,hipEventDisableTiming)); + HIPCHECK(hipEventCreateWithFlags(&stop,hipEventDisableTiming)); + HIPASSERT(hipEventElapsedTime(&timeElapsed, start, stop) == hipErrorInvalidHandle); + } + + // events created different devices + { + int devCount = 0; + HIPCHECK(hipGetDeviceCount(&devCount)); + if (devCount > 1){ + // create event on dev=0 + HIPCHECK(hipSetDevice(0)); + hipEvent_t start; + HIPCHECK(hipEventCreate(&start)); + + // create event on dev=1 + HIPCHECK(hipSetDevice(1)); + hipEvent_t stop; + HIPCHECK(hipEventCreate(&stop)); + + HIPCHECK(hipEventRecord(start, nullptr)); + HIPCHECK(hipEventSynchronize(start)); + + HIPCHECK(hipEventRecord(stop, nullptr)); + HIPCHECK(hipEventSynchronize(stop)); + + float tElapsed = 1.0f; + HIPASSERT(hipEventElapsedTime(&tElapsed,start,stop) == 
hipErrorInvalidHandle); + } + } +} + +void PositiveTest(){ + hipEvent_t start; + HIPCHECK(hipEventCreate(&start)); + + hipEvent_t stop; + HIPCHECK(hipEventCreate(&stop)); + + HIPCHECK(hipEventRecord(start, nullptr)); + HIPCHECK(hipEventSynchronize(start)); + + HIPCHECK(hipEventRecord(stop, nullptr)); + HIPCHECK(hipEventSynchronize(stop)); + + float tElapsed = 1.0f; + HIPCHECK(hipEventElapsedTime(&tElapsed,start,stop)); +} + +int main(){ + + NegativeTests(); + PositiveTest(); + passed(); +} diff --git a/tests/src/runtimeApi/event/hipEventIpc.cpp b/tests/src/runtimeApi/event/hipEventIpc.cpp new file mode 100644 index 0000000000..dd6c23e334 --- /dev/null +++ b/tests/src/runtimeApi/event/hipEventIpc.cpp @@ -0,0 +1,112 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +// Test hipEventRecord serialization behavior. 
+// Through manual inspection of the reported timestamps, can determine if recording a NULL event +// forces synchronization : set + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi + * TEST: %t --iterations 10 + * HIT_END + */ + +#include "hip/hip_runtime.h" +#include "test_common.h" + +int main(int argc, char* argv[]) { + HipTest::parseStandardArguments(argc, argv, true); + + unsigned blocks = (N + threadsPerBlock - 1) / threadsPerBlock; + if (blocks > 1024) blocks = 1024; + if (blocks == 0) blocks = 1; + + printf("N=%zu (A+B+C= %6.1f MB total) blocks=%u threadsPerBlock=%u iterations=%d\n", N, + ((double)3 * N * sizeof(float)) / 1024 / 1024, blocks, threadsPerBlock, iterations); + printf("iterations=%d\n", iterations); + + size_t Nbytes = N * sizeof(float); + + float *A_h, *B_h, *C_h; + float *A_d, *B_d, *C_d; + HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N); + + hipEvent_t start, stop; + + // NULL stream check: + HIPCHECK(hipEventCreateWithFlags(&start, hipEventDisableTiming|hipEventInterprocess)); + HIPCHECK(hipEventCreateWithFlags(&stop, hipEventDisableTiming|hipEventInterprocess)); + + + HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); + + + for (int i = 0; i < iterations; i++) { + //--- START TIMED REGION + long long hostStart = HipTest::get_time(); + // Record the start event + HIPCHECK(hipEventRecord(start, NULL)); + + hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, + static_cast(A_d), static_cast(B_d), C_d, N); + + + HIPCHECK(hipEventRecord(stop, NULL)); + HIPCHECK(hipEventSynchronize(stop)); + HIPCHECK(hipEventQuery(stop)); + long long hostStop = HipTest::get_time(); + //--- STOP TIMED REGION + + + float eventMs = 1.0f; + // should fail + HIPASSERT(hipSuccess != hipEventElapsedTime(&eventMs, start, stop)); + float hostMs = HipTest::elapsed_time(hostStart, hostStop); + + printf("host_time 
(gettimeofday) =%6.3fms\n", hostMs); + printf("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs); + printf("\n"); + + } + + hipIpcEventHandle_t ipc_handle; + HIPCHECK(hipIpcGetEventHandle(&ipc_handle, start)); + + hipEvent_t ipc_event; + HIPCHECK(hipIpcOpenEventHandle(&ipc_event, ipc_handle)); + + HIPCHECK(hipEventSynchronize(ipc_event)); + + HIPCHECK(hipEventDestroy(ipc_event)); + HIPCHECK(hipEventDestroy(start)); + HIPCHECK(hipEventDestroy(stop)); + + HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + + + printf("check:\n"); + + HipTest::checkVectorADD(A_h, B_h, C_h, N, true); + + + passed(); +} diff --git a/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp b/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp new file mode 100644 index 0000000000..febc664f7d --- /dev/null +++ b/tests/src/runtimeApi/memory/hipMemcpyNegetiveTests.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2019-2020 Advanced Micro Devices, Inc. All rights reserved. + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * */ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi + * TEST: %t + * HIT_END + */ + + +#include "test_common.h" + +int main() { + int* A; + int* Ad; + int* Bd; + + // Allocation + HIPCHECK(hipMalloc((void**)&Ad, sizeof(int))); + HIPCHECK(hipMalloc((void**)&Bd, sizeof(int))); + HIPCHECK(hipHostMalloc((void**)&A,sizeof(int))); + + // Kind should be ignored and test should pass even for incorrect kind + HIPCHECK(hipMemcpy(Ad, A, sizeof(int), hipMemcpyDeviceToHost)); + HIPCHECK(hipMemcpy(A, Ad, sizeof(int), hipMemcpyHostToDevice)); + HIPCHECK(hipMemcpy(Ad, Bd, sizeof(int), hipMemcpyHostToHost)); + HIPCHECK(hipMemcpy(A, A, sizeof(int), hipMemcpyDeviceToDevice)); + + // nullptr passed as source or destination pointer + HIPASSERT(hipSuccess != hipMemcpy(nullptr, A, sizeof(int), hipMemcpyHostToDevice)); + HIPASSERT(hipSuccess != hipMemcpy(Ad, nullptr, sizeof(int), hipMemcpyHostToDevice)); + + HIPCHECK(hipFree(Ad)); + HIPCHECK(hipFree(Bd)); + HIPCHECK(hipFree(A)); + passed(); +} diff --git a/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp b/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp index f459a9d100..adface243d 100644 --- a/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp +++ b/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp @@ -34,7 +34,7 @@ THE SOFTWARE. 
#define USE_HCC_MEMTRACKER 0 /* Debug flag to show the memtracker periodically */ -#ifndef __HIP_VDI__ +#if defined(__HIP_PLATFORM_HCC__) && !defined(__HIP_VDI__) #include #else #define USE_HCC_MEMTRACKER 0 diff --git a/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp b/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp index 31b9d94ac2..102387cbe7 100644 --- a/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp +++ b/tests/src/runtimeApi/module/hipLaunchCoopMultiKernel.cpp @@ -101,7 +101,6 @@ int main() { uint* dA[MaxGPUs]; long* dB[MaxGPUs]; long* dC; - hipModule_t Module; hipStream_t stream[MaxGPUs]; uint32_t* init = new uint32_t[BufferSizeInDwords]; @@ -156,8 +155,8 @@ int main() { for (int i = 0; i < nGpu; i++) { HIPCHECK(hipSetDevice(i)); dimBlock.x = workgroups[set]; - HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(reinterpret_cast(&numBlocks), - (hipFunction_t)test_gws, dimBlock.x * dimBlock.y * dimBlock.z, dimBlock.x * sizeof(long))); + HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, + test_gws, dimBlock.x * dimBlock.y * dimBlock.z, dimBlock.x * sizeof(long))); std::cout << "GPU(" << i << ") Block size: " << dimBlock.x << " Num blocks per CU: " << numBlocks << "\n"; diff --git a/tests/src/runtimeApi/module/hipLaunchCooperativeKernel.cpp b/tests/src/runtimeApi/module/hipLaunchCooperativeKernel.cpp index 6862c61b1e..e0fcd4108b 100644 --- a/tests/src/runtimeApi/module/hipLaunchCooperativeKernel.cpp +++ b/tests/src/runtimeApi/module/hipLaunchCooperativeKernel.cpp @@ -116,7 +116,7 @@ int main() { dimBlock.x = workgroups[i]; // Calculate the device occupancy to know how many blocks can be run concurrently - hipOccupancyMaxActiveBlocksPerMultiprocessor(reinterpret_cast(&numBlocks), + hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, test_gws, dimBlock.x * dimBlock.y * dimBlock.z, dimBlock.x * sizeof(long)); dimGrid.x = deviceProp.multiProcessorCount * std::min(numBlocks, 32); diff --git 
a/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp b/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp index 4f180829fa..11bd6e7d50 100644 --- a/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp +++ b/tests/src/runtimeApi/module/hipModuleLoadDataMultThreaded.cpp @@ -107,8 +107,8 @@ void run(const std::vector& buffer) { hipFree(Ad); hipFree(Bd); - delete A; - delete B; + delete[] A; + delete[] B; hipCtxDestroy(context); } diff --git a/tests/src/runtimeApi/module/hipModuleOccupancyMaxPotentialActiveBlockSize.cpp b/tests/src/runtimeApi/module/hipModuleOccupancyMaxPotentialActiveBlockSize.cpp new file mode 100644 index 0000000000..f6935d0d68 --- /dev/null +++ b/tests/src/runtimeApi/module/hipModuleOccupancyMaxPotentialActiveBlockSize.cpp @@ -0,0 +1,54 @@ +/* +Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 + * TEST: %t + * HIT_END + */ + +#include "hip/hip_runtime.h" +#include "test_common.h" + +#define fileName "vcpy_kernel.code" +#define kernel_name "hello_world" + +int main(int argc, char* argv[]) { + + int gridSize = 0; + int blockSize = 0; + int numBlock = 0; + HIPCHECK(hipInit(0)); + + hipDevice_t device; + hipCtx_t context; + HIPCHECK(hipDeviceGet(&device, 0)); + HIPCHECK(hipCtxCreate(&context, 0, device)); + + hipModule_t Module; + hipFunction_t Function; + HIPCHECK(hipModuleLoad(&Module, fileName)); + HIPCHECK(hipModuleGetFunction(&Function, Module, kernel_name)); + HIPCHECK(hipModuleOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, Function, 0, 0)); + assert(gridSize != 0 && blockSize != 0); + HIPCHECK(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, Function, blockSize, 0)); + assert(numBlock != 0); + HIPCHECK(hipCtxDestroy(context)); + passed(); +} diff --git a/tests/src/runtimeApi/module/tex2d_kernel.cpp b/tests/src/runtimeApi/module/tex2d_kernel.cpp index be121f3e69..e744d88776 100644 --- a/tests/src/runtimeApi/module/tex2d_kernel.cpp +++ b/tests/src/runtimeApi/module/tex2d_kernel.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. 
*/ /* HIT_START - * BUILD_CMD: tex2d_kernel.code %hc --genco %S/tex2d_kernel.cpp -o tex2d_kernel.code + * BUILD_CMD: tex2d_kernel.code %hc --genco %S/tex2d_kernel.cpp -o tex2d_kernel.code EXCLUDE_HIP_PLATFORM vdi * HIT_END */ diff --git a/tests/src/runtimeApi/module/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp b/tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp similarity index 77% rename from tests/src/runtimeApi/module/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp rename to tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp index ebf656b72f..d8385669ea 100644 --- a/tests/src/runtimeApi/module/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp +++ b/tests/src/runtimeApi/occupancy/hipOccupancyMaxActiveBlocksPerMultiprocessor.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. // Test the Grid_Launch syntax. /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ @@ -30,10 +30,6 @@ THE SOFTWARE. 
#include "hip/hip_runtime.h" #include "test_common.h" -#define fileName "vcpy_kernel.code" -#define kernel_name "hello_world" - - __global__ void f1(float *a) { *a = 1.0; } template @@ -44,12 +40,12 @@ __global__ void f2(T *a) { *a = 1; } int main(int argc, char* argv[]) { // test case for using kernel function pointer - uint32_t gridSize = 0; - uint32_t blockSize = 0; + int gridSize = 0; + int blockSize = 0; hipOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, f1, 0, 0); assert(gridSize != 0 && blockSize != 0); - uint32_t numBlock = 0; + int numBlock = 0; hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, f1, blockSize, 0); assert(numBlock != 0); @@ -64,15 +60,5 @@ int main(int argc, char* argv[]) { hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, f2, blockSize, 0); assert(numBlock != 0); - - // test case for using kernel with hipFunction_t type - numBlock = 0; - hipModule_t Module; - hipFunction_t Function; - HIPCHECK(hipModuleLoad(&Module, fileName)); - HIPCHECK(hipModuleGetFunction(&Function, Module, kernel_name)); - HIPCHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, Function, blockSize, 0)); - assert(numBlock != 0); - passed(); } diff --git a/tests/src/runtimeApi/module/hipOccupancyMaxPotentialBlockSize.cpp b/tests/src/runtimeApi/occupancy/hipOccupancyMaxPotentialBlockSize.cpp similarity index 75% rename from tests/src/runtimeApi/module/hipOccupancyMaxPotentialBlockSize.cpp rename to tests/src/runtimeApi/occupancy/hipOccupancyMaxPotentialBlockSize.cpp index a81862952d..fc8538df26 100644 --- a/tests/src/runtimeApi/module/hipOccupancyMaxPotentialBlockSize.cpp +++ b/tests/src/runtimeApi/occupancy/hipOccupancyMaxPotentialBlockSize.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. // Test the Grid_Launch syntax. /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../../test_common.cpp * TEST: %t * HIT_END */ @@ -30,10 +30,6 @@ THE SOFTWARE. 
#include "hip/hip_runtime.h" #include "test_common.h" -#define fileName "vcpy_kernel.code" -#define kernel_name "hello_world" - - __global__ void f1(float *a) { *a = 1.0; } template @@ -44,8 +40,8 @@ __global__ void f2(T *a) { *a = 1; } int main(int argc, char* argv[]) { // test case for using kernel function pointer - uint32_t gridSize = 0; - uint32_t blockSize = 0; + int gridSize = 0; + int blockSize = 0; hipOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, f1, 0, 0); assert(gridSize != 0 && blockSize != 0); @@ -54,16 +50,5 @@ int main(int argc, char* argv[]) { blockSize = 0; hipOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, f2, 0, 0); assert(gridSize != 0 && blockSize != 0); - - // test case for using kernel with hipFunction_t type - gridSize = 0; - blockSize = 0; - hipModule_t Module; - hipFunction_t Function; - HIPCHECK(hipModuleLoad(&Module, fileName)); - HIPCHECK(hipModuleGetFunction(&Function, Module, kernel_name)); - HIPCHECK(hipOccupancyMaxPotentialBlockSize(&gridSize, &blockSize, Function, 0, 0)); - assert(gridSize != 0 && blockSize != 0); - passed(); } diff --git a/tests/src/runtimeApi/stream/StreamAddCallback.cpp b/tests/src/runtimeApi/stream/StreamAddCallback.cpp new file mode 100644 index 0000000000..e6492c7ce2 --- /dev/null +++ b/tests/src/runtimeApi/stream/StreamAddCallback.cpp @@ -0,0 +1,145 @@ +#include +#include +#include +#include "test_common.h" +#include + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 + * TEST: %t + * HIT_END + */ + +enum class ExecState +{ + EXEC_NOT_STARTED, + EXEC_STARTED, + EXEC_CB_STARTED, + EXEC_CB_FINISHED, + EXEC_FINISHED +}; + +struct UserData +{ + size_t size; + int* ptr; +}; + +// Global variable to check exection order +std::atomic gData(ExecState::EXEC_NOT_STARTED); + + +void myCallback(hipStream_t stream, hipError_t status, void* user_data) +{ + if(gData.load() != ExecState::EXEC_STARTED) + return; // Error hence return early + + gData.store(ExecState::EXEC_CB_STARTED); 
+ + UserData* data = reinterpret_cast(user_data); + printf("Callback started\n"); + + sleep(1); + + printf("Callback ending.\n"); + gData.store(ExecState::EXEC_CB_FINISHED); +} + +bool test(int count) +{ + printf("\n============ Test iteration %d =============\n",count); + // Stream + hipStream_t stream; + bool result = true; + + gData.store(ExecState::EXEC_STARTED); + + HIPCHECK(hipStreamCreate(&stream)); + + // Array size + size_t size = 10000; + + // Device array + int *data = NULL; + HIPCHECK(hipMalloc((void**)&data, sizeof(int) * size)); + + // Initialize device array to -1 + HIPCHECK(hipMemset(data, -1, sizeof(int) * size)); + + // Host array + int *host = NULL; + HIPCHECK(hipHostMalloc((void**)&host, sizeof(int) * size)); + + // Print host ptr address + printf("In main thread\n"); + + // Initialize user_data for callback + UserData arg; + arg.size = size; + arg.ptr = host; + + // Synchronize device + HIPCHECK(hipDeviceSynchronize()); + + // Asynchronous copy from device to host + HIPCHECK(hipMemcpyAsync(host, data, sizeof(int) * size, hipMemcpyDeviceToHost, stream)); + + // Asynchronous memset on device + HIPCHECK(hipMemsetAsync(data, 0, sizeof(int) * size, stream)); + + // Add callback - should happen after hipMemsetAsync() + HIPCHECK(hipStreamAddCallback(stream, myCallback, &arg, 0)); + + printf("Will wait in main thread until callback completes\n"); + + //This should synchronize the stream (including the callback) + HIPCHECK(hipStreamSynchronize(stream)); + + if(gData.load() != ExecState::EXEC_CB_FINISHED) + { + std::cout<<"Callback is not finished\n"; + return false; + } + printf("Callback completed will resume main thread execution\n"); + + if(host[size/2] != -1) + { + // Print some host data that just got copied + printf("Pseudo host data printing (should be -1): %d\n", host[size/2]); + result = false; + } + + HIPCHECK(hipMemcpy(host, data, sizeof(int)*size, hipMemcpyDeviceToHost)); + + if(host[size-1] != 0) + { + printf("Pseudo host data printing 
(should be 0): %d\n", host[size-1]); + result = false; + } + + HIPCHECK(hipFree(data)); + HIPCHECK(hipHostFree(host)); + HIPCHECK(hipStreamDestroy(stream)); + + gData.store(ExecState::EXEC_FINISHED); + return result; +} + +int main() +{ + // Test involves multithreading hence running multiple times + // to make sure consitency in the behavior + bool status = true; + + for(int i=0; i < 10; i++){ + status = test(i+1); + if(status == false) + { + failed("Test Failed!\n"); + break; + } + } + + if(status == true) passed(); + return 0; +} diff --git a/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp b/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp new file mode 100644 index 0000000000..c22b390ecc --- /dev/null +++ b/tests/src/runtimeApi/stream/hipStreamAddCallbackCatch.cpp @@ -0,0 +1,409 @@ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include "test_common.h" + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 EXCLUDE_HIP_PLATFORM vdi + * TEST: %t + * HIT_END + */ + +#define WORKAROUND 0 // Enable (1) this to make stream thread-safe by a workaround + +template // = queue blocks, until task is finished in enqueue(queue,task) +class QueueHipRt; + +// Queue types used in the tests +using TestQueues = std::tuple, QueueHipRt>; + + +// --- Implementation + +#define HIP_ASSERT(x) (assert((x)==hipSuccess)) +#define HIP_ASSERT_IGNORE(x,ign) auto err=x; HIP_ASSERT(err==ign ? 
hipSuccess : err) + +#ifdef __HIP_PLATFORM_HCC__ + #define HIPRT_CB +#endif + +template +static auto currentThreadWaitFor(QueueHipRt const & queue) -> void; + +template +class QueueHipRt +{ +public: + static constexpr bool isBlocking = IsBlocking; + //----------------------------------------------------------------------------- + QueueHipRt( + int dev) : + m_dev(dev), + m_HipQueue() + { + HIP_ASSERT( + hipSetDevice( + m_dev)); + HIP_ASSERT( + hipStreamCreateWithFlags( + &m_HipQueue, + hipStreamNonBlocking)); + } + //----------------------------------------------------------------------------- + QueueHipRt(QueueHipRt const &) = delete; + //----------------------------------------------------------------------------- + QueueHipRt(QueueHipRt &&) = delete; + //----------------------------------------------------------------------------- + auto operator=(QueueHipRt const &) -> QueueHipRt & = delete; + //----------------------------------------------------------------------------- + auto operator=(QueueHipRt &&) -> QueueHipRt & = delete; + //----------------------------------------------------------------------------- + ~QueueHipRt() + { + if(isBlocking) { +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + // we are a non-blocking queue, so we have to wait here with its destruction until all spawned tasks have been processed + currentThreadWaitFor(*this); +#endif + } + HIP_ASSERT( + hipSetDevice( + m_dev)); + HIP_ASSERT( + hipStreamDestroy( + m_HipQueue)); + } + +public: + int m_dev; //!< The device this queue is bound to. 
+ hipStream_t m_HipQueue; + +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + int m_callees = 0; + std::mutex m_mutex; +#endif +}; + +template +struct Enqueue +{ + //############################################################################# + enum class CallbackState + { + enqueued, + notified, + finished, + }; + + //############################################################################# + struct CallbackSynchronizationData : public std::enable_shared_from_this + { + std::mutex m_mutex; + std::condition_variable m_event; + CallbackState state = CallbackState::enqueued; + }; + + //----------------------------------------------------------------------------- + static void HIPRT_CB hipRtCallback(hipStream_t /*queue*/, hipError_t /*status*/, void *arg) + { + // explicitly copy the shared_ptr so that this method holds the state even when the executing thread has already finished. + const auto pCallbackSynchronizationData = reinterpret_cast(arg)->shared_from_this(); + + // Notify the executing thread. + { + std::unique_lock lock(pCallbackSynchronizationData->m_mutex); + pCallbackSynchronizationData->state = CallbackState::notified; + } + pCallbackSynchronizationData->m_event.notify_one(); + + // Wait for the executing thread to finish the task if it has not already finished. 
+ std::unique_lock lock(pCallbackSynchronizationData->m_mutex); + if(pCallbackSynchronizationData->state != CallbackState::finished) + { + pCallbackSynchronizationData->m_event.wait( + lock, + [pCallbackSynchronizationData](){ + return pCallbackSynchronizationData->state == CallbackState::finished; + } + ); + } + } + + //----------------------------------------------------------------------------- + template + static auto enqueue( + QueueHipRt & queue, + TTask const & task) + -> void + { + +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + { + // thread-safe callee incrementing + std::lock_guard guard(queue.m_mutex); + queue.m_callees += 1; + } +#endif + auto pCallbackSynchronizationData = std::make_shared(); + // test example: https://github.com/ROCm-Developer-Tools/HIP/blob/roc-1.9.x/tests/src/runtimeApi/stream/hipStreamAddCallback.cpp + HIP_ASSERT(hipStreamAddCallback( + queue.m_HipQueue, + hipRtCallback, + pCallbackSynchronizationData.get(), + 0u)); + + // We start a new std::thread which stores the task to be executed. + // This circumvents the limitation that it is not possible to call HIP methods within the HIP callback thread. + // The HIP thread signals the std::thread when it is ready to execute the task. + // The HIP thread is waiting for the std::thread to signal that it is finished executing the task + // before it executes the next task in the queue (HIP stream). + std::thread t( + [pCallbackSynchronizationData, + task +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + ,&queue // requires queue's destructor to wait for all tasks +#endif + ](){ + +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + // thread-safe task execution and callee decrementing + std::lock_guard guard(queue.m_mutex); +#endif + + // If the callback has not yet been called, we wait for it. 
+ { + std::unique_lock lock(pCallbackSynchronizationData->m_mutex); + if(pCallbackSynchronizationData->state != CallbackState::notified) + { + pCallbackSynchronizationData->m_event.wait( + lock, + [pCallbackSynchronizationData](){ + return pCallbackSynchronizationData->state == CallbackState::notified; + } + ); + } + + task(); + + // Notify the waiting HIP thread. + pCallbackSynchronizationData->state = CallbackState::finished; + } + pCallbackSynchronizationData->m_event.notify_one(); +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + queue.m_callees -= 1; +#endif + } + ); + if(isBlocking) + t.join(); // => waiting for task completion + else + t.detach(); // => do not wait for task completion + } +}; +//############################################################################# +//! The HIP RT non-blocking queue test trait specialization. +struct Empty +{ + //----------------------------------------------------------------------------- + template + static auto empty( + QueueHipRt const & queue) + -> bool + { + +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + return (queue.m_callees==0); +#else + + // Query is allowed even for queues on non current device. + hipError_t ret = hipSuccess; + HIP_ASSERT_IGNORE( + ret = hipStreamQuery( + queue.m_HipQueue), + hipErrorNotReady); + return (ret == hipSuccess); +#endif + } +}; + +template +auto currentThreadWaitFor(QueueHipRt const & queue) -> void +{ +#if WORKAROUND // NOTE: workaround for unwanted nonblocking hip streams for HCC (NVCC streams are blocking) + while(queue.m_callees>0) { + std::this_thread::sleep_for(std::chrono::milliseconds(10u)); + } +#else + // Sync is allowed even for queues on non current device. 
+ HIP_ASSERT( hipStreamSynchronize( + queue.m_HipQueue)); +#endif +} + + + + +// --- Tests + +#define TEMPLATE_LIST_TEST_CASE(TestName) \ +template static void TestName (std::atomic &check); \ +static int TestName##Runner () { \ + std::atomic check{0}; \ + TestName< QueueHipRt >(check); \ + fprintf(stderr, "After " #TestName " < QueueHipRt > errors=%d\n", check.load()); \ + TestName< QueueHipRt >(check); \ + fprintf(stderr, "After " #TestName " < QueueHipRt > errors=%d\n", check.load()); \ + return check.load(); \ +} \ +template static void TestName (std::atomic &check) + +// add 1 if a check fails +#define CHECK(result) do{int arg=(!(result)); fprintf(stderr, "Checking " #result " %d\n", arg); check.fetch_add(arg);}while(false) + +//----------------------------------------------------------------------------- +TEMPLATE_LIST_TEST_CASE( queueIsInitiallyEmpty ) +{ + TestType queue{0}; + CHECK(Empty::empty(queue)); +} + +//----------------------------------------------------------------------------- +TEMPLATE_LIST_TEST_CASE( queueCallbackIsWorking ) +{ + std::promise promise; + auto task = [&](){ promise.set_value(true); }; + TestType queue{0}; + Enqueue enqueue; + enqueue.enqueue( + queue, + task + ); + + CHECK(promise.get_future().get()); +} + +//----------------------------------------------------------------------------- +TEMPLATE_LIST_TEST_CASE( queueWaitShouldWork ) +{ + bool CallbackFinished = false; + auto task = + [&CallbackFinished]() noexcept + { + std::this_thread::sleep_for(std::chrono::milliseconds(100u)); + CallbackFinished = true; + }; + TestType queue{0}; + Enqueue enqueue; + enqueue.enqueue( + queue, + task + ); + + currentThreadWaitFor(queue); + CHECK(CallbackFinished); +} + +//----------------------------------------------------------------------------- +TEMPLATE_LIST_TEST_CASE( queueShouldNotBeEmptyWhenLastTaskIsStillExecutingAndIsEmptyAfterProcessingFinished ) +{ + bool CallbackFinished = false; + TestType queue{0}; + auto task = [&queue, 
&CallbackFinished, &check]() noexcept + { + CHECK(!Empty::empty(queue)); + std::this_thread::sleep_for(std::chrono::milliseconds(100u)); + CallbackFinished = true; + }; + Enqueue enqueue; + enqueue.enqueue( + queue, + task + ); + // A non-blocking queue will always stay empty because the task has been executed immediately. + if(!TestType::isBlocking) + { + currentThreadWaitFor(queue); + } + + CHECK(Empty::empty(queue)); + CHECK(CallbackFinished); +} + +//----------------------------------------------------------------------------- +TEMPLATE_LIST_TEST_CASE( queueShouldNotExecuteTasksInParallel ) +{ + std::atomic taskIsExecuting(false); + std::promise firstTaskFinished; + std::future firstTaskFinishedFuture = firstTaskFinished.get_future(); + std::promise secondTaskFinished; + std::future secondTaskFinishedFuture = secondTaskFinished.get_future(); + + TestType queue{0}; + + std::thread thread1( + [&queue, &taskIsExecuting, &firstTaskFinished, &check]() + { + auto task1 = [&taskIsExecuting, &firstTaskFinished, &check]() noexcept + { + CHECK(!taskIsExecuting.exchange(true)); + std::this_thread::sleep_for(std::chrono::milliseconds(100u)); + CHECK(taskIsExecuting.exchange(false)); + firstTaskFinished.set_value(); + }; + Enqueue enqueue; + enqueue.enqueue( + queue, + task1 + ); + }); + + std::thread thread2( + [&queue, &taskIsExecuting, &secondTaskFinished, &check]() + { + auto task2 = [&taskIsExecuting, &secondTaskFinished, &check]() noexcept + { + CHECK(!taskIsExecuting.exchange(true)); + std::this_thread::sleep_for(std::chrono::milliseconds(100u)); + CHECK(taskIsExecuting.exchange(false)); + secondTaskFinished.set_value(); + }; + + Enqueue enqueue; + enqueue.enqueue( + queue, + task2 + ); + }); + + // Both tasks have to be enqueued + thread1.join(); + thread2.join(); + + currentThreadWaitFor(queue); + + firstTaskFinishedFuture.get(); + secondTaskFinishedFuture.get(); +} + +#define TESTER(name) do { \ + int result = name (); \ + fprintf(stderr, #name " %s\n", 
result?"Errors":"No Errors"); \ + if (result) { failed(#name " failed\n"); } \ +} while (false) + +int main() +{ + TESTER(queueIsInitiallyEmptyRunner); + TESTER(queueCallbackIsWorkingRunner); + TESTER(queueWaitShouldWorkRunner); + TESTER(queueShouldNotBeEmptyWhenLastTaskIsStillExecutingAndIsEmptyAfterProcessingFinishedRunner); + TESTER(queueShouldNotExecuteTasksInParallelRunner); + passed(); +} diff --git a/tests/src/test_common.h b/tests/src/test_common.h index 7d8c39e74c..8897dc938e 100644 --- a/tests/src/test_common.h +++ b/tests/src/test_common.h @@ -55,11 +55,15 @@ THE SOFTWARE. printf("%sPASSED!%s\n", KGRN, KNRM); \ exit(0); +// The real "assert" would have written to stderr. But it is +// sufficient to just fflush here without getting pedantic. This also +// ensures that we don't lose any earlier writes to stdout. #define failed(...) \ printf("%serror: ", KRED); \ printf(__VA_ARGS__); \ printf("\n"); \ printf("error: TEST FAILED\n%s", KNRM); \ + fflush(NULL); \ abort(); #define warn(...) \ diff --git a/tests/src/texture/hipBindTex2DPitch.cpp b/tests/src/texture/hipBindTex2DPitch.cpp index b01402c91d..6cee22a45d 100644 --- a/tests/src/texture/hipBindTex2DPitch.cpp +++ b/tests/src/texture/hipBindTex2DPitch.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /*HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ @@ -28,9 +28,6 @@ THE SOFTWARE. #define SIZE_W 12 #define TYPE_t float -#if __HIP__ -__hip_pinned_shadow__ -#endif texture tex; // texture object is a kernel argument diff --git a/tests/src/texture/hipBindTexRef1DFetch.cpp b/tests/src/texture/hipBindTexRef1DFetch.cpp index 52a0d99ac1..af79153fe0 100644 --- a/tests/src/texture/hipBindTexRef1DFetch.cpp +++ b/tests/src/texture/hipBindTexRef1DFetch.cpp @@ -22,7 +22,7 @@ THE SOFTWARE. 
/* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ @@ -32,9 +32,6 @@ THE SOFTWARE. #define N 512 -#if __HIP__ -__hip_pinned_shadow__ -#endif texture tex; __global__ void kernel(float *out) { diff --git a/tests/src/texture/hipNormalizedFloatValueTex.cpp b/tests/src/texture/hipNormalizedFloatValueTex.cpp index 96917ecb62..b4aa3e9c05 100644 --- a/tests/src/texture/hipNormalizedFloatValueTex.cpp +++ b/tests/src/texture/hipNormalizedFloatValueTex.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc hcc + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc hcc vdi * TEST: %t * HIT_END */ diff --git a/tests/src/texture/hipTex1DFetchCheckModes.cpp b/tests/src/texture/hipTex1DFetchCheckModes.cpp new file mode 100644 index 0000000000..381d07280c --- /dev/null +++ b/tests/src/texture/hipTex1DFetchCheckModes.cpp @@ -0,0 +1,122 @@ +/* +Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi + * TEST: %t + * HIT_END + */ + +#include "hip/hip_runtime.h" +#include "../test_common.h" + +#define N 16 +#define offset 3 +__global__ void tex1dKernel(float *val, hipTextureObject_t obj) { + int k = blockIdx.x * blockDim.x + threadIdx.x; + if (k < N) + val[k] = tex1Dfetch(obj, k+offset); +} + +int runTest(hipTextureAddressMode, hipTextureFilterMode); + +int main(int argc, char **argv) { + int testResult = runTest(hipAddressModeClamp,hipFilterModePoint); + testResult = runTest(hipAddressModeClamp,hipFilterModeLinear); + testResult = runTest(hipAddressModeWrap,hipFilterModePoint); + testResult = runTest(hipAddressModeWrap,hipFilterModeLinear); + if(testResult) { + passed(); + } else { + exit(EXIT_FAILURE); + } +} + +int runTest(hipTextureAddressMode addressMode, hipTextureFilterMode filterMode) { + + int testResult = 1; + + hipCtx_t HipContext; + hipDevice_t HipDevice; + int deviceID = 0; + hipDeviceGet(&HipDevice, deviceID); + hipCtxCreate(&HipContext, 0, HipDevice); + + // Allocating the required buffer on gpu device + float *texBuf, *texBufOut; + float val[N], output[N]; + + for (int i = 0; i < N; i++) { + val[i] = i+1; + output[i] = 0.0; + } + + HIPCHECK(hipMalloc(&texBuf, N * sizeof(float))); + HIPCHECK(hipMalloc(&texBufOut, N * sizeof(float))); + HIPCHECK(hipMemcpy(texBuf, val, N * sizeof(float), hipMemcpyHostToDevice)); + HIPCHECK(hipMemset(texBufOut, 0, N * sizeof(float))); + hipResourceDesc resDescLinear; + + memset(&resDescLinear, 0, sizeof(resDescLinear)); + resDescLinear.resType = hipResourceTypeLinear; + resDescLinear.res.linear.devPtr = texBuf; + resDescLinear.res.linear.desc = hipCreateChannelDesc(32, 0, 0, 0, 
hipChannelFormatKindFloat); + resDescLinear.res.linear.sizeInBytes = N * sizeof(float); + + hipTextureDesc texDesc; + memset(&texDesc, 0, sizeof(texDesc)); + texDesc.readMode = hipReadModeElementType; + + texDesc.addressMode[0] = addressMode; + texDesc.addressMode[1] = addressMode; + texDesc.filterMode = filterMode; + texDesc.normalizedCoords = false; + + // Creating texture object + hipTextureObject_t texObj = 0; + HIPCHECK(hipCreateTextureObject(&texObj, &resDescLinear, &texDesc, NULL)); + + dim3 dimBlock(1, 1, 1); + dim3 dimGrid(N , 1, 1); + + hipLaunchKernelGGL(tex1dKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, + texBufOut, texObj); + HIPCHECK(hipDeviceSynchronize()); + + HIPCHECK(hipMemcpy(output, texBufOut, N * sizeof(float), hipMemcpyDeviceToHost)); + + for (int i = offset; i < N; i++) { + if (output[i-offset] != val[i]) { + testResult = 0; + break; + } + } + if(testResult){ + for(int i = N-offset; i < N; i++){ + if (output[i] != 0){ + testResult = 0; + break; + } + } + } + HIPCHECK(hipDestroyTextureObject(texObj)); + HIPCHECK(hipFree(texBuf)); + HIPCHECK(hipFree(texBufOut)); + return testResult; +} diff --git a/tests/src/texture/hipTextureRef2D.cpp b/tests/src/texture/hipTextureRef2D.cpp index b476ae8062..5247f81fe0 100644 --- a/tests/src/texture/hipTextureRef2D.cpp +++ b/tests/src/texture/hipTextureRef2D.cpp @@ -1,5 +1,5 @@ /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ @@ -9,9 +9,7 @@ #include #include "test_common.h" -#if __HIP__ -__hip_pinned_shadow__ -#endif + texture tex; __global__ void tex2DKernel(float* outputData, diff --git a/tests/src/texture/simpleTexture2DLayered.cpp b/tests/src/texture/simpleTexture2DLayered.cpp index e5014dae6b..8b1bbb64a3 100644 --- a/tests/src/texture/simpleTexture2DLayered.cpp +++ b/tests/src/texture/simpleTexture2DLayered.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. 
*/ /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM vdi * TEST: %t * HIT_END */ @@ -30,9 +30,6 @@ THE SOFTWARE. typedef float T; // Texture reference for 2D Layered texture -#if __HIP__ -__hip_pinned_shadow__ -#endif texture tex2DL; __global__ void simpleKernelLayeredArray(T* outputData,int width,int height,int layer) diff --git a/tests/src/texture/simpleTexture3D.cpp b/tests/src/texture/simpleTexture3D.cpp index 5395fc5d69..82f6cf5e99 100644 --- a/tests/src/texture/simpleTexture3D.cpp +++ b/tests/src/texture/simpleTexture3D.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc vdi * TEST: %t * HIT_END */ @@ -31,19 +31,10 @@ THE SOFTWARE. const char *sampleName = "simpleTexture3D"; // Texture reference for 3D texture -#if __HIP__ -__hip_pinned_shadow__ -#endif texture texf; -#if __HIP__ -__hip_pinned_shadow__ -#endif texture texi; -#if __HIP__ -__hip_pinned_shadow__ -#endif texture texc; template diff --git a/tests/unit/test_common.cpp b/tests/unit/test_common.cpp new file mode 100644 index 0000000000..1c0dcc8c34 --- /dev/null +++ b/tests/unit/test_common.cpp @@ -0,0 +1,180 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#include "test_common.h" + +// standard global variables that can be set on command line +size_t N = 4 * 1024 * 1024; +char memsetval = 0x42; +int memsetD32val = 0xDEADBEEF; +short memsetD16val = 0xDEAD; +char memsetD8val = 0xDE; +int iterations = 1; +unsigned blocksPerCU = 6; // to hide latency +unsigned threadsPerBlock = 256; +int p_gpuDevice = 0; +unsigned p_verbose = 0; +int p_tests = -1; /*which tests to run. Interpretation is left to each test. default:all*/ +#ifdef _WIN64 +const char* HIP_VISIBLE_DEVICES_STR = "HIP_VISIBLE_DEVICES="; +const char* CUDA_VISIBLE_DEVICES_STR = "CUDA_VISIBLE_DEVICES="; +const char* PATH_SEPERATOR_STR = "\\"; +const char* NULL_DEVICE = "NUL:"; +#else +const char* HIP_VISIBLE_DEVICES_STR = "HIP_VISIBLE_DEVICES"; +const char* CUDA_VISIBLE_DEVICES_STR = "CUDA_VISIBLE_DEVICES"; +const char* PATH_SEPERATOR_STR = "/"; +const char* NULL_DEVICE = "/dev/null"; +#endif + +namespace HipTest { + + +double elapsed_time(long long startTimeUs, long long stopTimeUs) { + return ((double)(stopTimeUs - startTimeUs)) / ((double)(1000)); +} + + +int parseSize(const char* str, size_t* output) { + char* next; + *output = strtoull(str, &next, 0); + int l = strlen(str); + if (l) { + char c = str[l - 1]; // last char. 
+ if ((c == 'k') || (c == 'K')) { + *output *= 1024; + } + if ((c == 'm') || (c == 'M')) { + *output *= (1024 * 1024); + } + if ((c == 'g') || (c == 'G')) { + *output *= (1024 * 1024 * 1024); + } + } + return 1; +} + + +int parseUInt(const char* str, unsigned int* output) { + char* next; + *output = strtoul(str, &next, 0); + return !strlen(next); +} + + +int parseInt(const char* str, int* output) { + char* next; + *output = strtol(str, &next, 0); + return !strlen(next); +} + + +int parseStandardArguments(int argc, char* argv[], bool failOnUndefinedArg) { + int extraArgs = 1; + for (int i = 1; i < argc; i++) { + const char* arg = argv[i]; + + if (!strcmp(arg, " ")) { + // skip NULL args. + } else if (!strcmp(arg, "--N") || (!strcmp(arg, "-N"))) { + if (++i >= argc || !HipTest::parseSize(argv[i], &N)) { + failed("Bad N size argument"); + } + } else if (!strcmp(arg, "--threadsPerBlock")) { + if (++i >= argc || !HipTest::parseUInt(argv[i], &threadsPerBlock)) { + failed("Bad threadsPerBlock argument"); + } + } else if (!strcmp(arg, "--blocksPerCU")) { + if (++i >= argc || !HipTest::parseUInt(argv[i], &blocksPerCU)) { + failed("Bad blocksPerCU argument"); + } + } else if (!strcmp(arg, "--memsetval")) { + int ex; + if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) { + failed("Bad memsetval argument"); + } + memsetval = ex; + } else if (!strcmp(arg, "--memsetD32val")) { + int ex; + if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) { + failed("Bad memsetD32val argument"); + } + memsetD32val = ex; + } else if (!strcmp(arg, "--memsetD16val")) { + int ex; + if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) { + failed("Bad memsetD16val argument"); + } + memsetD16val = ex; + } else if (!strcmp(arg, "--memsetD8val")) { + int ex; + if (++i >= argc || !HipTest::parseInt(argv[i], &ex)) { + failed("Bad memsetD8val argument"); + } + memsetD8val = ex; + } else if (!strcmp(arg, "--iterations") || (!strcmp(arg, "-i"))) { + if (++i >= argc || !HipTest::parseInt(argv[i], 
&iterations)) { + failed("Bad iterations argument"); + } + + } else if (!strcmp(arg, "--gpu") || (!strcmp(arg, "-gpuDevice")) || (!strcmp(arg, "-g"))) { + if (++i >= argc || !HipTest::parseInt(argv[i], &p_gpuDevice)) { + failed("Bad gpuDevice argument"); + } + + } else if (!strcmp(arg, "--verbose") || (!strcmp(arg, "-v"))) { + if (++i >= argc || !HipTest::parseUInt(argv[i], &p_verbose)) { + failed("Bad verbose argument"); + } + } else if (!strcmp(arg, "--tests") || (!strcmp(arg, "-t"))) { + if (++i >= argc || !HipTest::parseInt(argv[i], &p_tests)) { + failed("Bad tests argument"); + } + + } else { + if (failOnUndefinedArg) { + failed("Bad argument '%s'", arg); + } else { + argv[extraArgs++] = argv[i]; + } + } + }; + + return extraArgs; +} + + +unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N) { + int device; + HIPCHECK(hipGetDevice(&device)); + hipDeviceProp_t props; + HIPCHECK(hipGetDeviceProperties(&props, device)); + + unsigned blocks = props.multiProcessorCount * blocksPerCU; + if (blocks * threadsPerBlock > N) { + blocks = (N + threadsPerBlock - 1) / threadsPerBlock; + } + + return blocks; +} + + +} // namespace HipTest diff --git a/tests/unit/test_common.h b/tests/unit/test_common.h new file mode 100644 index 0000000000..4b55c70164 --- /dev/null +++ b/tests/unit/test_common.h @@ -0,0 +1,474 @@ +/* +Copyright (c) 2020-Present Advanced Micro Devices, Inc. All rights reserved. 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* + * File is intended to C and CPP compliant hence any CPP specic changes + * should be added into CPP section + * + */ + +#ifdef __cplusplus + #include + #include + #if __CUDACC__ + #include + #else + #include + #endif +#endif + +// ************************ GCC section ************************** +#include + +#include "hip/hip_runtime.h" +#include "hip/hip_runtime_api.h" + +#define HC __attribute__((hc)) + + +#define KNRM "\x1B[0m" +#define KRED "\x1B[31m" +#define KGRN "\x1B[32m" +#define KYEL "\x1B[33m" +#define KBLU "\x1B[34m" +#define KMAG "\x1B[35m" +#define KCYN "\x1B[36m" +#define KWHT "\x1B[37m" + +#define passed() \ + printf("%sPASSED!%s\n", KGRN, KNRM); \ + +#define failed(...) \ + printf("%serror: ", KRED); \ + printf(__VA_ARGS__); \ + printf("%s\n",KNRM); \ + return false; + +#define warn(...) 
\ + printf("%swarn: ", KYEL); \ + printf(__VA_ARGS__); \ + printf("\n"); \ + printf("warn: TEST WARNING\n%s", KNRM); + +#define skipped() printf("%sSkipped subtest %s%s\n",KYEL,__FUNCTION__,KNRM); + +#define HIPCHECK(error) \ + { \ + hipError_t localError = error; \ + if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \ + failed("%serror: '%s'(%d) from %s at %s:%d%s\n", KRED, hipGetErrorString(localError), \ + localError, #error, __FUNCTION__, __LINE__, KNRM); \ + } \ + } + +#define HIPASSERT(condition) \ + if (!(condition)) { \ + failed("%sassertion %s at %s:%d%s \n", KRED, #condition, __FUNCTION__, __LINE__, KNRM); \ + } + + +#define HIPCHECK_API(API_CALL, EXPECTED_ERROR) \ + { \ + hipError_t _e = (API_CALL); \ + if (_e != (EXPECTED_ERROR)) { \ + failed("%sAPI '%s' returned %d(%s) but test expected %d(%s) at %s:%d%s \n", KRED, \ + #API_CALL, _e, hipGetErrorName(_e), EXPECTED_ERROR, \ + hipGetErrorName(EXPECTED_ERROR), __FILE__, __LINE__, KNRM); \ + } \ + } + +#ifdef _WIN64 +#include +#define aligned_alloc(x,y) _aligned_malloc(y,x) +#define aligned_free(x) _aligned_free(x) +#define popen(x,y) _popen(x,y) +#define pclose(x) _pclose(x) +#define setenv(x,y,z) _putenv_s(x,y) +#define unsetenv _putenv +#define fileno(x) _fileno(x) +#define dup(x) _dup(x) +#define dup2(x,y) _dup2(x,y) +#define close(x) _close(x) +#else +#define aligned_free(x) free(x) +#endif + +// standard command-line variables: +extern size_t N; +extern char memsetval; +extern int memsetD32val; +extern short memsetD16val; +extern char memsetD8val; +extern int iterations; +extern unsigned blocksPerCU; +extern unsigned threadsPerBlock; +extern int p_gpuDevice; +extern unsigned p_verbose; +extern int p_tests; +extern const char* HIP_VISIBLE_DEVICES_STR; +extern const char* CUDA_VISIBLE_DEVICES_STR; +extern const char* PATH_SEPERATOR_STR; +extern const char* NULL_DEVICE; + +// ********************* CPP section ********************* +#ifdef __cplusplus + +#ifdef 
__HIP_PLATFORM_HCC +#define TYPENAME(T) typeid(T).name() +#else +#define TYPENAME(T) "?" +#endif + +namespace HipTest { + +// Returns the current system time in microseconds +inline long long get_time() { +#if __CUDACC__ + struct timeval tv; + gettimeofday(&tv, 0); + return (tv.tv_sec * 1000000) + tv.tv_usec; +#else + return std::chrono::high_resolution_clock::now().time_since_epoch() + /std::chrono::microseconds(1); +#endif +} + +double elapsed_time(long long startTimeUs, long long stopTimeUs); + +int parseSize(const char* str, size_t* output); +int parseUInt(const char* str, unsigned int* output); +int parseInt(const char* str, int* output); +int parseStandardArguments(int argc, char* argv[], bool failOnUndefinedArg); + +unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N); + +template // pointer type +bool checkArray(T hData, T hOutputData, size_t width, size_t height,size_t depth) +{ + for (int i = 0; i < depth; i++) { + for (int j = 0; j < height; j++) { + for (int k = 0; k < width; k++) { + int offset = i*width*height + j*width + k; + if (hData[offset] != hOutputData[offset]) { + std::cerr << '[' << i << ',' << j << ',' << k << "]:" << hData[offset] << "----" << hOutputData[offset]<<" "; + failed("mistmatch at:%d %d %d",i,j,k); + } + } + } + } + return true; +} + +template +bool checkArray(T input, T output, size_t height, size_t width) +{ + for(int i=0; i +__global__ void vectorADD(const T* A_d, const T* B_d, T* C_d, size_t NELEM) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < NELEM; i += stride) { + C_d[i] = A_d[i] + B_d[i]; + } +} + + +template +__global__ void vectorADDReverse(const T* A_d, const T* B_d, T* C_d, + size_t NELEM) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) { + C_d[i] = A_d[i] + B_d[i]; + } +} + + 
+template +__global__ void addCount(const T* A_d, T* C_d, size_t NELEM, int count) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + // Deliberately do this in an inefficient way to increase kernel runtime + for (int i = 0; i < count; i++) { + for (size_t i = offset; i < NELEM; i += stride) { + C_d[i] = A_d[i] + (T)count; + } + } +} + + +template +__global__ void addCountReverse(const T* A_d, T* C_d, int64_t NELEM, int count) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + // Deliberately do this in an inefficient way to increase kernel runtime + for (int i = 0; i < count; i++) { + for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) { + C_d[i] = A_d[i] + (T)count; + } + } +} + + +template +__global__ void memsetReverse(T* C_d, T val, int64_t NELEM) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) { + C_d[i] = val; + } +} + + +template +void setDefaultData(size_t numElements, T* A_h, T* B_h, T* C_h) { + // Initialize the host data: + for (size_t i = 0; i < numElements; i++) { + if (A_h) (A_h)[i] = 3.146f + i; // Pi + if (B_h) (B_h)[i] = 1.618f + i; // Phi + if (C_h) (C_h)[i] = 0.0f + i; + } +} + + +template +bool initArraysForHost(T** A_h, T** B_h, T** C_h, size_t N, bool usePinnedHost = false) { + size_t Nbytes = N * sizeof(T); + + if (usePinnedHost) { + if (A_h) { + HIPCHECK(hipHostMalloc((void**)A_h, Nbytes)); + } + if (B_h) { + HIPCHECK(hipHostMalloc((void**)B_h, Nbytes)); + } + if (C_h) { + HIPCHECK(hipHostMalloc((void**)C_h, Nbytes)); + } + } else { + if (A_h) { + *A_h = (T*)malloc(Nbytes); + HIPASSERT(*A_h != NULL); + } + + if (B_h) { + *B_h = (T*)malloc(Nbytes); + HIPASSERT(*B_h != NULL); + } + + if (C_h) { + *C_h = (T*)malloc(Nbytes); + HIPASSERT(*C_h != NULL); + } + } + + setDefaultData(N, A_h ? 
*A_h : NULL, B_h ? *B_h : NULL, C_h ? *C_h : NULL); + return true; +} + + +template +bool initArrays(T** A_d, T** B_d, T** C_d, T** A_h, T** B_h, T** C_h, size_t N, + bool usePinnedHost = false) { + size_t Nbytes = N * sizeof(T); + + if (A_d) { + HIPCHECK(hipMalloc(A_d, Nbytes)); + } + if (B_d) { + HIPCHECK(hipMalloc(B_d, Nbytes)); + } + if (C_d) { + HIPCHECK(hipMalloc(C_d, Nbytes)); + } + + return initArraysForHost(A_h, B_h, C_h, N, usePinnedHost); +} + + +template +bool freeArraysForHost(T* A_h, T* B_h, T* C_h, bool usePinnedHost) { + if (usePinnedHost) { + if (A_h) { + HIPCHECK(hipHostFree(A_h)); + } + if (B_h) { + HIPCHECK(hipHostFree(B_h)); + } + if (C_h) { + HIPCHECK(hipHostFree(C_h)); + } + } else { + if (A_h) { + free(A_h); + } + if (B_h) { + free(B_h); + } + if (C_h) { + free(C_h); + } + } + return true; +} + +template +bool freeArrays(T* A_d, T* B_d, T* C_d, T* A_h, T* B_h, T* C_h, bool usePinnedHost) { + if (A_d) { + HIPCHECK(hipFree(A_d)); + } + if (B_d) { + HIPCHECK(hipFree(B_d)); + } + if (C_d) { + HIPCHECK(hipFree(C_d)); + } + + return freeArraysForHost(A_h, B_h, C_h, usePinnedHost); +} + +#if defined(__HIP_PLATFORM_HCC__) +template +bool initArrays2DPitch(T** A_d, T** B_d, T** C_d, size_t* pitch_A, size_t* pitch_B, size_t* pitch_C, + size_t numW, size_t numH) { + if (A_d) { + HIPCHECK(hipMallocPitch((void**)A_d, pitch_A, numW * sizeof(T), numH)); + } + if (B_d) { + HIPCHECK(hipMallocPitch((void**)B_d, pitch_B, numW * sizeof(T), numH)); + } + if (C_d) { + HIPCHECK(hipMallocPitch((void**)C_d, pitch_C, numW * sizeof(T), numH)); + } + + HIPASSERT(*pitch_A == *pitch_B); + HIPASSERT(*pitch_A == *pitch_C) + return true; +} + +inline bool initHIPArrays(hipArray** A_d, hipArray** B_d, hipArray** C_d, + const hipChannelFormatDesc* desc, const size_t numW, const size_t numH, + const unsigned int flags) { + if (A_d) { + HIPCHECK(hipMallocArray(A_d, desc, numW, numH, flags)); + } + if (B_d) { + HIPCHECK(hipMallocArray(B_d, desc, numW, numH, flags)); + } + if 
(C_d) { + HIPCHECK(hipMallocArray(C_d, desc, numW, numH, flags)); + } + return true; +} +#endif + +// Assumes C_h contains vector add of A_h + B_h +// Calls the test "failed" macro if a mismatch is detected. +template +size_t checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch = true, + bool reportMismatch = true) { + size_t mismatchCount = 0; + size_t firstMismatch = 0; + size_t mismatchesToPrint = 10; + for (size_t i = 0; i < N; i++) { + T expected = A_h[i] + B_h[i]; + if (result_H[i] != expected) { + if (mismatchCount == 0) { + firstMismatch = i; + } + mismatchCount++; + if ((mismatchCount <= mismatchesToPrint) && expectMatch) { + std::cout << std::fixed << std::setprecision(32); + std::cout << "At " << i << std::endl; + std::cout << " Computed:" << result_H[i] << std::endl; + std::cout << " Expected:" << expected << std::endl; + } + } + } + + if (reportMismatch) { + if (expectMatch) { + if (mismatchCount) { + failed("%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch); + } + } else { + if (mismatchCount == 0) { + failed("expected mismatches but did not detect any!"); + } + } + } + + return mismatchCount; +} + + +// Assumes C_h contains vector add of A_h + B_h +// Calls the test "failed" macro if a mismatch is detected. 
+template +bool checkTest(T* expected_H, T* result_H, size_t N, bool expectMatch = true) { + size_t mismatchCount = 0; + size_t firstMismatch = 0; + size_t mismatchesToPrint = 10; + for (size_t i = 0; i < N; i++) { + if (result_H[i] != expected_H[i]) { + if (mismatchCount == 0) { + firstMismatch = i; + } + mismatchCount++; + if ((mismatchCount <= mismatchesToPrint) && expectMatch) { + std::cout << std::fixed << std::setprecision(32); + std::cout << "At " << i << std::endl; + std::cout << " Computed:" << result_H[i] << std::endl; + std::cout << " Expected:" << expected_H[i] << std::endl; + } + } + } + + if (expectMatch) { + if (mismatchCount) { + fprintf(stderr, "%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch); + // failed("%zu mismatches ; first at index:%zu\n", mismatchCount, firstMismatch); + } + } else { + if (mismatchCount == 0) { + failed("expected mismatches but did not detect any!"); + } + } + return true; +} + +}; // namespace HipTest +#endif //__cplusplus diff --git a/vdi/CMakeLists.txt b/vdi/CMakeLists.txt index ee772874af..5460b2ba6b 100644 --- a/vdi/CMakeLists.txt +++ b/vdi/CMakeLists.txt @@ -1,7 +1,7 @@ #project("hip") cmake_minimum_required(VERSION 3.5.1) -set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--no-keep-memory -Wl,-Bsymbolic -Wl,--unresolved-symbols=report-all -Wl,--version-script=${CMAKE_CURRENT_LIST_DIR}/hip_hcc.map.in") +set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-keep-memory -Wl,-Bsymbolic -Wl,--unresolved-symbols=report-all -Wl,--version-script=${CMAKE_CURRENT_LIST_DIR}/hip_hcc.map.in") if(CMAKE_CXX_FLAGS MATCHES "fsanitize=address") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared-libasan") @@ -156,14 +156,40 @@ add_library(amdhip64_static STATIC $ ) +set_target_properties( + amdhip64 PROPERTIES + VERSION ${HIP_LIB_VERSION_STRING} + SOVERSION ${HIP_LIB_VERSION_MAJOR} +) +# We expect amdhip64_static to contain objects of vdi and hip. But linker +# let amdhip64_static contain objects of hip only. 
So we will use a +# a custom amdhip64_static_combiner to combine objects of vid and hip into +# amdhip64_static. To avoid amdhip64_static contains itself, +# amdhip64_static_temp is created internally. +add_library(amdhip64_static_temp STATIC + $ + ) + add_library(host INTERFACE) target_link_libraries(host INTERFACE amdhip64) -target_link_libraries(host INTERFACE amdhip64_static) add_library(device INTERFACE) target_link_libraries(device INTERFACE host) +# TODO: we may create host_static and device_static to let app +# link amdhip64_static -target_link_libraries(amdhip64_static amdvdi_static pthread dl) -target_link_libraries(amdhip64 amdvdi_static pthread dl) +target_link_libraries(amdhip64 PRIVATE amdvdi_static pthread dl) +target_link_libraries(amdhip64_static PRIVATE pthread dl) +target_link_libraries(amdhip64_static_temp PRIVATE pthread dl) + +# combine objects of vid and hip into amdhip64_static +add_custom_target( + amdhip64_static_combiner + ALL + COMMAND rm -f $ # Must remove old one, otherwise the new one will contain obsolete stuff + COMMAND ${CMAKE_AR} -rcsT $ $ $ + DEPENDS amdhip64_static amdhip64_static_temp amdvdi_static # To make sure this is the last step + COMMENT "Combining static libs into amdhip64_static" +) INSTALL(PROGRAMS $ DESTINATION lib COMPONENT MAIN) @@ -175,6 +201,6 @@ INSTALL(FILES ${CMAKE_BINARY_DIR}/lib/libhip_hcc.so DESTINATION lib COMPONENT MA INSTALL(FILES ${CMAKE_BINARY_DIR}/lib/libhiprtc.so DESTINATION lib COMPONENT MAIN) -INSTALL(TARGETS amdhip64_static amdhip64 host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) +INSTALL(TARGETS amdhip64 amdhip64_static host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR}) INSTALL(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::) diff --git a/vdi/hip_context.cpp b/vdi/hip_context.cpp old mode 100644 new mode 100755 index 6151f68c6a..2f75d07b8a --- a/vdi/hip_context.cpp +++ b/vdi/hip_context.cpp @@ -34,8 +34,6 @@ thread_local hipError_t 
g_lastError = hipSuccess; std::once_flag g_ihipInitialized; Device* host_device = nullptr; -std::map g_nullStreams; - void init() { if (!amd::Runtime::initialized()) { amd::IS_HIP = true; @@ -82,35 +80,19 @@ void setCurrentDevice(unsigned int index) { amd::HostQueue* getQueue(hipStream_t stream) { if (stream == nullptr) { - syncStreams(); return getNullStream(); } else { - hip::Stream* s = reinterpret_cast(stream); - if ((s->flags & hipStreamNonBlocking) == 0) { - getNullStream()->finish(); - } - return s->asHostQueue(); - } -} - -amd::HostQueue* getNullStream(Device& dev) { - auto stream = g_nullStreams.find(&dev); - if (stream == g_nullStreams.end()) { - amd::Device* device = dev.devices()[0]; - cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; - amd::HostQueue* queue = new amd::HostQueue(*dev.asContext(), *device, properties, - amd::CommandQueue::RealTimeDisabled, - amd::CommandQueue::Priority::Normal); - g_nullStreams[&dev] = queue; + constexpr bool WaitNullStreamOnly = true; + amd::HostQueue* queue = reinterpret_cast(stream)->asHostQueue(); + iHipWaitActiveStreams(queue, WaitNullStreamOnly); return queue; } - return stream->second; } amd::HostQueue* getNullStream(amd::Context& ctx) { - for (auto& it : g_nullStreams) { - if (it.first->asContext() == &ctx) { - return it.second; + for (auto& it : g_devices) { + if (it->asContext() == &ctx) { + return it->NullStream(); } } return nullptr; @@ -118,7 +100,7 @@ amd::HostQueue* getNullStream(amd::Context& ctx) { amd::HostQueue* getNullStream() { Device* device = getCurrentDevice(); - return device ? getNullStream(*device) : nullptr; + return device ? 
device->NullStream() : nullptr; } }; @@ -232,6 +214,7 @@ hipError_t hipCtxPopCurrent(hipCtx_t* ctx) { *dev = g_ctxtStack.top(); g_ctxtStack.pop(); } else { + DevLogError("Context Stack empty \n"); HIP_RETURN(hipErrorInvalidContext); } diff --git a/vdi/hip_conversions.hpp b/vdi/hip_conversions.hpp index d79a39c321..2a78617ad9 100644 --- a/vdi/hip_conversions.hpp +++ b/vdi/hip_conversions.hpp @@ -672,7 +672,7 @@ hipResourceDesc getResourceDesc(const HIP_RESOURCE_DESC& resDesc) { hipResourceDesc desc; desc.resType = getResourceType(resDesc.resType); - switch (resDesc.resType) { + switch (desc.resType) { case hipResourceTypeArray: desc.res.array.array = resDesc.res.array.hArray; break; @@ -703,7 +703,7 @@ HIP_RESOURCE_DESC getResourceDesc(const hipResourceDesc& resDesc) { HIP_RESOURCE_DESC desc; desc.resType = getResourceType(resDesc.resType); - switch (resDesc.resType) { + switch (desc.resType) { case HIP_RESOURCE_TYPE_ARRAY: desc.res.array.hArray = resDesc.res.array.array; break; @@ -895,4 +895,9 @@ HIP_RESOURCE_VIEW_DESC getResourceViewDesc(const hipResourceViewDesc& resViewDes return desc; } + +inline +size_t getElementSize(const hipChannelFormatDesc &desc) { + return (desc.x / 4) * getNumChannels(desc); +} }; diff --git a/vdi/hip_device.cpp b/vdi/hip_device.cpp index 30b2292271..5dfc595ee9 100644 --- a/vdi/hip_device.cpp +++ b/vdi/hip_device.cpp @@ -22,6 +22,20 @@ #include "hip_internal.hpp" +namespace hip { + +amd::HostQueue* Device::NullStream() { + amd::HostQueue* null_queue = null_stream_.asHostQueue(); + if (null_queue == nullptr) { + return nullptr; + } + // Wait for all active streams before executing commands on the default + iHipWaitActiveStreams(null_queue); + return null_queue; +} + +}; + hipError_t hipDeviceGet(hipDevice_t *device, int deviceId) { HIP_INIT_API(hipDeviceGet, device, deviceId); diff --git a/vdi/hip_device_runtime.cpp b/vdi/hip_device_runtime.cpp index febf64d116..86a1590533 100644 --- a/vdi/hip_device_runtime.cpp +++ 
b/vdi/hip_device_runtime.cpp @@ -239,6 +239,9 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) case hipDeviceAttributeCooperativeMultiDeviceLaunch: *pi = prop.cooperativeMultiDeviceLaunch; break; + case hipDeviceAttributeIntegrated: + *pi = prop.integrated; + break; case hipDeviceAttributeMaxTexture1DWidth: *pi = prop.maxTexture1D; break; @@ -432,8 +435,6 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { hipError_t hipDeviceSynchronize ( void ) { HIP_INIT_API(hipDeviceSynchronize); - hip::syncStreams(); - amd::HostQueue* queue = hip::getNullStream(); if (!queue) { diff --git a/vdi/hip_event.cpp b/vdi/hip_event.cpp index 677becd67e..f2738169a8 100644 --- a/vdi/hip_event.cpp +++ b/vdi/hip_event.cpp @@ -35,8 +35,9 @@ bool Event::ready() { hipError_t Event::query() { amd::ScopedLock lock(lock_); + // If event is not recorded, event_ is null, hence return hipSuccess if (event_ == nullptr) { - return hipErrorInvalidHandle; + return hipSuccess; } return ready() ? hipSuccess : hipErrorNotReady; @@ -45,8 +46,9 @@ hipError_t Event::query() { hipError_t Event::synchronize() { amd::ScopedLock lock(lock_); + // If event is not recorded, event_ is null, hence return hipSuccess if (event_ == nullptr) { - return hipErrorInvalidHandle; + return hipSuccess; } event_->awaitCompletion(); @@ -217,19 +219,14 @@ hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { HIP_RETURN(hipErrorInvalidHandle); } - hip::Event* e = reinterpret_cast(event); - - hip::Stream* s = reinterpret_cast(stream); amd::HostQueue* queue = hip::getQueue(stream); - - amd::Command* command = (s != nullptr && (s->flags & hipStreamNonBlocking)) ? 
- queue->getLastQueuedCommand(true) : nullptr; - + amd::Command* command = queue->getLastQueuedCommand(true); if (command == nullptr) { command = new amd::Marker(*queue, false); command->enqueue(); } + hip::Event* e = reinterpret_cast(event); e->addMarker(queue, command); HIP_RETURN(hipSuccess); diff --git a/vdi/hip_hcc.def.in b/vdi/hip_hcc.def.in old mode 100644 new mode 100755 index ef511ee43a..238d7fe02a --- a/vdi/hip_hcc.def.in +++ b/vdi/hip_hcc.def.in @@ -135,9 +135,14 @@ hipModuleLoad hipModuleLoadData hipModuleLoadDataEx hipModuleUnload +hipModuleOccupancyMaxPotentialBlockSize +hipModuleOccupancyMaxPotentialBlockSizeWithFlags +hipModuleOccupancyMaxActiveBlocksPerMultiprocessor +hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags hipOccupancyMaxPotentialBlockSize hipOccupancyMaxActiveBlocksPerMultiprocessor hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags +hipFuncGetAttribute hipFuncGetAttributes hipPeekAtLastError hipPointerGetAttributes @@ -160,6 +165,8 @@ __hipPushCallConfiguration __hipRegisterFatBinary __hipRegisterFunction __hipRegisterVar +__hipRegisterSurface +__hipRegisterTexture __hipUnregisterFatBinary __gnu_h2f_ieee __gnu_f2h_ieee diff --git a/vdi/hip_hcc.map.in b/vdi/hip_hcc.map.in old mode 100644 new mode 100755 index 2139f45ab8..f2491cd283 --- a/vdi/hip_hcc.map.in +++ b/vdi/hip_hcc.map.in @@ -135,9 +135,14 @@ global: hipModuleLoadData; hipModuleLoadDataEx; hipModuleUnload; + hipModuleOccupancyMaxPotentialBlockSize; + hipModuleOccupancyMaxPotentialBlockSizeWithFlags; + hipModuleOccupancyMaxActiveBlocksPerMultiprocessor; + hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; hipOccupancyMaxPotentialBlockSize; hipOccupancyMaxActiveBlocksPerMultiprocessor; hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags; + hipFuncGetAttribute; hipFuncGetAttributes; hipPeekAtLastError; hipPointerGetAttributes; @@ -160,6 +165,8 @@ global: __hipRegisterFatBinary; __hipRegisterFunction; __hipRegisterVar; + __hipRegisterSurface; + 
__hipRegisterTexture; __hipUnregisterFatBinary; __gnu_h2f_ieee; __gnu_f2h_ieee; diff --git a/vdi/hip_internal.hpp b/vdi/hip_internal.hpp old mode 100755 new mode 100644 index 070c58d076..3d7c5249a8 --- a/vdi/hip_internal.hpp +++ b/vdi/hip_internal.hpp @@ -48,7 +48,7 @@ typedef struct ihipIpcMemHandle_st { } ihipIpcMemHandle_t; #ifdef _WIN32 - int getpid() { return _getpid();} + inline int getpid() { return _getpid(); } #endif #define HIP_INIT() \ @@ -78,6 +78,31 @@ class accelerator_view; }; namespace hip { + class Device; + + class Stream { + amd::HostQueue* queue_; + mutable amd::Monitor lock_; + Device* device_; + amd::CommandQueue::Priority priority_; + unsigned int flags_; + bool null_; + + public: + Stream(Device* dev, amd::CommandQueue::Priority p, unsigned int f = 0, bool null_stream = false); + bool Create(); + amd::HostQueue* asHostQueue(); + void Destroy(); + void Finish() const; + /// Get device ID associated with the current stream; + int DeviceId() const; + /// Returns if stream is null stream + bool Null() const { return null_; } + /// Returns the lock object for the current stream + amd::Monitor& Lock() const { return lock_; } + /// Returns the creation flags for the current stream + unsigned int Flags() const { return flags_; } + }; /// HIP Device class class Device { @@ -91,8 +116,11 @@ namespace hip { Stream null_stream_; //Maintain list of user enabled peers std::list userEnabledPeers; + public: - Device(amd::Context* ctx, int devId): context_(ctx), deviceId_(devId) { assert(ctx != nullptr); } + Device(amd::Context* ctx, int devId): + context_(ctx), deviceId_(devId), null_stream_(this, amd::CommandQueue::Priority::Normal, 0, true) + { assert(ctx != nullptr); } ~Device() {} amd::Context* asContext() const { return context_; } @@ -119,6 +147,7 @@ namespace hip { return hipErrorPeerAccessNotEnabled; } } + amd::HostQueue* NullStream(); }; extern std::once_flag g_ihipInitialized; @@ -131,23 +160,17 @@ namespace hip { extern void init(); extern 
Device* getCurrentDevice(); + extern void setCurrentDevice(unsigned int index); /// Get VDI queue associated with hipStream /// Note: This follows the CUDA spec to sync with default streams /// and Blocking streams extern amd::HostQueue* getQueue(hipStream_t s); - /// Get default stream of the device - extern amd::HostQueue* getNullStream(Device&); /// Get default stream associated with the VDI context extern amd::HostQueue* getNullStream(amd::Context&); /// Get default stream of the thread extern amd::HostQueue* getNullStream(); - /// Sync Blocking streams on the current device - extern void syncStreams(); - /// Sync blocking streams on the given device - extern void syncStreams(int devId); - struct Function { amd::Kernel* function_; @@ -159,20 +182,6 @@ namespace hip { static Function* asFunction(hipFunction_t f) { return reinterpret_cast(f); } }; - struct Stream { - amd::HostQueue* queue; - amd::Monitor lock; - Device* device; - amd::CommandQueue::Priority priority; - unsigned int flags; - - Stream(Device* dev, amd::CommandQueue::Priority p, unsigned int f); - void create(); - amd::HostQueue* asHostQueue(); - void destroy(); - void finish(); - }; - }; struct ihipExec_t { @@ -184,7 +193,7 @@ struct ihipExec_t { }; class PlatformState { - amd::Monitor lock_{"Guards global function map"}; + amd::Monitor lock_{"Guards global function map", true}; std::unordered_map>> modules_; bool initialized_{false}; @@ -227,13 +236,22 @@ public: std::vector< std::pair< hipModule_t, bool > >* modules; std::vector functions; }; + enum DeviceVarKind { + DVK_Variable, + DVK_Surface, + DVK_Texture + }; struct DeviceVar { + DeviceVarKind kind; void* shadowVptr; std::string hostVar; size_t size; std::vector< std::pair< hipModule_t, bool > >* modules; std::vector rvars; bool dyn_undef; + int type; // surface/texture type + int norm; // texture has normalized output + bool shadowAllocated = false; // shadow ptr is allocated on-demand and needs freeing. 
}; private: class Module { @@ -247,8 +265,11 @@ private: std::unordered_map functions_; std::unordered_multimap vars_; - // Map from the host shadow symbol to its device name. - std::unordered_map symbols_; + // Map from the host shadow symbol to its device name. As different modules + // may have the same name, each symbol is uniquely identified by a pair of + // module handle and its name. + std::unordered_map> symbols_; static PlatformState* platform_; @@ -256,6 +277,11 @@ private: ~PlatformState() {} public: static PlatformState& instance() { + if (platform_ == nullptr) { + // __hipRegisterFatBinary() will call this when app starts, thus + // there is no multiple entry issue here. + platform_ = new PlatformState(); + } return *platform_; } @@ -263,9 +289,9 @@ public: std::vector< std::pair >* unregisterVar(hipModule_t hmod); - bool findSymbol(const void *hostVar, std::string &devName); + bool findSymbol(const void *hostVar, hipModule_t &hmod, std::string &devName); PlatformState::DeviceVar* findVar(std::string hostVar, int deviceId, hipModule_t hmod); - void registerVarSym(const void *hostVar, const char *symbolName); + void registerVarSym(const void *hostVar, hipModule_t hmod, const char *symbolName); void registerVar(const char* symbolName, const DeviceVar& var); void registerFunction(const void* hostFunction, const DeviceFunction& func); @@ -278,15 +304,21 @@ public: hipDeviceptr_t* dev_ptr, size_t* size_ptr); bool getTexRef(const char* hostVar, hipModule_t hmod, textureReference** texRef); + bool getGlobalVarFromSymbol(const void* hostVar, int deviceId, + hipDeviceptr_t* dev_ptr, size_t* size_ptr); + bool getShadowVarInfo(std::string var_name, hipModule_t hmod, void** var_addr, size_t* var_size); void setupArgument(const void *arg, size_t size, size_t offset); void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream); void popExec(ihipExec_t& exec); - }; +/// Wait all active streams on the blocking queue. 
The method enqueues a wait command and +/// doesn't stall the current thread +extern void iHipWaitActiveStreams(amd::HostQueue* blocking_queue, bool wait_null_stream = false); + extern std::vector g_devices; extern hipError_t ihipDeviceGetCount(int* count); extern int ihipGetDevice(); diff --git a/vdi/hip_memory.cpp b/vdi/hip_memory.cpp old mode 100644 new mode 100755 index 1625f7703a..d4be73496d --- a/vdi/hip_memory.cpp +++ b/vdi/hip_memory.cpp @@ -48,11 +48,7 @@ hipError_t ihipFree(void *ptr) } if (amd::SvmBuffer::malloced(ptr)) { for (auto& dev : g_devices) { - amd::HostQueue* queue = hip::getNullStream(*dev->asContext()); - if (queue != nullptr) { - queue->finish(); - } - hip::syncStreams(dev->deviceId()); + dev->NullStream()->finish(); } amd::SvmBuffer::free(*hip::getCurrentDevice()->asContext(), ptr); return hipSuccess; @@ -179,8 +175,7 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin hipError_t hipExtMallocWithFlags(void** ptr, size_t sizeBytes, unsigned int flags) { HIP_INIT_API(hipExtMallocWithFlags, ptr, sizeBytes, flags); - if (flags != hipDeviceMallocDefault && - flags != hipDeviceMallocFinegrained) { + if (flags != hipDeviceMallocDefault && flags != hipDeviceMallocFinegrained) { HIP_RETURN(hipErrorInvalidValue); } @@ -205,6 +200,9 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) { // can't have both Coherent and NonCoherent flags set at the same time if ((flags & coherentFlags) == coherentFlags) { + DevLogPrintfError("Cannot have both coherent and non-coherent flags " + "at the same time, flags: %u coherent flags: %u \n", + flags, coherentFlags); HIP_RETURN(hipErrorInvalidValue); } @@ -238,7 +236,6 @@ hipError_t hipFree(void* ptr) { hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { HIP_INIT_API(hipMemcpy, dst, src, sizeBytes, kind); - hip::syncStreams(); amd::HostQueue* queue = hip::getNullStream(); HIP_RETURN(ihipMemcpy(dst, src, sizeBytes, kind, 
*queue)); } @@ -283,11 +280,7 @@ hipError_t ihipArrayDestroy(hipArray* array) { return hipErrorInvalidValue; } for (auto& dev : g_devices) { - amd::HostQueue* queue = hip::getNullStream(*dev->asContext()); - if (queue != nullptr) { - queue->finish(); - } - hip::syncStreams(dev->deviceId()); + dev->NullStream()->finish(); } as_amd(memObj)->release(); @@ -417,16 +410,20 @@ amd::Image* ihipImageCreate(const cl_channel_order channelOrder, amd::Memory* buffer) { const amd::Image::Format imageFormat({channelOrder, channelType}); if (!imageFormat.isValid()) { + DevLogPrintfError("Invalid Image format for channel Order:%u Type:%u \n", + channelOrder, channelType); return nullptr; } amd::Context& context = *hip::getCurrentDevice()->asContext(); if (!imageFormat.isSupported(context, imageType)) { + DevLogPrintfError("Image type: %u not supported \n", imageType); return nullptr; } const std::vector& devices = context.devices(); if (!devices[0]->info().imageSupport_) { + DevLogPrintfError("Device: 0x%x does not support image \n", devices[0]); return nullptr; } @@ -436,6 +433,7 @@ amd::Image* ihipImageCreate(const cl_channel_order channelOrder, imageHeight, imageDepth, imageArraySize)) { + DevLogError("Image does not have valid dimensions \n"); return nullptr; } @@ -509,6 +507,7 @@ amd::Image* ihipImageCreate(const cl_channel_order channelOrder, } if (!image->create(nullptr)) { + DevLogPrintfError("Cannot create image: 0x%x \n", image); delete image; return nullptr; } @@ -630,8 +629,7 @@ hipError_t hipMalloc3DArray(hipArray_t* array, hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) { HIP_INIT_API(hipHostGetFlags, flagsPtr, hostPtr); - if (flagsPtr == nullptr || - hostPtr == nullptr) { + if (flagsPtr == nullptr || hostPtr == nullptr) { HIP_RETURN(hipErrorInvalidValue); } @@ -657,6 +655,8 @@ hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) constexpr bool forceAlloc = true; if (!mem->create(hostPtr, sysMemAlloc, skipAlloc, 
forceAlloc)) { mem->release(); + DevLogPrintfError("Cannot create memory for size: %u with flags: %d \n", + sizeBytes, flags); HIP_RETURN(hipErrorOutOfMemory); } @@ -678,11 +678,7 @@ hipError_t hipHostUnregister(void* hostPtr) { HIP_INIT_API(hipHostUnregister, hostPtr); for (auto& dev : g_devices) { - amd::HostQueue* queue = hip::getNullStream(*dev->asContext()); - if (queue != nullptr) { - queue->finish(); - } - hip::syncStreams(dev->deviceId()); + dev->NullStream()->finish(); } if (amd::SvmBuffer::malloced(hostPtr)) { @@ -703,6 +699,7 @@ hipError_t hipHostUnregister(void* hostPtr) { } } + DevLogPrintfError("Cannot unregister host_ptr: 0x%x \n", hostPtr); HIP_RETURN(hipErrorInvalidValue); } @@ -712,116 +709,136 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) { }; -hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t count, +hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes, size_t offset, hipMemcpyKind kind) { - HIP_INIT_API(hipMemcpyToSymbol, symbol, src, count, offset, kind); + HIP_INIT_API(hipMemcpyToSymbol, symbol, src, sizeBytes, offset, kind); size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; + hipModule_t hmod; std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) { + DevLogPrintfError("cannot find symbol 0x%x \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); } /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, &device_ptr, &sym_size)) { + DevLogPrintfError("Cannot get global var: %s at device: %d \n", symbolName.c_str(), ihipGetDevice()); HIP_RETURN(hipErrorInvalidSymbol); } /* Size Check to make sure offset is correct */ - if ((offset + count) != sym_size) { + if ((offset + 
sizeBytes) > sym_size) { + DevLogPrintfError("Trying to access out of bounds, offset: %u sizeBytes: %u sym_size: %u \n", + offset, sizeBytes, sym_size); return HIP_RETURN(hipErrorInvalidDevicePointer); } device_ptr = reinterpret_cast
(device_ptr) + offset; /* Copy memory from source to destination address */ - HIP_RETURN(hipMemcpy(device_ptr, src, count, kind)); + HIP_RETURN(hipMemcpy(device_ptr, src, sizeBytes, kind)); } -hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol, size_t count, +hipError_t hipMemcpyFromSymbol(void* dst, const void* symbol, size_t sizeBytes, size_t offset, hipMemcpyKind kind) { - HIP_INIT_API(hipMemcpyFromSymbol, symbol, dst, count, offset, kind); + HIP_INIT_API(hipMemcpyFromSymbol, symbol, dst, sizeBytes, offset, kind); size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; + hipModule_t hmod; std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) { + DevLogPrintfError("cannot find symbol: 0x%x \n", symbol); HIP_RETURN(hipErrorInvalidSymbol); } /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, &device_ptr, &sym_size)) { + DevLogPrintfError("Cannot find symbol Name: %s \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); } /* Size Check to make sure offset is correct */ - if ((offset + count) != sym_size) { + if ((offset + sizeBytes) > sym_size) { + DevLogPrintfError("Trying to access out of bounds, offset: %u sizeBytes: %u sym_size: %u \n", + offset, sizeBytes, sym_size); return HIP_RETURN(hipErrorInvalidDevicePointer); } device_ptr = reinterpret_cast
(device_ptr) + offset; /* Copy memory from source to destination address */ - HIP_RETURN(hipMemcpy(dst, device_ptr, count, kind)); + HIP_RETURN(hipMemcpy(dst, device_ptr, sizeBytes, kind)); } -hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t count, +hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t sizeBytes, size_t offset, hipMemcpyKind kind, hipStream_t stream) { - HIP_INIT_API(hipMemcpyToSymbolAsync, symbol, src, count, offset, kind, stream); + HIP_INIT_API(hipMemcpyToSymbolAsync, symbol, src, sizeBytes, offset, kind, stream); size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; + hipModule_t hmod; std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) { + DevLogPrintfError("cannot find symbol: 0x%x \n", symbol); HIP_RETURN(hipErrorInvalidSymbol); } /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, &device_ptr, &sym_size)) { + DevLogPrintfError("Cannot find symbol Name: %s \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); } /* Size Check to make sure offset is correct */ - if ((offset + count) != sym_size) { + if ((offset + sizeBytes) > sym_size) { + DevLogPrintfError("Trying to access out of bounds, offset: %u sizeBytes: %u sym_size: %u \n", + offset, sizeBytes, sym_size); return HIP_RETURN(hipErrorInvalidDevicePointer); } device_ptr = reinterpret_cast
(device_ptr) + offset; /* Copy memory from source to destination address */ - HIP_RETURN(hipMemcpyAsync(device_ptr, src, count, kind, stream)); + HIP_RETURN(hipMemcpyAsync(device_ptr, src, sizeBytes, kind, stream)); } -hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol, size_t count, +hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbol, size_t sizeBytes, size_t offset, hipMemcpyKind kind, hipStream_t stream) { - HIP_INIT_API(hipMemcpyFromSymbolAsync, symbol, dst, count, offset, kind, stream); + HIP_INIT_API(hipMemcpyFromSymbolAsync, symbol, dst, sizeBytes, offset, kind, stream); size_t sym_size = 0; hipDeviceptr_t device_ptr = nullptr; + hipModule_t hmod; std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) { + DevLogPrintfError("cannot find symbol: 0x%x \n", symbol); HIP_RETURN(hipErrorInvalidSymbol); } /* Get address and size for the global symbol */ - if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, &device_ptr, &sym_size)) { + DevLogPrintfError("Cannot find symbol Name: %s \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); } /* Size Check to make sure offset is correct */ - if ((offset + count) != sym_size) { + if ((offset + sizeBytes) > sym_size) { + DevLogPrintfError("Trying to access out of bounds, offset: %u sizeBytes: %u sym_size: %u \n", + offset, sizeBytes, sym_size); return HIP_RETURN(hipErrorInvalidDevicePointer); } device_ptr = reinterpret_cast
(device_ptr) + offset; /* Copy memory from source to destination address */ - HIP_RETURN(hipMemcpyAsync(dst, device_ptr, count, kind, stream)); + HIP_RETURN(hipMemcpyAsync(dst, device_ptr, sizeBytes, kind, stream)); } hipError_t hipMemcpyHtoD(hipDeviceptr_t dstDevice, @@ -894,13 +911,13 @@ hipError_t ihipMemcpyAtoD(hipArray* srcArray, hipStream_t stream, bool isAsync = false) { cl_mem srcMemObj = reinterpret_cast(srcArray->data); - if (is_valid(srcMemObj) == false) { + size_t dstOffset = 0; + amd::Memory* dstMemory = getMemoryObject(dstDevice, dstOffset); + if (!is_valid(srcMemObj) || (dstMemory == nullptr)) { return hipErrorInvalidValue; } amd::Image* srcImage = as_amd(srcMemObj)->asImage(); - size_t dstOffset = 0; - amd::Memory* dstMemory = getMemoryObject(dstDevice, dstOffset); amd::BufferRect srcRect; if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcImage->getRowPitch(), srcImage->getSlicePitch())) { @@ -953,13 +970,13 @@ hipError_t ihipMemcpyDtoA(void* srcDevice, size_t srcSlicePitch, hipStream_t stream, bool isAsync = false) { + size_t srcOffset = 0; + amd::Memory* srcMemory = getMemoryObject(srcDevice, srcOffset); cl_mem dstMemObj = reinterpret_cast(dstArray->data); - if (is_valid(dstMemObj) == false) { + if ((srcMemory == nullptr) || !is_valid(dstMemObj)) { return hipErrorInvalidValue; } - size_t srcOffset = 0; - amd::Memory* srcMemory = getMemoryObject(srcDevice, srcOffset); amd::Image* dstImage = as_amd(dstMemObj)->asImage(); amd::BufferRect srcRect; @@ -1020,6 +1037,10 @@ hipError_t ihipMemcpyDtoD(void* srcDevice, size_t dstOffset = 0; amd::Memory *dstMemory = getMemoryObject(dstDevice, dstOffset); + if ((srcMemory == nullptr) || (dstMemory == nullptr)) { + return hipErrorInvalidValue; + } + amd::BufferRect srcRect; if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { return hipErrorInvalidValue; @@ -1028,8 +1049,8 @@ hipError_t ihipMemcpyDtoD(void* srcDevice, srcRect.end_ += 
srcOffset; amd::Coord3D srcStart(srcRect.start_, 0, 0); - amd::Coord3D srcEnd(srcRect.end_, 1, 1); - if (!srcMemory->validateRegion(srcStart, srcEnd)) { + amd::Coord3D srcSize(srcRect.end_ - srcRect.start_, 1, 1); + if (!srcMemory->validateRegion(srcStart, srcSize)) { return hipErrorInvalidValue; } @@ -1041,8 +1062,8 @@ hipError_t ihipMemcpyDtoD(void* srcDevice, dstRect.end_ += dstOffset; amd::Coord3D dstStart(dstRect.start_, 0, 0); - amd::Coord3D dstEnd(dstRect.end_, 1, 1); - if (!dstMemory->validateRegion(dstStart, dstEnd)) { + amd::Coord3D dstSize(dstRect.end_ - dstRect.start_, 1, 1); + if (!dstMemory->validateRegion(dstStart, dstSize)) { return hipErrorInvalidValue; } @@ -1084,6 +1105,10 @@ hipError_t ihipMemcpyDtoH(void* srcDevice, size_t srcOffset = 0; amd::Memory *srcMemory = getMemoryObject(srcDevice, srcOffset); + if ((srcMemory == nullptr) || (dstHost == nullptr)) { + return hipErrorInvalidValue; + } + amd::BufferRect srcRect; if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { return hipErrorInvalidValue; @@ -1092,8 +1117,8 @@ hipError_t ihipMemcpyDtoH(void* srcDevice, srcRect.end_ += srcOffset; amd::Coord3D srcStart(srcRect.start_, 0, 0); - amd::Coord3D srcEnd(srcRect.end_, 1, 1); - if (!srcMemory->validateRegion(srcStart, srcEnd)) { + amd::Coord3D srcSize(srcRect.end_ - srcRect.start_, 1, 1); + if (!srcMemory->validateRegion(srcStart, srcSize)) { return hipErrorInvalidValue; } @@ -1139,6 +1164,10 @@ hipError_t ihipMemcpyHtoD(const void* srcHost, size_t dstOffset = 0; amd::Memory *dstMemory = getMemoryObject(dstDevice, dstOffset); + if ((srcHost == nullptr) || (dstMemory == nullptr)) { + return hipErrorInvalidValue; + } + amd::BufferRect srcRect; if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { return hipErrorInvalidValue; @@ -1152,8 +1181,8 @@ hipError_t ihipMemcpyHtoD(const void* srcHost, dstRect.end_ += dstOffset; amd::Coord3D dstStart(dstRect.start_, 
0, 0); - amd::Coord3D dstEnd(dstRect.end_, 1, 1); - if (!dstMemory->validateRegion(dstStart, dstEnd)) { + amd::Coord3D dstSize(dstRect.end_ - dstRect.start_, 1, 1); + if (!dstMemory->validateRegion(dstStart, dstSize)) { return hipErrorInvalidValue; } @@ -1189,6 +1218,10 @@ hipError_t ihipMemcpyHtoH(const void* srcHost, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch) { + if ((srcHost == nullptr) || (dstHost == nullptr)) { + return hipErrorInvalidValue; + } + amd::BufferRect srcRect; if (!srcRect.create(static_cast(srcOrigin), static_cast(copyRegion), srcRowPitch, srcSlicePitch)) { return hipErrorInvalidValue; @@ -1270,12 +1303,8 @@ hipError_t ihipMemcpyHtoA(const void* srcHost, size_t srcSlicePitch, hipStream_t stream, bool isAsync = false) { - if (srcHost == nullptr) { - return hipErrorInvalidValue; - } - cl_mem dstMemObj = reinterpret_cast(dstArray->data); - if (is_valid(dstMemObj) == false) { + if ((srcHost == nullptr) || !is_valid(dstMemObj)) { return hipErrorInvalidValue; } @@ -1327,11 +1356,7 @@ hipError_t ihipMemcpyAtoH(hipArray* srcArray, hipStream_t stream, bool isAsync = false) { cl_mem srcMemObj = reinterpret_cast(srcArray->data); - if (!is_valid(srcMemObj)) { - return hipErrorInvalidValue; - } - - if (dstHost == nullptr) { + if (!is_valid(srcMemObj) || (dstHost == nullptr)) { return hipErrorInvalidValue; } @@ -1340,7 +1365,6 @@ hipError_t ihipMemcpyAtoH(hipArray* srcArray, return hipErrorInvalidValue; } - amd::Image* srcImage = as_amd(srcMemObj)->asImage(); // HIP assumes the width is in bytes, but OCL assumes it's in pixels. 
const size_t elementSize = srcImage->getImageFormat().getElementSize(); @@ -1630,6 +1654,32 @@ hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream) { HIP_RETURN(ihipMemcpyParam3D(pCopy, stream, true)); } +hipError_t packFillMemoryCommand(amd::Memory* memory, size_t offset, int value, size_t valueSize, + size_t sizeBytes, amd::HostQueue* queue, bool isAsync = false) { + + if ((memory == nullptr) || (queue == nullptr)) { + return hipErrorInvalidValue; + } + + amd::Command::EventWaitList waitList; + amd::Coord3D fillOffset(offset, 0, 0); + amd::Coord3D fillSize(sizeBytes, 1, 1); + amd::FillMemoryCommand* command = + new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + &value, valueSize, fillOffset, fillSize); + if (command == nullptr) { + return hipErrorOutOfMemory; + } + command->enqueue(); + + if (!isAsync) { + command->awaitCompletion(); + } + + command->release(); + return hipSuccess; +} + hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, hipStream_t stream, bool isAsync = false) { if (sizeBytes == 0) { @@ -1642,33 +1692,51 @@ hipError_t ihipMemset(void* dst, int value, size_t valueSize, size_t sizeBytes, } size_t offset = 0; - amd::HostQueue* queue = hip::getQueue(stream); amd::Memory* memory = getMemoryObject(dst, offset); - - if (memory != nullptr) { - // Device memory - amd::Command::EventWaitList waitList; - amd::Coord3D fillOffset(offset, 0, 0); - amd::Coord3D fillSize(sizeBytes, 1, 1); - amd::FillMemoryCommand* command = - new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), - &value, valueSize, fillOffset, fillSize); - - if (command == nullptr) { - return hipErrorOutOfMemory; - } - - command->enqueue(); - if (!isAsync) { - command->awaitCompletion(); - } - command->release(); - } else { + if (memory == nullptr) { // Host alloced memory memset(dst, value, sizeBytes); + return hipSuccess; } - return hipSuccess; + hipError_t 
hip_error = hipSuccess; + amd::HostQueue* queue = hip::getQueue(stream); + + int32_t value32 = 0; + const size_t dwordModSize = (sizeBytes % sizeof(int32_t)); + + if (sizeBytes/sizeof(int32_t) > 0) { + if (valueSize == sizeof(int8_t)) { + value = value & 0xff; + value32 = ((value << 24) | (value << 16) | (value << 8) | (value)); + } else if (valueSize == sizeof(int16_t)) { + value = value & 0xffff; + value32 = ((value<<16) | (value)); + } else if(valueSize == sizeof(int32_t)) { + value32 = value; + } else { + LogPrintfError("Unsupported Pattern size: %u \n", valueSize); + return hipErrorInvalidValue; + } + // If dwordModSize is != 0 then we will do a second fillBuffer Command + // on the same stream below, dont wait, do the first call async. + hip_error = packFillMemoryCommand(memory, offset, value32, sizeof(int32_t), + sizeBytes - dwordModSize, queue, + ((dwordModSize != 0) || isAsync)); + if(hip_error != hipSuccess) { + return hip_error; + } + } + + if (dwordModSize != 0) { + void* new_dst = reinterpret_cast((reinterpret_cast
(dst) + + sizeBytes) - dwordModSize); + memory = getMemoryObject(new_dst, offset); + hip_error = packFillMemoryCommand(memory, offset, value, valueSize, + dwordModSize, queue, isAsync); + } + + return hip_error; } hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { @@ -1837,12 +1905,16 @@ hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* dev_ptr) { /* Get AMD::Memory object corresponding to this pointer */ amd_mem_obj = getMemoryObject(dev_ptr, offset); if (amd_mem_obj == nullptr) { + DevLogPrintfError("Cannot retrieve amd_mem_obj for dev_ptr: 0x%x with offset: %u \n", + dev_ptr, offset); HIP_RETURN(hipErrorInvalidDevicePointer); } /* Get Device::Memory object pointer */ dev_mem_obj = amd_mem_obj->getDeviceMemory(*hip::getCurrentDevice()->devices()[0],false); if (dev_mem_obj == nullptr) { + DevLogPrintfError("Cannot get Device memory for amd_mem_obj: 0x%x dev_ptr: 0x%x offset: %u \n", + amd_mem_obj, dev_ptr, offset); HIP_RETURN(hipErrorInvalidDevicePointer); } @@ -1870,6 +1942,7 @@ hipError_t hipIpcOpenMemHandle(void** dev_ptr, hipIpcMemHandle_t handle, unsigne amd_mem_obj = device->IpcAttach(&(ihandle->ipc_handle), ihandle->psize, flags, dev_ptr); if (amd_mem_obj == nullptr) { + DevLogPrintfError("cannot attach ipc_handle: with ipc_size: %u flags: %u", ihandle->psize, flags); HIP_RETURN(hipErrorInvalidDevicePointer); } @@ -1886,7 +1959,6 @@ hipError_t hipIpcCloseMemHandle(void* dev_ptr) { amd::Device* device = nullptr; amd::Memory* amd_mem_obj = nullptr; - hip::syncStreams(); hip::getNullStream()->finish(); if (dev_ptr == nullptr) { @@ -1954,9 +2026,12 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void } ++device; } + DevLogPrintfError("Cannot find memory object context, memObjCtx: 0x%x \n", + memObjCtx); HIP_RETURN(hipErrorInvalidDevice); } + DevLogPrintfError("Cannot get amd_mem_obj for ptr: 0x%x \n", ptr); HIP_RETURN(hipErrorInvalidValue); } diff --git a/vdi/hip_module.cpp b/vdi/hip_module.cpp index 
1e5f7d8b31..5d09f88293 100755 --- a/vdi/hip_module.cpp +++ b/vdi/hip_module.cpp @@ -94,10 +94,12 @@ hipError_t hipModuleUnload(hipModule_t hmod) amd::Program* program = as_amd(reinterpret_cast(hmod)); if(!PlatformState::instance().unregisterFunc(hmod)) { + DevLogPrintfError("Cannot unregister module: 0x%x \n", hmod); HIP_RETURN(hipErrorInvalidSymbol); } if(!ihipModuleUnregisterGlobal(hmod)) { + DevLogPrintfError("Cannot unregister Global vars for module: 0x%x \n", hmod); HIP_RETURN(hipErrorInvalidSymbol); } @@ -134,6 +136,7 @@ inline bool ihipModuleRegisterUndefined(amd::Program* program, hipModule_t* modu = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); if (!dev_program->getUndefinedVarFromCodeObj(&undef_vars)) { + DevLogPrintfError("Could not get undefined Variables for Module: 0x%x \n", *module); return false; } @@ -147,8 +150,15 @@ inline bool ihipModuleRegisterUndefined(amd::Program* program, hipModule_t* modu = new texture(); memset(tex_hptr, 0x00, sizeof(texture)); - PlatformState::DeviceVar dvar{ reinterpret_cast(tex_hptr), it->c_str(), sizeof(*tex_hptr), modules, - std::vector{ g_devices.size()}, true }; + PlatformState::DeviceVar dvar{PlatformState::DVK_Variable, + reinterpret_cast(tex_hptr), + it->c_str(), + sizeof(*tex_hptr), + modules, + std::vector{g_devices.size()}, + true, + /*type*/ 0, + /*norm*/ 0}; PlatformState::instance().registerVar(it->c_str(), dvar); } @@ -163,6 +173,7 @@ inline bool ihipModuleRegisterFunc(amd::Program* program, hipModule_t* module) { // Get all the global func names from COMGR if (!dev_program->getGlobalFuncFromCodeObj(&func_names)) { + DevLogPrintfError("Could not get Global Funcs from Code Obj for Module: 0x%x \n", *module); return false; } @@ -180,6 +191,7 @@ inline bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); if (!dev_program->getGlobalVarFromCodeObj(&var_names)) { + DevLogPrintfError("Could not get 
Global vars from Code Obj for Module: 0x%x \n", *module); return false; } @@ -189,8 +201,15 @@ inline bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) modules->at(dev) = std::make_pair(*module, true); } - PlatformState::DeviceVar dvar{nullptr, it->c_str(), 0, modules, - std::vector{ g_devices.size()}, false }; + PlatformState::DeviceVar dvar{PlatformState::DVK_Variable, + nullptr, + it->c_str(), + 0, + modules, + std::vector{g_devices.size()}, + false, + /*type*/ 0, + /*norm*/ 0}; PlatformState::instance().registerVar(it->c_str(), dvar); } @@ -244,6 +263,8 @@ hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const ch HIP_INIT_API(hipModuleGetFunction, hfunc, hmod, name); if (!PlatformState::instance().findModFunc(hfunc, hmod, name)) { + DevLogPrintfError("Cannot find the function: %s for module: 0x%x \n", + name, hmod); HIP_RETURN(hipErrorNotFound); } HIP_RETURN(hipSuccess); @@ -256,12 +277,77 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t h /* Get address and size for the global symbol */ if (!PlatformState::instance().getGlobalVar(name, ihipGetDevice(), hmod, dptr, bytes)) { + DevLogPrintfError("Cannot find global Var: %s for module: 0x%x at device: %d \n", + name, hmod, ihipGetDevice()); HIP_RETURN(hipErrorNotFound); } HIP_RETURN(hipSuccess); } +hipError_t hipFuncGetAttribute(int* value, hipFunction_attribute attrib, hipFunction_t hfunc) { + HIP_INIT_API(hipFuncGetAttribute, value, attrib, hfunc); + + if ((value == nullptr) || (hfunc == nullptr)) { + HIP_RETURN(hipErrorInvalidValue); + } + + hip::Function* function = hip::Function::asFunction(hfunc); + if (function == nullptr) { + HIP_RETURN(hipErrorInvalidHandle); + } + + amd::Kernel* kernel = function->function_; + if (kernel == nullptr) { + HIP_RETURN(hipErrorInvalidDeviceFunction); + } + + const device::Kernel::WorkGroupInfo* wrkGrpInfo + = kernel->getDeviceKernel(*(hip::getCurrentDevice()->devices()[0]))->workGroupInfo(); + if 
(wrkGrpInfo == nullptr) { + HIP_RETURN(hipErrorMissingConfiguration); + } + + switch(attrib) { + case HIP_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: + *value = static_cast(wrkGrpInfo->localMemSize_ + - wrkGrpInfo->privateMemSize_); + break; + case HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: + *value = static_cast(wrkGrpInfo->wavefrontPerSIMD_ + * wrkGrpInfo->wavefrontSize_); + break; + case HIP_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: + *value = 0; + break; + case HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: + *value = static_cast(wrkGrpInfo->localMemSize_); + break; + case HIP_FUNC_ATTRIBUTE_NUM_REGS: + *value = static_cast(wrkGrpInfo->availableGPRs_); + break; + case HIP_FUNC_ATTRIBUTE_PTX_VERSION: + *value = 30; // Defaults to 3.0 as HCC + break; + case HIP_FUNC_ATTRIBUTE_BINARY_VERSION: + *value = static_cast(kernel->signature().version()); + break; + case HIP_FUNC_ATTRIBUTE_CACHE_MODE_CA: + *value = 0; + break; + case HIP_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: + *value = static_cast(wrkGrpInfo->availableLDSSize_); + break; + case HIP_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: + *value = 0; + break; + default: + HIP_RETURN(hipErrorInvalidValue); + } + + HIP_RETURN(hipSuccess); +} + hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) { HIP_INIT_API(hipFuncGetAttributes, attr, func); @@ -590,6 +676,8 @@ hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const /* Get address and size for the global symbol */ if (!PlatformState::instance().getTexRef(name, hmod, texRef)) { + DevLogPrintfError("Cannot get texRef for name: %s at module:0x%x \n", + name, hmod); HIP_RETURN(hipErrorNotFound); } @@ -599,4 +687,3 @@ hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const HIP_RETURN(hipSuccess); } - diff --git a/vdi/hip_peer.cpp b/vdi/hip_peer.cpp old mode 100644 new mode 100755 diff --git a/vdi/hip_platform.cpp b/vdi/hip_platform.cpp index 5ece473e06..f7cc1754f2 100755 --- a/vdi/hip_platform.cpp +++ 
b/vdi/hip_platform.cpp @@ -19,7 +19,7 @@ THE SOFTWARE. */ #include - +#include #include "hip_internal.hpp" #include "platform/program.hpp" #include "platform/runtime.hpp" @@ -30,7 +30,7 @@ constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF" thread_local std::stack execStack_; -PlatformState* PlatformState::platform_ = new PlatformState(); +PlatformState* PlatformState::platform_; // Initiaized as nullptr by default struct __CudaFatBinaryWrapper { unsigned int magic; @@ -132,6 +132,8 @@ extern "C" std::vector>* __hipRegisterFatBinary(con { const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast(data); if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) { + DevLogPrintfError("Cannot Register fat binary. FatMagic: %u version: %u ", + fbwrapper->magic, fbwrapper->version); return nullptr; } @@ -162,7 +164,8 @@ void PlatformState::digestFatBinary(const void* data, std::vectoraddDeviceProgram(*ctx->devices()[0], code_objs[dev].first, code_objs[dev].second)) { + if (CL_SUCCESS == program->addDeviceProgram( + *ctx->devices()[0], code_objs[dev].first, code_objs[dev].second, false)) { programs.at(dev) = std::make_pair(reinterpret_cast(as_cl(program)) , false); } } @@ -218,7 +221,7 @@ std::vector< std::pair >* PlatformState::unregisterVar(hipMod DeviceVar& dvar = it->second; if ((*dvar.modules)[0].first == hmod) { rmodules = dvar.modules; - if (dvar.dyn_undef) { + if (dvar.shadowAllocated) { texture* tex_hptr = reinterpret_cast *>(dvar.shadowVptr); delete tex_hptr; @@ -266,18 +269,21 @@ PlatformState::DeviceVar* PlatformState::findVar(std::string hostVar, int device return dvar; } -bool PlatformState::findSymbol(const void *hostVar, std::string &symbolName) { +bool PlatformState::findSymbol(const void *hostVar, + hipModule_t &hmod, std::string &symbolName) { auto it = symbols_.find(hostVar); if (it != symbols_.end()) { - symbolName = it->second; + hmod = it->second.first; + symbolName = it->second.second; return true; } + DevLogPrintfError("Could not 
find the Symbol: %s \n", symbolName.c_str()); return false; } -void PlatformState::registerVarSym(const void *hostVar, const char *symbolName) { +void PlatformState::registerVarSym(const void* hostVar, hipModule_t hmod, const char* symbolName) { amd::ScopedLock lock(lock_); - symbols_.insert(std::make_pair(hostVar, std::string(symbolName))); + symbols_.insert(std::make_pair(hostVar, std::make_pair(hmod, std::string(symbolName)))); } void PlatformState::registerVar(const char* hostvar, @@ -298,6 +304,7 @@ bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFunc const auto it = dev_program->kernels().find(std::string(func_name)); if (it == dev_program->kernels().cend()) { + DevLogPrintfError("Could not find the function %s \n", func_name); return false; } @@ -318,6 +325,7 @@ bool PlatformState::getShadowVarInfo(std::string var_name, hipModule_t hmod, *var_size = dvar->size; return true; } else { + DevLogPrintfError("Cannot find Var name: %s in module: 0x%x \n", var_name.c_str(), hmod); return false; } } @@ -357,6 +365,7 @@ bool PlatformState::findModFunc(hipFunction_t* hfunc, hipModule_t hmod, const ch PlatformState::DeviceFunction& devFunc = func_it->second; if (devFunc.functions[ihipGetDevice()] == 0) { if(!createFunc(&devFunc.functions[ihipGetDevice()], hmod, name)) { + DevLogPrintfError("Could not create a function: %s at module: 0x%x \n", name, hmod); return false; } } @@ -364,6 +373,7 @@ bool PlatformState::findModFunc(hipFunction_t* hfunc, hipModule_t hmod, const ch return true; } } + DevLogPrintfError("Cannot find module: 0x%x in PlatformState Module Map \n", hmod); return false; } @@ -372,15 +382,22 @@ bool PlatformState::createFunc(hipFunction_t* hfunc, hipModule_t hmod, const cha const amd::Symbol* symbol = program->findSymbol(name); if (!symbol) { + DevLogPrintfError("Cannot find Symbol with name: %s \n", name); return false; } amd::Kernel* kernel = new amd::Kernel(*program, *symbol, name); if (!kernel) { + DevLogPrintfError("Could 
not create a new kernel with name: %s \n", name); return false; } hip::Function* f = new hip::Function(kernel); + if (!f) { + DevLogPrintfError("Could not create a new function with name: %s \n", name); + return false; + } + *hfunc = f->asHipFunction(); return true; @@ -398,6 +415,7 @@ hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { amd::Program* program = as_amd(reinterpret_cast(module)); program->setVarInfoCallBack(&getSvarInfo); if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) { + DevLogPrintfError("Build error for module: 0x%x at device: %u \n", module, deviceId); return nullptr; } (*devFunc.modules)[deviceId].second = true; @@ -414,6 +432,7 @@ hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { } return devFunc.functions[deviceId]; } + DevLogPrintfError("Cannot find function: 0x%x in PlatformState \n", hostFunction); return nullptr; } @@ -425,6 +444,7 @@ bool PlatformState::getFuncAttr(const void* hostFunction, const auto it = functions_.find(hostFunction); if (it == functions_.cend()) { + DevLogPrintfError("Cannot find hostFunction 0x%x \n", hostFunction); return false; } @@ -434,12 +454,15 @@ bool PlatformState::getFuncAttr(const void* hostFunction, /* If module has not been initialized yet, build the kernel now*/ if (!(*devFunc.modules)[deviceId].second) { if (nullptr == PlatformState::instance().getFunc(hostFunction, deviceId)) { + DevLogPrintfError("Cannot get hostFunction: 0x%x for deviceId:%d \n", hostFunction, deviceId); return false; } } amd::Program* program = as_amd(reinterpret_cast((*devFunc.modules)[deviceId].first)); if (!ihipGetFuncAttributes(devFunc.deviceName.c_str(), program, func_attr)) { + DevLogPrintfError("Cannot get Func attributes for function: %s \n", + devFunc.deviceName.c_str()); return false; } return true; @@ -449,14 +472,32 @@ bool PlatformState::getTexRef(const char* hostVar, hipModule_t hmod, textureRefe amd::ScopedLock 
lock(lock_); DeviceVar* dvar = findVar(std::string(hostVar), ihipGetDevice(), hmod); if (dvar == nullptr) { + DevLogPrintfError("Cannot find var:%s for creating texture reference at module: 0x%x \n", + hostVar, hmod); return false; } - if (!dvar->dyn_undef) { + switch (dvar->kind) { + case PlatformState::DVK_Variable: + // TODO: Need to define a target-specific symbol info to indicate the device + // variable kind, i.e. regular variable, texture or surface. + // Before that, have to assume the specified variable is a texture or + // surface reference variable. + dvar->kind = DVK_Texture; + // FALL THROUGH + case PlatformState::DVK_Texture: + break; + default: + // If it's already used as non-texture variable, bail out. return false; } - *texRef = new (dvar->shadowVptr) texture{}; + if (!dvar->shadowVptr) { + dvar->shadowVptr = new texture{}; + dvar->shadowAllocated = true; + } + *texRef = reinterpret_cast(dvar->shadowVptr); + registerVarSym(dvar->shadowVptr, hmod, hostVar); return true; } @@ -475,6 +516,7 @@ bool PlatformState::getGlobalVar(const char* hostVar, int deviceId, hipModule_t amd::Program* program = as_amd(reinterpret_cast((*dvar->modules)[deviceId].first)); program->setVarInfoCallBack(&getSvarInfo); if (CL_SUCCESS != program->build(g_devices[deviceId]->devices(), nullptr, nullptr, nullptr)) { + DevLogPrintfError("Build Failure for module: 0x%x \n", hmod); return false; } (*dvar->modules)[deviceId].second = true; @@ -487,17 +529,31 @@ bool PlatformState::getGlobalVar(const char* hostVar, int deviceId, hipModule_t dvar->rvars[deviceId].amd_mem_obj_ = amd_mem_obj; amd::MemObjMap::AddMemObj(device_ptr, amd_mem_obj); } else { - LogError("[HIP] __hipRegisterVar cannot find kernel for device \n"); + LogError("__hipRegisterVar cannot find kernel for device \n"); } } *size_ptr = dvar->rvars[deviceId].getvarsize(); *dev_ptr = dvar->rvars[deviceId].getdeviceptr(); return true; } else { + DevLogPrintfError("Could not find global var: %s at module:0x%x \n", hostVar, 
hmod); return false; } } +bool PlatformState::getGlobalVarFromSymbol(const void* hostVar, int deviceId, + hipDeviceptr_t* dev_ptr, + size_t* size_ptr) { + hipModule_t hmod; + std::string symbolName; + if (!PlatformState::instance().findSymbol(hostVar, hmod, symbolName)) { + return false; + } + return PlatformState::instance().getGlobalVar(symbolName.c_str(), + ihipGetDevice(), hmod, + dev_ptr, size_ptr); +} + void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) { auto& arguments = execStack_.top().arguments_; @@ -552,11 +608,56 @@ extern "C" void __hipRegisterVar( int constant, // Whether this variable is constant int global) // Unknown, always 0 { - PlatformState::DeviceVar dvar{var, std::string{ hostVar }, size, modules, - std::vector{g_devices.size()}, false }; + PlatformState::DeviceVar dvar{PlatformState::DVK_Variable, + var, + std::string{hostVar}, + size, + modules, + std::vector{g_devices.size()}, + false, + /*type*/ 0, + /*norm*/ 0}; - PlatformState::instance().registerVar(hostVar, dvar); - PlatformState::instance().registerVarSym(var, deviceVar); + PlatformState::instance().registerVar(hostVar, dvar); + PlatformState::instance().registerVarSym(var, nullptr, deviceVar); +} + +extern "C" void __hipRegisterSurface(std::vector>* + modules, // The device modules containing code object + void* var, // The shadow variable in host code + char* hostVar, // Variable name in host code + char* deviceVar, // Variable name in device code + int type, int ext) { + PlatformState::DeviceVar dvar{PlatformState::DVK_Surface, + var, + std::string{hostVar}, + sizeof(surfaceReference), // Copy whole surfaceReference + modules, + std::vector{g_devices.size()}, + false, + type, + /*norm*/ 0}; + PlatformState::instance().registerVar(hostVar, dvar); + PlatformState::instance().registerVarSym(var, nullptr, deviceVar); +} + +extern "C" void __hipRegisterTexture(std::vector>* + modules, // The device modules containing code object + void* var, // The shadow 
variable in host code + char* hostVar, // Variable name in host code + char* deviceVar, // Variable name in device code + int type, int norm, int ext) { + PlatformState::DeviceVar dvar{PlatformState::DVK_Texture, + var, + std::string{hostVar}, + sizeof(textureReference), // Copy whole textureReference so far. + modules, + std::vector{g_devices.size()}, + false, + type, + norm}; + PlatformState::instance().registerVar(hostVar, dvar); + PlatformState::instance().registerVarSym(var, nullptr, deviceVar); } extern "C" void __hipUnregisterFatBinary(std::vector< std::pair >* modules) @@ -636,12 +737,14 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) PlatformState::instance().popExec(exec); hip::Stream* stream = reinterpret_cast(exec.hStream_); - int deviceId = (stream != nullptr)? stream->device->deviceId() : ihipGetDevice(); + int deviceId = (stream != nullptr)? stream->DeviceId() : ihipGetDevice(); if (deviceId == -1) { + DevLogPrintfError("Wrong DeviceId: %d \n", deviceId); HIP_RETURN(hipErrorNoDevice); } hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); if (func == nullptr) { + DevLogPrintfError("Could not retrieve hostFunction: 0x%x \n", hostFunction); HIP_RETURN(hipErrorInvalidDeviceFunction); } @@ -661,13 +764,17 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol) { HIP_INIT_API(hipGetSymbolAddress, devPtr, symbol); + hipModule_t hmod; std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) { + DevLogPrintfError("Cannot find symbol: %s \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); } size_t size = 0; - if(!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + if(!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, devPtr, &size)) { + DevLogPrintfError("Cannot 
find global variable device ptr for symbol: %s at device: %d \n", + symbolName.c_str(), ihipGetDevice()); HIP_RETURN(hipErrorInvalidSymbol); } HIP_RETURN(hipSuccess); @@ -676,13 +783,17 @@ hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol) { hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbol) { HIP_INIT_API(hipGetSymbolSize, sizePtr, symbol); + hipModule_t hmod; std::string symbolName; - if (!PlatformState::instance().findSymbol(symbol, symbolName)) { + if (!PlatformState::instance().findSymbol(symbol, hmod, symbolName)) { + DevLogPrintfError("Cannot find symbol: %s \n", symbolName.c_str()); HIP_RETURN(hipErrorInvalidSymbol); } hipDeviceptr_t devPtr = nullptr; - if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), nullptr, + if (!PlatformState::instance().getGlobalVar(symbolName.c_str(), ihipGetDevice(), hmod, &devPtr, sizePtr)) { + DevLogPrintfError("Cannot find global variable device ptr for symbol: %s at device: %d \n", + symbolName.c_str(), ihipGetDevice()); HIP_RETURN(hipErrorInvalidSymbol); } HIP_RETURN(hipSuccess); @@ -701,10 +812,12 @@ hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memor dev_program = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]); if (dev_program == nullptr) { + DevLogPrintfError("Cannot get Device Function for module: 0x%x \n", hmod); HIP_RETURN(hipErrorInvalidDeviceFunction); } /* Find the global Symbols */ if (!dev_program->createGlobalVarObj(amd_mem_obj, dptr, bytes, name)) { + DevLogPrintfError("Cannot create Global Var obj for symbol: %s \n", name); HIP_RETURN(hipErrorInvalidSymbol); } @@ -773,24 +886,19 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( } extern "C" { -// FIXME: Need to replace `uint32_t` with `int` finally. 
-hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* blockSize, - hipFunction_t f, size_t dynSharedMemPerBlk, - uint32_t blockSizeLimit) +hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + const void* f, size_t dynSharedMemPerBlk, + int blockSizeLimit) { HIP_INIT_API(hipOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit); - if ((gridSize == nullptr) || (blockSize == nullptr)) { return HIP_RETURN(hipErrorInvalidValue); } - hipFunction_t func = PlatformState::instance().getFunc(f, ihipGetDevice()); if (func == nullptr) { - func = f; + return HIP_RETURN(hipErrorInvalidValue); } - const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; - int num_grids = 0; int num_blocks = 0; hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( @@ -802,11 +910,81 @@ hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* block HIP_RETURN(ret); } -// FIXME: Need to replace `uint32_t` with `int` finally. 
-hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(uint32_t* numBlocks, - hipFunction_t f, - uint32_t blockSize, - size_t dynamicSMemSize) +hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit) +{ + HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit); + if ((gridSize == nullptr) || (blockSize == nullptr)) { + return HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + int num_grids = 0; + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &num_grids, device, f, 0, dynSharedMemPerBlk,true); + if (ret == hipSuccess) { + *blockSize = num_blocks; + *gridSize = num_grids; + } + HIP_RETURN(ret); +} + +hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize, + hipFunction_t f, size_t dynSharedMemPerBlk, + int blockSizeLimit, unsigned int flags) +{ + HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSizeWithFlags, f, dynSharedMemPerBlk, blockSizeLimit, flags); + if ((gridSize == nullptr) || (blockSize == nullptr)) { + return HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + int num_grids = 0; + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, &num_grids, device, f, 0, dynSharedMemPerBlk,true); + if (ret == hipSuccess) { + *blockSize = num_blocks; + *gridSize = num_grids; + } + HIP_RETURN(ret); +} + +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk) +{ + HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, dynSharedMemPerBlk); + if (numBlocks == nullptr) { + return HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = 
*hip::getCurrentDevice()->devices()[0]; + + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, nullptr, device, f, blockSize, dynSharedMemPerBlk, false); + *numBlocks = num_blocks; + HIP_RETURN(ret); +} + +hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, + hipFunction_t f, int blockSize, + size_t dynSharedMemPerBlk, unsigned int flags) +{ + HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynSharedMemPerBlk, flags); + if (numBlocks == nullptr) { + return HIP_RETURN(hipErrorInvalidValue); + } + const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; + + int num_blocks = 0; + hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor( + &num_blocks, nullptr, device, f, blockSize, dynSharedMemPerBlk, false); + *numBlocks = num_blocks; + HIP_RETURN(ret); +} + +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, + const void* f, int blockSize, size_t dynamicSMemSize) { HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, dynamicSMemSize); if (numBlocks == nullptr) { @@ -815,7 +993,7 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(uint32_t* numBlocks, hipFunction_t func = PlatformState::instance().getFunc(f, ihipGetDevice()); if (func == nullptr) { - func = f; + return HIP_RETURN(hipErrorInvalidValue); } const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; @@ -827,12 +1005,9 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(uint32_t* numBlocks, HIP_RETURN(ret); } -// FIXME: Need to replace `uint32_t` with `int` finally. 
-hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(uint32_t* numBlocks, - hipFunction_t f, - uint32_t blockSize, - size_t dynamicSMemSize, - unsigned int flags) +hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, + const void* f, + int blockSize, size_t dynamicSMemSize, unsigned int flags) { HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynamicSMemSize, flags); if (numBlocks == nullptr) { @@ -841,7 +1016,7 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(uint32_t* numBl hipFunction_t func = PlatformState::instance().getFunc(f, ihipGetDevice()); if (func == nullptr) { - func = f; + return HIP_RETURN(hipErrorInvalidValue); } const amd::Device& device = *hip::getCurrentDevice()->devices()[0]; @@ -1105,8 +1280,9 @@ extern "C" hipError_t hipLaunchKernel(const void *hostFunction, stream); hip::Stream* s = reinterpret_cast(stream); - int deviceId = (s != nullptr)? s->device->deviceId() : ihipGetDevice(); + int deviceId = (s != nullptr)? 
s->DeviceId() : ihipGetDevice(); if (deviceId == -1) { + DevLogPrintfError("Wrong Device Id: %d \n", deviceId); HIP_RETURN(hipErrorNoDevice); } hipFunction_t func = PlatformState::instance().getFunc(hostFunction, deviceId); @@ -1114,6 +1290,7 @@ extern "C" hipError_t hipLaunchKernel(const void *hostFunction, #ifdef ATI_OS_LINUX const auto it = hip_impl::functions().find(reinterpret_cast(hostFunction)); if (it == hip_impl::functions().cend()) { + DevLogPrintfError("Cannot find function: 0x%x \n", hostFunction); HIP_RETURN(hipErrorInvalidDeviceFunction); } func = it->second; diff --git a/vdi/hip_prof_gen.py b/vdi/hip_prof_gen.py index 0195f86f16..2eb10e9ca3 100755 --- a/vdi/hip_prof_gen.py +++ b/vdi/hip_prof_gen.py @@ -78,7 +78,6 @@ def filtr_api_args(args_str): args_str = re.sub(r'\s*$', r'', args_str); args_str = re.sub(r'\s*,\s*', r',', args_str); args_str = re.sub(r'\s+', r' ', args_str); - #args_str = re.sub(r'void \*', r'void* ', args_str); args_str = re.sub(r'\s*(\*+)\s*', r'\1 ', args_str); args_str = re.sub(r'(enum|struct) ', '', args_str); return args_str @@ -384,6 +383,7 @@ def generate_prof_header(f, api_map, opts_map): f.write('// automatically generated sources\n') f.write('#ifndef _HIP_PROF_STR_H\n'); f.write('#define _HIP_PROF_STR_H\n'); + f.write('#define HIP_PROF_VER 1\n') # Generating dummy macro for non-public API f.write('\n// Dummy API primitives\n') @@ -426,7 +426,7 @@ def generate_prof_header(f, api_map, opts_map): # Generating the callbacks data structure f.write('\n// HIP API callbacks data structure\n') f.write( - 'struct hip_api_data_t {\n' + + 'typedef struct hip_api_data_s {\n' + ' uint64_t correlation_id;\n' + ' uint32_t phase;\n' + ' union {\n' @@ -442,7 +442,7 @@ def generate_prof_header(f, api_map, opts_map): f.write(' } ' + name + ';\n') f.write( ' } args;\n' + - '};\n' + '} hip_api_data_t;\n' ) # Generating the callbacks args data filling macros @@ -467,30 +467,29 @@ def generate_prof_header(f, api_map, opts_map): f.write('#define 
INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data)\n') # Generating the method for the API string, name and parameters - if False: - f.write('\n') - f.write('#if 0\n') - f.write('#include \n'); - f.write('#include \n'); - f.write('// HIP API string method, method name and parameters\n') - f.write('const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {\n') - f.write(' std::ostringstream oss;\n') - f.write(' switch (id) {\n') - for name, args in api_map.items(): - f.write(' case HIP_API_ID_' + name + ':\n') - f.write(' oss << "' + name + '("') - for ind in range(0, len(args)): - arg_tuple = args[ind] - arg_name = arg_tuple[1] - if ind != 0: f.write(' << ","') - f.write('\n << " ' + arg_name + '=" << data->args.' + name + '.' + arg_name) - f.write('\n << ")";\n') - f.write(' break;\n') - f.write(' default: oss << "unknown";\n') - f.write(' };\n') - f.write(' return strdup(oss.str().c_str());\n') - f.write('};\n') - f.write('#endif\n') + f.write('\n') + f.write('#if HIP_PROF_HIP_API_STRING\n') + f.write('#include \n'); + f.write('#include \n'); + f.write('// HIP API string method, method name and parameters\n') + f.write('const char* hipApiString(hip_api_id_t id, const hip_api_data_t* data) {\n') + f.write(' std::ostringstream oss;\n') + f.write(' switch (id) {\n') + for name, args in api_map.items(): + f.write(' case HIP_API_ID_' + name + ':\n') + f.write(' oss << "' + name + '("') + for ind in range(0, len(args)): + arg_tuple = args[ind] + arg_name = arg_tuple[1] + if ind != 0: f.write(' << ","') + f.write('\n << " ' + arg_name + '=" << data->args.' + name + '.' 
+ arg_name) + f.write('\n << ")";\n') + f.write(' break;\n') + f.write(' default: oss << "unknown";\n') + f.write(' };\n') + f.write(' return strdup(oss.str().c_str());\n') + f.write('};\n') + f.write('#endif // HIP_PROF_HIP_API_STRING\n') f.write('#endif // _HIP_PROF_STR_H\n'); diff --git a/vdi/hip_rtc.cpp b/vdi/hip_rtc.cpp old mode 100644 new mode 100755 index 9897b98b7f..8c82337405 --- a/vdi/hip_rtc.cpp +++ b/vdi/hip_rtc.cpp @@ -109,6 +109,7 @@ char* demangle(const char* loweredName) { int status = 0; char* demangledName = DEMANGLE(loweredName, nullptr, nullptr, &status); if (status != 0) { + DevLogPrintfError("Cannot demangle loweredName: %s \n", loweredName); return nullptr; } #elif defined(_WIN32) @@ -118,6 +119,8 @@ char* demangle(const char* loweredName) { UNDECORATED_SIZE/ sizeof(*demangledName), UNDNAME_COMPLETE)) { free(demangledName); + DevLogPrintfError("Cannot undecorate loweredName: %s demangledName: %s \n", + loweredName, demangedName); return nullptr; } #else @@ -192,6 +195,7 @@ const char* hiprtcGetErrorString(hiprtcResult x) { case HIPRTC_ERROR_INTERNAL_ERROR: return "HIPRTC_ERROR_INTERNAL_ERROR"; default: + DevLogPrintfError("Invalid HIPRTC error code: %d \n", x); return nullptr; }; diff --git a/vdi/hip_stream.cpp b/vdi/hip_stream.cpp index eac42c0203..fbcd223ed2 100644 --- a/vdi/hip_stream.cpp +++ b/vdi/hip_stream.cpp @@ -42,80 +42,131 @@ class StreamCallback { namespace hip { -void syncStreams(int devId) { - amd::ScopedLock lock(streamSetLock); +// ================================================================================================ +Stream::Stream(hip::Device* dev, amd::CommandQueue::Priority p, + unsigned int f, bool null_stream) + : queue_(nullptr), lock_("Stream Callback lock"), device_(dev), + priority_(p), flags_(f), null_(null_stream) {} - for (const auto& it : streamSet) { - if (it->device->deviceId() == devId) { - it->finish(); +// 
================================================================================================ +bool Stream::Create() { + cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; + queue_ = new amd::HostQueue(*device_->asContext(), *device_->devices()[0], properties, + amd::CommandQueue::RealTimeDisabled, priority_); + // Create a host queue + bool result = (queue_ != nullptr) ? queue_->create() : false; + // Insert just created stream into the list of the blocking queues + if (result) { + if (!(flags_ & hipStreamNonBlocking)) { + amd::ScopedLock lock(streamSetLock); + streamSet.insert(this); + } + } else { + Destroy(); + } + return result; +} + +// ================================================================================================ +amd::HostQueue* Stream::asHostQueue() { + // Access to the stream object is lock protected, because possible allocation + amd::ScopedLock l(Lock()); + if (queue_ == nullptr) { + // Create the host queue for the first time + if (!Create()) { + return nullptr; } } + return queue_; } -void syncStreams() { - syncStreams(getCurrentDevice()->deviceId()); -} +// ================================================================================================ +void Stream::Destroy() { + if (queue_ != nullptr) { + queue_->release(); + queue_ = nullptr; -Stream::Stream(hip::Device* dev, amd::CommandQueue::Priority p, unsigned int f) : - queue(nullptr), lock("Stream Callback lock"), device(dev), priority(p), flags(f) {} - -void Stream::create() { - cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE; - queue = new amd::HostQueue(*device->asContext(), *device->devices()[0], properties, - amd::CommandQueue::RealTimeDisabled, priority); - assert(queue != nullptr); - queue->create(); -} - -amd::HostQueue* Stream::asHostQueue() { - if (queue == nullptr) { - create(); + amd::ScopedLock lock(streamSetLock); + streamSet.erase(this); } - return queue; + delete this; } -void Stream::destroy() { - if (queue != nullptr) 
{ - queue->release(); - queue = nullptr; +// ================================================================================================ +void Stream::Finish() const { + if (queue_ != nullptr) { + queue_->finish(); } } -void Stream::finish() { - if (queue != nullptr) { - queue->finish(); - } +// ================================================================================================ +int Stream::DeviceId() const { + return device_->deviceId(); } }; +// ================================================================================================ +void iHipWaitActiveStreams(amd::HostQueue* blocking_queue, bool wait_null_stream) { + amd::Command::EventWaitList eventWaitList; + { + amd::ScopedLock lock(streamSetLock); + + for (const auto& stream : streamSet) { + amd::HostQueue* active_queue = stream->asHostQueue(); + // If it's the current device + if ((&active_queue->device() == &blocking_queue->device()) && + // and it's not the current stream + (active_queue != blocking_queue) && + // check for a wait on the null stream + (stream->Null() == wait_null_stream)) { + // Get the last valid command + amd::Command* command = active_queue->getLastQueuedCommand(true); + if ((command != nullptr) && + // Check the current active status + (command->status() != CL_COMPLETE)) { + eventWaitList.push_back(command); + } + } + } + } + + // Check if we have to wait anything + if (eventWaitList.size() > 0) { + amd::Command* command = new amd::Marker(*blocking_queue, false, eventWaitList); + if (command != nullptr) { + command->enqueue(); + command->release(); + } + } + + // Release all active commands. 
It's safe after the marker was enqueued + for (const auto& it : eventWaitList) { + it->release(); + } +} + +// ================================================================================================ void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status, void* user_data) { hipError_t status = hipSuccess; StreamCallback* cbo = reinterpret_cast(user_data); { - amd::ScopedLock lock(reinterpret_cast(cbo->stream_)->lock); + amd::ScopedLock lock(reinterpret_cast(cbo->stream_)->Lock()); cbo->callBack_(cbo->stream_, status, cbo->userData_); } cbo->command_->release(); delete cbo; } -static hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags, amd::CommandQueue::Priority priority) { +// ================================================================================================ +static hipError_t ihipStreamCreate(hipStream_t* stream, + unsigned int flags, amd::CommandQueue::Priority priority) { hip::Stream* hStream = new hip::Stream(hip::getCurrentDevice(), priority, flags); if (hStream == nullptr) { return hipErrorOutOfMemory; } - if (!(flags & hipStreamNonBlocking)) { - hip::syncStreams(); - - { - amd::ScopedLock lock(streamSetLock); - streamSet.insert(hStream); - } - } - *stream = reinterpret_cast(hStream); ClPrint(amd::LOG_INFO, amd::LOG_API, "ihipStreamCreate: %zx", hStream); @@ -123,18 +174,21 @@ static hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags, amd: return hipSuccess; } +// ================================================================================================ hipError_t hipStreamCreateWithFlags(hipStream_t *stream, unsigned int flags) { HIP_INIT_API(hipStreamCreateWithFlags, stream, flags); HIP_RETURN(ihipStreamCreate(stream, flags, amd::CommandQueue::Priority::Normal)); } +// ================================================================================================ hipError_t hipStreamCreate(hipStream_t *stream) { HIP_INIT_API(hipStreamCreate, stream); 
HIP_RETURN(ihipStreamCreate(stream, hipStreamDefault, amd::CommandQueue::Priority::Normal)); } +// ================================================================================================ hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, int priority) { HIP_INIT_API(hipStreamCreateWithPriority, stream, flags, priority); @@ -147,6 +201,7 @@ hipError_t hipStreamCreateWithPriority(hipStream_t* stream, unsigned int flags, return HIP_RETURN(ihipStreamCreate(stream, flags, static_cast(priority))); } +// ================================================================================================ hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) { HIP_INIT_API(hipDeviceGetStreamPriorityRange, leastPriority, greatestPriority); @@ -160,13 +215,12 @@ hipError_t hipDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPrio return HIP_RETURN(hipSuccess); } -hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { +// ================================================================================================ +hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int* flags) { HIP_INIT_API(hipStreamGetFlags, stream, flags); - hip::Stream* hStream = reinterpret_cast(stream); - - if(flags != nullptr && hStream != nullptr) { - *flags = hStream->flags; + if ((flags != nullptr) && (stream != nullptr)) { + *flags = reinterpret_cast(stream)->Flags(); } else { HIP_RETURN(hipErrorInvalidValue); } @@ -174,15 +228,17 @@ hipError_t hipStreamGetFlags(hipStream_t stream, unsigned int *flags) { HIP_RETURN(hipSuccess); } +// ================================================================================================ hipError_t hipStreamSynchronize(hipStream_t stream) { HIP_INIT_API(hipStreamSynchronize, stream); - amd::HostQueue* hostQueue = hip::getQueue(stream); - hostQueue->finish(); + // Wait for the current host queue + hip::getQueue(stream)->finish(); HIP_RETURN(hipSuccess); } 
+// ================================================================================================ hipError_t hipStreamDestroy(hipStream_t stream) { HIP_INIT_API(hipStreamDestroy, stream); @@ -190,50 +246,35 @@ hipError_t hipStreamDestroy(hipStream_t stream) { HIP_RETURN(hipErrorInvalidHandle); } - amd::ScopedLock lock(streamSetLock); - - hip::Stream* hStream = reinterpret_cast(stream); - - hStream->destroy(); - streamSet.erase(hStream); - - delete hStream; + reinterpret_cast(stream)->Destroy(); HIP_RETURN(hipSuccess); } +// ================================================================================================ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { HIP_INIT_API(hipStreamWaitEvent, stream, event, flags); - amd::HostQueue* queue; - - if (stream == nullptr) { - queue = hip::getNullStream(); - } else { - queue = reinterpret_cast(stream)->asHostQueue(); - } - if (event == nullptr) { HIP_RETURN(hipErrorInvalidHandle); } + amd::HostQueue* queue = hip::getQueue(stream); + hip::Event* e = reinterpret_cast(event); - return HIP_RETURN(e->streamWait(queue, flags)); + HIP_RETURN(e->streamWait(queue, flags)); } +// ================================================================================================ hipError_t hipStreamQuery(hipStream_t stream) { HIP_INIT_API(hipStreamQuery, stream); - amd::HostQueue* hostQueue; - if (stream == nullptr) { - hostQueue = hip::getNullStream(); - } else { - hostQueue = reinterpret_cast(stream)->asHostQueue(); - } + amd::HostQueue* hostQueue = hip::getQueue(stream); amd::Command* command = hostQueue->getLastQueuedCommand(true); if (command == nullptr) { + // Nothing was submitted to the queue HIP_RETURN(hipSuccess); } @@ -246,12 +287,12 @@ hipError_t hipStreamQuery(hipStream_t stream) { HIP_RETURN(status); } +// ================================================================================================ hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t 
callback, void* userData, unsigned int flags) { HIP_INIT_API(hipStreamAddCallback, stream, callback, userData, flags); - amd::HostQueue* hostQueue = reinterpret_cast - (stream)->asHostQueue(); + amd::HostQueue* hostQueue = hip::getQueue(stream); amd::Command* command = hostQueue->getLastQueuedCommand(true); if (command == nullptr) { amd::Command::EventWaitList eventWaitList; @@ -270,5 +311,3 @@ hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback HIP_RETURN(hipSuccess); } - - diff --git a/vdi/hip_texture.cpp b/vdi/hip_texture.cpp old mode 100644 new mode 100755 index 0f8e818948..94026c8e33 --- a/vdi/hip_texture.cpp +++ b/vdi/hip_texture.cpp @@ -24,6 +24,9 @@ #include "hip_conversions.hpp" #include "platform/sampler.hpp" +hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + amd::HostQueue& queue, bool isAsync = false); + struct __hip_texture { uint32_t imageSRD[HIP_IMAGE_OBJECT_SIZE_DWORD]; uint32_t samplerSRD[HIP_SAMPLER_OBJECT_SIZE_DWORD]; @@ -100,7 +103,7 @@ hipError_t ihipCreateTextureObject(hipTextureObject_t* pTexObject, if ((pResDesc->resType == hipResourceTypeLinear) && ((pResDesc->res.linear.devPtr == nullptr) || (!amd::isMultipleOf(pResDesc->res.linear.devPtr, info.imageBaseAddressAlignment_)) || - (pResDesc->res.linear.sizeInBytes >= info.imageMaxBufferSize_))) { + ((pResDesc->res.linear.sizeInBytes / hip::getElementSize(pResDesc->res.linear.desc)) >= info.imageMaxBufferSize_))) { return hipErrorInvalidValue; } @@ -335,8 +338,7 @@ hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, hipTextureObject_t texObject) { HIP_INIT_API(hipGetTextureObjectResourceDesc, pResDesc, texObject); - if ((pResDesc == nullptr) || - (texObject == nullptr)) { + if ((pResDesc == nullptr) || (texObject == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -349,8 +351,7 @@ hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc hipTextureObject_t texObject) { 
HIP_INIT_API(hipGetTextureObjectResourceViewDesc, pResViewDesc, texObject); - if ((pResViewDesc == nullptr) || - (texObject == nullptr)) { + if ((pResViewDesc == nullptr) || (texObject == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -363,8 +364,7 @@ hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, hipTextureObject_t texObject) { HIP_INIT_API(hipGetTextureObjectTextureDesc, pTexDesc, texObject); - if ((pTexDesc == nullptr) || - (texObject == nullptr)) { + if ((pTexDesc == nullptr) || (texObject == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -383,8 +383,8 @@ inline bool ihipGetTextureAlignmentOffset(size_t* offset, // If the device memory pointer was returned from hipMalloc(), // the offset is guaranteed to be 0 and NULL may be passed as the offset parameter. - if ((alignedOffset != 0) && - (offset == nullptr)) { + if ((alignedOffset != 0) && (offset == nullptr)) { + DevLogPrintfError("Texture object not aligned with offset %u \n", alignedOffset); return false; } @@ -476,7 +476,20 @@ hipError_t hipBindTexture2D(size_t* offset, size_t pitch) { HIP_INIT_API(hipBindTexture2D, offset, texref, devPtr, desc, width, height, pitch); - HIP_RETURN(ihipBindTexture2D(offset, texref, devPtr, desc, width, height, pitch)); + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + if (!PlatformState::instance().getGlobalVarFromSymbol(texref, ihipGetDevice(), &refDevPtr, + &refDevSize)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + assert(refDevSize == sizeof(textureReference)); + hipError_t err = ihipBindTexture2D(offset, texref, devPtr, desc, width, height, pitch); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. 
+ amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue)); } hipError_t ihipBindTextureToArray(const textureReference* texref, @@ -510,7 +523,20 @@ hipError_t hipBindTextureToArray(const textureReference* texref, const hipChannelFormatDesc* desc) { HIP_INIT_API(hipBindTextureToArray, texref, array, desc); - HIP_RETURN(ihipBindTextureToArray(texref, array, desc)); + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + if (!PlatformState::instance().getGlobalVarFromSymbol(texref, ihipGetDevice(), &refDevPtr, + &refDevSize)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + assert(refDevSize == sizeof(textureReference)); + hipError_t err = ihipBindTextureToArray(texref, array, desc); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. + amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue)); } hipError_t ihipBindTextureToMipmappedArray(const textureReference* texref, @@ -544,7 +570,20 @@ hipError_t hipBindTextureToMipmappedArray(const textureReference* texref, const hipChannelFormatDesc* desc) { HIP_INIT_API(hipBindTextureToMipmappedArray, texref, mipmappedArray, desc); - HIP_RETURN(ihipBindTextureToMipmappedArray(texref, mipmappedArray, desc)); + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + if (!PlatformState::instance().getGlobalVarFromSymbol(texref, ihipGetDevice(), &refDevPtr, + &refDevSize)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + assert(refDevSize == sizeof(textureReference)); + hipError_t err = ihipBindTextureToMipmappedArray(texref, mipmappedArray, desc); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. 
+ amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue)); } hipError_t hipUnbindTexture(const textureReference* texref) { @@ -567,15 +606,27 @@ hipError_t hipBindTexture(size_t* offset, size_t size) { HIP_INIT_API(hipBindTexture, offset, texref, devPtr, desc, size); - HIP_RETURN(ihipBindTexture(offset, texref, devPtr, desc, size)); + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + if (!PlatformState::instance().getGlobalVarFromSymbol(texref, ihipGetDevice(), &refDevPtr, + &refDevSize)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + assert(refDevSize == sizeof(textureReference)); + hipError_t err = ihipBindTexture(offset, texref, devPtr, desc, size); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. + amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue)); } hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) { HIP_INIT_API(hipGetChannelDesc, desc, array); - if ((desc == nullptr) || - (array == nullptr)) { + if ((desc == nullptr) || (array == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -590,8 +641,7 @@ hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* texref) { HIP_INIT_API(hipGetTextureAlignmentOffset, offset, texref); - if ((offset == nullptr) || - (texref == nullptr)) { + if ((offset == nullptr) || (texref == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -604,9 +654,12 @@ hipError_t hipGetTextureAlignmentOffset(size_t* offset, hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol) { HIP_INIT_API(hipGetTextureReference, texref, symbol); - assert(0 && "Unimplemented"); + if (texref == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + *texref = reinterpret_cast(symbol); - HIP_RETURN(hipErrorNotSupported); + HIP_RETURN(hipSuccess); } hipError_t 
hipTexRefSetFormat(textureReference* texRef, @@ -670,13 +723,14 @@ hipError_t hipTexRefGetAddressMode(hipTextureAddressMode* pam, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetAddressMode, pam, texRef, dim); - if ((pam == nullptr) || - (texRef == nullptr)) { + if ((pam == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } // Currently, the only valid value for dim are 0 and 1. if ((dim != 0) || (dim != 1)) { + DevLogPrintfError("Currently only 2 dimensions (0,1) are valid," + "dim : %d \n", dim); HIP_RETURN(hipErrorInvalidValue); } @@ -695,6 +749,8 @@ hipError_t hipTexRefSetAddressMode(textureReference* texRef, } if ((dim < 0) || (dim > 2)) { + DevLogPrintfError("Currently only 3 dimensions (0,1,2) are valid," + "dim : %d \n", dim); HIP_RETURN(hipErrorInvalidValue); } @@ -708,8 +764,7 @@ hipError_t hipTexRefGetArray(hipArray_t* pArray, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetArray, pArray, texRef); - if ((pArray == nullptr) || - (texRef == nullptr)) { + if ((pArray == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -723,8 +778,9 @@ hipError_t hipTexRefGetArray(hipArray_t* pArray, switch (resDesc.resType) { case hipResourceTypeLinear: case hipResourceTypePitch2D: - case hipResourceTypeMipmappedArray: + case hipResourceTypeMipmappedArray: { HIP_RETURN(hipErrorInvalidValue); + } case hipResourceTypeArray: *pArray = resDesc.res.array.array; break; @@ -738,8 +794,7 @@ hipError_t hipTexRefSetArray(textureReference* texRef, unsigned int flags) { HIP_INIT_API(hipTexRefSetArray, texRef, array, flags); - if ((texRef == nullptr) || - (array == nullptr)) { + if ((texRef == nullptr) || (array == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -747,6 +802,14 @@ hipError_t hipTexRefSetArray(textureReference* texRef, HIP_RETURN(hipErrorInvalidValue); } + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + if 
(!PlatformState::instance().getGlobalVarFromSymbol(texRef, ihipGetDevice(), &refDevPtr, + &refDevSize)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + assert(refDevSize == sizeof(textureReference)); + // Any previous address or HIP array state associated with the texture reference is superseded by this function. // Any memory previously bound to hTexRef is unbound. // No need to check for errors. @@ -761,7 +824,13 @@ hipError_t hipTexRefSetArray(textureReference* texRef, hipResourceViewFormat format = hip::getResourceViewFormat(hip::getChannelFormatDesc(texRef->numChannels, texRef->format)); hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(array, format); - HIP_RETURN(ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, &resViewDesc)); + hipError_t err = ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, &resViewDesc); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. + amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue)); } hipError_t hipTexRefGetAddress(hipDeviceptr_t* dptr, @@ -769,8 +838,7 @@ hipError_t hipTexRefGetAddress(hipDeviceptr_t* dptr, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetAddress, dptr, texRef); - if ((dptr == nullptr) || - (texRef == nullptr)) { + if ((dptr == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -778,6 +846,8 @@ hipError_t hipTexRefGetAddress(hipDeviceptr_t* dptr, // TODO use ihipGetTextureObjectResourceDesc() to not pollute the API trace. 
hipError_t error = hipGetTextureObjectResourceDesc(&resDesc, texRef->textureObject); if (error != hipSuccess) { + DevLogPrintfError("hipGetTextureObjectResourceDesc failed with error code: %s \n", + hipGetErrorName(error)); return HIP_RETURN(error); } @@ -786,8 +856,9 @@ hipError_t hipTexRefGetAddress(hipDeviceptr_t* dptr, // If the texture reference is not bound to any device memory range, // return hipErroInvalidValue. case hipResourceTypeArray: - case hipResourceTypeMipmappedArray: + case hipResourceTypeMipmappedArray: { HIP_RETURN(hipErrorInvalidValue); + } case hipResourceTypeLinear: *dptr = resDesc.res.linear.devPtr; break; @@ -809,6 +880,14 @@ hipError_t hipTexRefSetAddress(size_t* ByteOffset, HIP_RETURN(hipErrorInvalidValue); } + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + if (!PlatformState::instance().getGlobalVarFromSymbol(texRef, ihipGetDevice(), &refDevPtr, + &refDevSize)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + assert(refDevSize == sizeof(textureReference)); + // Any previous address or HIP array state associated with the texture reference is superseded by this function. // Any memory previously bound to hTexRef is unbound. // No need to check for errors. @@ -829,7 +908,13 @@ hipError_t hipTexRefSetAddress(size_t* ByteOffset, hipTextureDesc texDesc = hip::getTextureDesc(texRef); - HIP_RETURN(ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, nullptr)); + hipError_t err = ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, nullptr); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. 
+ amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue)); } hipError_t hipTexRefSetAddress2D(textureReference* texRef, @@ -838,11 +923,18 @@ hipError_t hipTexRefSetAddress2D(textureReference* texRef, size_t Pitch) { HIP_INIT_API(hipTexRefSetAddress2D, texRef, desc, dptr, Pitch); - if ((texRef == nullptr) || - (desc == nullptr)) { + if ((texRef == nullptr) || (desc == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + if (!PlatformState::instance().getGlobalVarFromSymbol(texRef, ihipGetDevice(), &refDevPtr, + &refDevSize)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + assert(refDevSize == sizeof(textureReference)); + // Any previous address or HIP array state associated with the texture reference is superseded by this function. // Any memory previously bound to hTexRef is unbound. // No need to check for errors. @@ -858,7 +950,13 @@ hipError_t hipTexRefSetAddress2D(textureReference* texRef, hipTextureDesc texDesc = hip::getTextureDesc(texRef); - HIP_RETURN(ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, nullptr)); + hipError_t err = ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, nullptr); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. + amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue)); } hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) { @@ -870,8 +968,7 @@ hipError_t hipTexRefGetBorderColor(float* pBorderColor, // TODO overload operator<<(ostream&, textureReference&). 
HIP_INIT_API(hipTexRefGetBorderColor, pBorderColor, texRef); - if ((pBorderColor == nullptr) || - (texRef == nullptr)) { + if ((pBorderColor == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -887,8 +984,7 @@ hipError_t hipTexRefGetFilterMode(hipTextureFilterMode* pfm, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetFilterMode, pfm, texRef); - if ((pfm == nullptr) || - (texRef == nullptr)) { + if ((pfm == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -902,8 +998,7 @@ hipError_t hipTexRefGetFlags(unsigned int* pFlags, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetFlags, pFlags, texRef); - if ((pFlags == nullptr) || - (texRef == nullptr)) { + if ((pFlags == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -930,8 +1025,7 @@ hipError_t hipTexRefGetFormat(hipArray_Format* pFormat, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetFormat, pFormat, pNumChannels, texRef); - if ((pFormat == nullptr) || - (pNumChannels == nullptr) || + if ((pFormat == nullptr) || (pNumChannels == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -947,8 +1041,7 @@ hipError_t hipTexRefGetMaxAnisotropy(int* pmaxAnsio, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetMaxAnisotropy, pmaxAnsio, texRef); - if ((pmaxAnsio == nullptr) || - (texRef == nullptr)) { + if ((pmaxAnsio == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -962,8 +1055,7 @@ hipError_t hipTexRefGetMipmapFilterMode(hipTextureFilterMode* pfm, // TODO overload operator<<(ostream&, textureReference&). 
HIP_INIT_API(hipTexRefGetMipmapFilterMode, pfm, texRef); - if ((pfm == nullptr) || - (texRef == nullptr)) { + if ((pfm == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -977,8 +1069,7 @@ hipError_t hipTexRefGetMipmapLevelBias(float* pbias, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetMipmapLevelBias, pbias, texRef); - if ((pbias == nullptr) || - (texRef == nullptr)) { + if ((pbias == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -993,8 +1084,7 @@ hipError_t hipTexRefGetMipmapLevelClamp(float* pminMipmapLevelClamp, // TODO overload operator<<(ostream&, textureReference&). HIP_INIT_API(hipTexRefGetMipmapLevelClamp, pminMipmapLevelClamp, pmaxMipmapLevelClamp, texRef); - if ((pminMipmapLevelClamp == nullptr) || - (pmaxMipmapLevelClamp == nullptr) || + if ((pminMipmapLevelClamp == nullptr) || (pmaxMipmapLevelClamp == nullptr) || (texRef == nullptr)){ HIP_RETURN(hipErrorInvalidValue); } @@ -1010,8 +1100,7 @@ hipError_t hipTexRefGetMipmappedArray(hipMipmappedArray_t* pArray, // TODO overload operator<<(ostream&, textureReference&). 
HIP_INIT_API(hipTexRefGetMipmappedArray, pArray, &texRef); - if ((pArray == nullptr) || - (texRef == nullptr)) { + if ((pArray == nullptr) || (texRef == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -1025,8 +1114,9 @@ hipError_t hipTexRefGetMipmappedArray(hipMipmappedArray_t* pArray, switch (resDesc.resType) { case hipResourceTypeLinear: case hipResourceTypePitch2D: - case hipResourceTypeArray: + case hipResourceTypeArray: { HIP_RETURN(hipErrorInvalidValue); + } case hipResourceTypeMipmappedArray: *pArray = resDesc.res.mipmap.mipmap; break; @@ -1039,8 +1129,7 @@ hipError_t hipTexRefSetBorderColor(textureReference* texRef, float* pBorderColor) { HIP_INIT_API(hipTexRefSetBorderColor, texRef, pBorderColor); - if ((texRef == nullptr) || - (pBorderColor == nullptr)) { + if ((texRef == nullptr) || (pBorderColor == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -1110,8 +1199,7 @@ hipError_t hipTexRefSetMipmappedArray(textureReference* texRef, unsigned int Flags) { HIP_INIT_API(hipTexRefSetMipmappedArray, texRef, mipmappedArray, Flags); - if ((texRef == nullptr) || - (mipmappedArray == nullptr)) { + if ((texRef == nullptr) || (mipmappedArray == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -1119,6 +1207,14 @@ hipError_t hipTexRefSetMipmappedArray(textureReference* texRef, HIP_RETURN(hipErrorInvalidValue); } + hipDeviceptr_t refDevPtr = nullptr; + size_t refDevSize = 0; + if (!PlatformState::instance().getGlobalVarFromSymbol(texRef, ihipGetDevice(), &refDevPtr, + &refDevSize)) { + HIP_RETURN(hipErrorInvalidSymbol); + } + assert(refDevSize == sizeof(textureReference)); + // Any previous address or HIP array state associated with the texture reference is superseded by this function. // Any memory previously bound to hTexRef is unbound. // No need to check for errors. 
@@ -1133,7 +1229,13 @@ hipError_t hipTexRefSetMipmappedArray(textureReference* texRef, hipResourceViewFormat format = hip::getResourceViewFormat(hip::getChannelFormatDesc(texRef->numChannels, texRef->format)); hipResourceViewDesc resViewDesc = hip::getResourceViewDesc(mipmappedArray, format); - HIP_RETURN(ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, &resViewDesc)); + hipError_t err = ihipCreateTextureObject(&texRef->textureObject, &resDesc, &texDesc, &resViewDesc); + if (err != hipSuccess) { + HIP_RETURN(err); + } + // Copy to device. + amd::HostQueue* queue = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue)); } hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, @@ -1142,8 +1244,7 @@ hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, const HIP_RESOURCE_VIEW_DESC* pResViewDesc) { HIP_INIT_API(hipTexObjectCreate, pTexObject, pResDesc, pTexDesc, pResViewDesc); - if ((pTexObject == nullptr) || - (pResDesc == nullptr) || (pTexDesc == nullptr)) { + if ((pTexObject == nullptr) || (pResDesc == nullptr) || (pTexDesc == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -1168,8 +1269,7 @@ hipError_t hipTexObjectGetResourceDesc(HIP_RESOURCE_DESC* pResDesc, hipTextureObject_t texObject) { HIP_INIT_API(hipTexObjectGetResourceDesc, pResDesc, texObject); - if ((pResDesc == nullptr) || - (texObject == nullptr)) { + if ((pResDesc == nullptr) || (texObject == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -1182,8 +1282,7 @@ hipError_t hipTexObjectGetResourceViewDesc(HIP_RESOURCE_VIEW_DESC* pResViewDesc, hipTextureObject_t texObject) { HIP_INIT_API(hipTexObjectGetResourceViewDesc, pResViewDesc, texObject); - if ((pResViewDesc == nullptr) || - (texObject == nullptr)) { + if ((pResViewDesc == nullptr) || (texObject == nullptr)) { HIP_RETURN(hipErrorInvalidValue); } @@ -1196,8 +1295,7 @@ hipError_t hipTexObjectGetTextureDesc(HIP_TEXTURE_DESC* pTexDesc, hipTextureObject_t 
texObject) { HIP_INIT_API(hipTexObjectGetTextureDesc, pTexDesc, texObject); - if ((pTexDesc == nullptr) || - (texObject == nullptr)) { + if ((pTexDesc == nullptr) || (texObject == nullptr)) { HIP_RETURN(hipErrorInvalidValue); }