diff --git a/.gitignore b/.gitignore index 22cd23f2c6..3dfb1afef5 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,7 @@ hip-amdinternal HIP-Examples lib packages - +build bin/hipInfo bin/hipBusBandwidth bin/hipDispatchLatency diff --git a/CMakeLists.txt b/CMakeLists.txt index b3ea5a3ca3..4c89c93668 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -141,9 +141,6 @@ add_to_config(_buildInfo COMPILE_HIP_ATP_MARKER) ############################# # Build steps ############################# -# Rebuild cmake cache updates .hipInfo and .hipVersion -add_custom_target(update_build_and_version_info COMMAND make rebuild_cache) - # Build clang hipify if enabled add_subdirectory(hipify-clang) @@ -179,7 +176,9 @@ if(HIP_PLATFORM STREQUAL "hcc") src/hip_peer.cpp src/hip_stream.cpp src/hip_module.cpp + src/hip_db.cpp src/grid_launch.cpp + src/hip_texture.cpp src/env.cpp) set(SOURCE_FILES_DEVICE @@ -211,13 +210,6 @@ if(HIP_PLATFORM STREQUAL "hcc") endforeach() target_link_libraries(hip_hcc INTERFACE hcc::hccrt;hcc::hc_am) - # Generate hcc_version.txt - add_custom_target(query_hcc_version COMMAND ${HCC_HOME}/bin/hcc --version > ${PROJECT_BINARY_DIR}/hcc_version.tmp) - add_custom_target(check_hcc_version COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PROJECT_BINARY_DIR}/hcc_version.tmp ${PROJECT_BINARY_DIR}/hcc_version.txt DEPENDS query_hcc_version) - set_source_files_properties(${PROJECT_BINARY_DIR}/hcc_version.txt PROPERTIES GENERATED TRUE) - set_source_files_properties(${SOURCE_FILES_RUNTIME} ${SOURCE_FILES_DEVICE} PROPERTIES OBJECT_DEPENDS ${PROJECT_BINARY_DIR}/hcc_version.txt) - add_dependencies(hip_hcc check_hcc_version update_build_and_version_info) - # Generate .hipInfo file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo}) endif() diff --git a/INSTALL.md b/INSTALL.md index ef584dafa0..eb219e19f7 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -51,15 +51,16 @@ Run hipconfig (instructions below assume default installation path) : /opt/rocm/bin/hipconfig --full ``` 
-Compile and run the [square sample](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/tree/master/samples/0_Intro/square). +Compile and run the [square sample](https://github.com/ROCm-Developer-Tools/HIP/tree/master/samples/0_Intro/square). # Building HIP from source HIP source code is available and the project can be built from source on the HCC platform. 1. Follow the above steps to install and validate the binary packages. -2. Download HIP source code (from the [GitHub repot](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP).) -3. Build and install HIP (This is the simple version assuming default paths ; see below for additional options.) +2. Download HIP source code (from the [GitHub repo](https://github.com/ROCm-Developer-Tools/HIP).) +3. Install HIP build-time dependencies using ```sudo apt-get install libelf-dev```. +4. Build and install HIP (This is the simple version assuming default paths ; see below for additional options.) ``` cd HIP mkdir build diff --git a/Jenkinsfile b/Jenkinsfile new file mode 100644 index 0000000000..ce59476626 --- /dev/null +++ b/Jenkinsfile @@ -0,0 +1,466 @@ +#!/usr/bin/env groovy + +// Generated from snippet generator 'properties; set job properties' +properties([buildDiscarder(logRotator( + artifactDaysToKeepStr: '', + artifactNumToKeepStr: '', + daysToKeepStr: '', + numToKeepStr: '10')), + disableConcurrentBuilds(), + parameters([booleanParam( name: 'push_image_to_docker_hub', defaultValue: false, description: 'Push hip & hcc image to rocm docker-hub' )]), + [$class: 'CopyArtifactPermissionProperty', projectNames: '*'] + ]) + +//////////////////////////////////////////////////////////////////////// +// -- AUXILIARY HELPER FUNCTIONS + +//////////////////////////////////////////////////////////////////////// +// Return build number of upstream job +@NonCPS +int get_upstream_build_num( ) +{ + def upstream_cause = currentBuild.rawBuild.getCause( hudson.model.Cause$UpstreamCause ) + if( upstream_cause == null) +
return 0 + + return upstream_cause.getUpstreamBuild() +} + +//////////////////////////////////////////////////////////////////////// +// Return project name of upstream job +@NonCPS +String get_upstream_build_project( ) +{ + def upstream_cause = currentBuild.rawBuild.getCause( hudson.model.Cause$UpstreamCause ) + if( upstream_cause == null) + return null + + return upstream_cause.getUpstreamProject() +} + +//////////////////////////////////////////////////////////////////////// +// Construct the relative path of the build directory +String build_directory_rel( String build_config ) +{ + if( build_config.equalsIgnoreCase( 'release' ) ) + { + return "build/release" + } + else + { + return "build/debug" + } +} + +//////////////////////////////////////////////////////////////////////// +// Lots of images are created above; no apparent way to delete images:tags with docker global variable +def docker_clean_images( String org, String image_name ) +{ + // Check if any images exist first grepping for image names + int docker_images = sh( script: "docker images | grep \"${org}/${image_name}\"", returnStatus: true ) + + // The script returns a 0 for success (images were found ) + if( docker_images == 0 ) + { + // run bash script to clean images:tags after successful pushing + sh "docker images | grep \"${org}/${image_name}\" | awk '{print \$1 \":\" \$2}' | xargs docker rmi" + } +} + +//////////////////////////////////////////////////////////////////////// +// -- BUILD RELATED FUNCTIONS + +//////////////////////////////////////////////////////////////////////// +// Checkout source code, source dependencies and update version number numbers +// Returns a relative path to the directory where the source exists in the workspace +String checkout_and_version( String platform ) +{ + String source_dir_rel = "src" + String source_hip_rel = "${source_dir_rel}/hip" + + stage("${platform} clone") + { + dir( "${source_hip_rel}" ) + { + // checkout hip + checkout([ + $class: 'GitSCM', + 
branches: scm.branches, + doGenerateSubmoduleConfigurations: scm.doGenerateSubmoduleConfigurations, + extensions: scm.extensions + [[$class: 'CleanCheckout']], + userRemoteConfigs: scm.userRemoteConfigs + ]) + } + } + + return source_hip_rel +} + + +//////////////////////////////////////////////////////////////////////// +// This creates the docker image that we use to build the project in +// The docker images contains all dependencies, including OS platform, to build +def docker_build_image( String platform, String org, String optional_build_parm, String source_hip_rel, String from_image ) +{ + String build_image_name = "build-ubuntu-16.04" + String dockerfile_name = "dockerfile-build-ubuntu-16.04" + def build_image = null + + stage("${platform} build image") + { + dir("${source_hip_rel}") + { + def user_uid = sh( script: 'id -u', returnStdout: true ).trim() + + // Docker 17.05 introduced the ability to use ARG values in FROM statements + // Docker inspect failing on FROM statements with ARG https://issues.jenkins-ci.org/browse/JENKINS-44836 + // build_image = docker.build( "${org}/${build_image_name}:latest", "--pull -f docker/${dockerfile_name} --build-arg user_uid=${user_uid} --build-arg base_image=${from_image} ." ) + + // JENKINS-44836 workaround by using a bash script instead of docker.build() + sh "docker build -t ${org}/${build_image_name}:latest -f docker/${dockerfile_name} ${optional_build_parm} --build-arg user_uid=${user_uid} --build-arg base_image=${from_image} ." 
+ build_image = docker.image( "${org}/${build_image_name}:latest" ) + } + } + + return build_image +} + +//////////////////////////////////////////////////////////////////////// +// This encapsulates the cmake configure, build and package commands +// Leverages docker containers to encapsulate the build in a fixed environment +def docker_build_inside_image( def build_image, String inside_args, String platform, String optional_configure, String build_config, String source_hip_rel, String build_dir_rel ) +{ + String source_hip_abs = pwd() + "/" + source_hip_rel + + build_image.inside( inside_args ) + { + stage("${platform} make ${build_config}") + { + // The rm command needs to run as sudo because the test steps below create files owned by root + sh """#!/usr/bin/env bash + set -x + sudo rm -rf ${build_dir_rel} + mkdir -p ${build_dir_rel} + cd ${build_dir_rel} + cmake -DCMAKE_BUILD_TYPE=${build_config} -DCMAKE_INSTALL_PREFIX=staging ${optional_configure} ${source_hip_abs} + make -j\$(nproc) + """ + } + + // Cap the maximum amount of testing, in case of hangs + timeout(time: 1, unit: 'HOURS') + { + stage("${platform} unit testing") + { + sh """#!/usr/bin/env bash + set -x + cd ${build_dir_rel} + make install -j\$(nproc) + make build_tests -i -j\$(nproc) + make test + """ + // If unit tests output a junit or xunit file in the future, jenkins can parse that file + // to display test results on the dashboard + // junit "${build_dir_rel}/*.xml" + } + } + + // Only create packages from hcc based builds + if( platform.toLowerCase( ).startsWith( 'hcc-' ) ) + { + stage("${platform} packaging") + { + sh """#!/usr/bin/env bash + set -x + cd ${build_dir_rel} + make package + """ + + // No matter the base platform, all packages have the same name + // Only upload 1 set of packages, so we don't have a race condition uploading packages + if( platform.toLowerCase( ).startsWith( 'hcc-ctu' ) ) + { + archiveArtifacts artifacts: "${build_dir_rel}/*.deb", fingerprint: true + 
archiveArtifacts artifacts: "${build_dir_rel}/*.rpm", fingerprint: true + } + } + } + } + + return void +} + +//////////////////////////////////////////////////////////////////////// +// This builds a fresh docker image FROM a clean base image, with no build dependencies included +// Uploads the new docker image to internal artifactory +String docker_upload_artifactory( String hcc_ver, String artifactory_org, String from_image, String source_hip_rel, String build_dir_rel ) +{ + def hip_install_image = null + String image_name = "hip-${hcc_ver}-ubuntu-16.04" + + stage( 'artifactory' ) + { + println "artifactory_org: ${artifactory_org}" + + // We copy the docker files into the bin directory where the .deb lives so that it's a clean build every time + sh "cp -r ${source_hip_rel}/docker/* ${build_dir_rel}" + + // Docker 17.05 introduced the ability to use ARG values in FROM statements + // Docker inspect failing on FROM statements with ARG https://issues.jenkins-ci.org/browse/JENKINS-44836 + // hip_install_image = docker.build( "${artifactory_org}/${image_name}:${env.BUILD_NUMBER}", "--pull -f ${build_dir_rel}/dockerfile-hip-ubuntu-16.04 --build-arg base_image=${from_image} ${build_dir_rel}" ) + + // JENKINS-44836 workaround by using a bash script instead of docker.build() + sh "docker build -t ${artifactory_org}/${image_name} --pull -f ${build_dir_rel}/dockerfile-hip-ubuntu-16.04 --build-arg base_image=${from_image} ${build_dir_rel}" + hip_install_image = docker.image( "${artifactory_org}/${image_name}" ) + + // The connection to artifactory can fail sometimes, but this should not be treated as a build fail + try + { + // Don't push pull requests to artifactory, these tend to accumulate over time + if( env.BRANCH_NAME.toLowerCase( ).startsWith( 'pr-' ) ) + { + println 'Pull Request (PR-xxx) detected; NOT pushing to artifactory' + } + else + { + docker.withRegistry('http://compute-artifactory:5001', 'artifactory-cred' ) + { + hip_install_image.push( +
"${env.BUILD_NUMBER}" ) + hip_install_image.push( 'latest' ) + } + } + } + catch( err ) + { + currentBuild.result = 'SUCCESS' + } + } + + return image_name +} + +//////////////////////////////////////////////////////////////////////// +// Uploads the new docker image to the public docker-hub +def docker_upload_dockerhub( String local_org, String image_name, String remote_org ) +{ + stage( 'docker-hub' ) + { + // Do not treat failures to push to docker-hub as a build fail + try + { + sh """#!/usr/bin/env bash + set -x + echo inside sh + docker tag ${local_org}/${image_name} ${remote_org}/${image_name} + """ + + docker_hub_image = docker.image( "${remote_org}/${image_name}" ) + + docker.withRegistry('https://registry.hub.docker.com', 'docker-hub-cred' ) + { + docker_hub_image.push( "${env.BUILD_NUMBER}" ) + docker_hub_image.push( 'latest' ) + } + } + catch( err ) + { + currentBuild.result = 'SUCCESS' + } + } +} + +//////////////////////////////////////////////////////////////////////// +// hcc_integration_testing +// This function sets up compilation and testing of HiP on a compiler downloaded from an upstream build +// Integration testing is centered around docker and constructing clean test environments every time + +// NOTES: I have implemented integration testing 3 different ways, and I've come to the conclusion nothing is perfect +// 1. I've tried having HCC push the test compiler to artifactory, and having HiP download the test docker image from artifactory +// a. The act of uploading and downloading images from artifactory takes minutes +// b. There is no good way of deleting images from a repository. You have to use an arcane CURL command and I don't know how +// to keep the password secret. These test integration images are meant to be ephemeral. +// 2. I tried 'docker save' to export a docker image into a tarball, and transferring the image through 'copy artifacts plugin' +// a. The HCC docker image uncompressed is over 1GB +// b. 
Compressing the docker image takes even longer than uploading the image to artifactory +// 3. Download the HCC .deb and dockerfile through 'copy artifacts plugin'. Create a new HCC image on the fly +// a. There is inefficiency in building a new ubuntu image and installing HCC twice (once in HCC build, once here) +// b. This solution doesn't scale when we start testing downstream libraries + +// I've implemented solution #3 above, probably transitioning to #2 down the line (probably without compression) +String hcc_integration_testing( String inside_args, String job, String build_config ) +{ + // Attempt to make unique docker image names for each build, to support concurrent builds + // Mangle docker org name with upstream build info + String testing_org_name = 'hcc-test-' + get_upstream_build_project( ).replaceAll('/','-') + '-' + get_upstream_build_num( ) + + // Tag image name with this build number + String hcc_test_image_name = "hcc:${env.BUILD_NUMBER}" + + def hip_integration_image = null + + dir( 'integration-testing' ) + { + deleteDir( ) + + // This invokes 'copy artifact plugin' to copy archived files from upstream build + step([$class: 'CopyArtifact', filter: 'archive/**/*.deb, docker/dockerfile-*', + fingerprintArtifacts: true, projectName: get_upstream_build_project( ), flatten: true, + selector: [$class: 'TriggeredBuildSelector', allowUpstreamDependencies: false, fallbackToLastSuccessful: false, upstreamFilterStrategy: 'UseGlobalSetting'], + target: '.' ]) +// // The following 'copy artifact' is supposed to copy direct from workspace, but it doesn't seem to work across machines +// step( [$class: 'CopyArtifact', filter: '**', fingerprintArtifacts: true, flatten: true, +// projectName: "${params.upstream_hcc}", selector: [$class: 'WorkspaceSelector'], target: 'integration-testing'] ) + + docker.build( "${testing_org_name}/${hcc_test_image_name}", "-f dockerfile-hcc-lc-ubuntu-16.04 ." 
) + } + + // Checkout source code, dependencies and version files + String source_hip_rel = checkout_and_version( job ) + + // Construct a binary directory path based on build config + String build_hip_rel = build_directory_rel( build_config ); + + // Build hip inside of the build environment + hip_integration_image = docker_build_image( job, testing_org_name, '', source_hip_rel, "${testing_org_name}/${hcc_test_image_name}" ) + + docker_build_inside_image( hip_integration_image, inside_args, job, '', build_config, source_hip_rel, build_hip_rel ) + + docker_clean_images( testing_org_name, '*' ) +} + +//////////////////////////////////////////////////////////////////////// +// -- MAIN +// Following this line is the start of MAIN of this Jenkinsfile +String build_config = 'Release' +String job_name = env.JOB_NAME.toLowerCase( ) + +// Integration testing is a special path which implies testing of an upstream build of hcc, +// but does not need testing across older builds of hcc or cuda. This is more of a compiler +// hcc unit test +// params.hcc_integration_test is set in HCC build +if( params.hcc_integration_test ) +{ + println "HCC integration testing" + + node('docker && rocm') + { + hcc_integration_testing( '--device=/dev/kfd', 'hcc-ctu', build_config ) + } + + return +} + +// The following launches 3 builds in parallel: hcc-ctu, hcc-1.6 and cuda +parallel hcc_ctu: +{ + node('docker && rocm') + { + String hcc_ver = 'hcc-ctu' + String from_image = 'compute-artifactory:5001/radeonopencompute/hcc/clang_tot_upgrade/hcc-lc-ubuntu-16.04:latest' + String inside_args = '--device=/dev/kfd' + + // Checkout source code, dependencies and version files + String source_hip_rel = checkout_and_version( hcc_ver ) + + // Create/reuse a docker image that represents the hip build environment + def hip_build_image = docker_build_image( hcc_ver, 'hip', ' --pull', source_hip_rel, from_image ) + + // Print system information for the log + hip_build_image.inside( inside_args ) + { + sh 
"""#!/usr/bin/env bash + set -x + /opt/rocm/bin/rocm_agent_enumerator -t ALL + /opt/rocm/bin/hcc --version + """ + } + + // Construct a binary directory path based on build config + String build_hip_rel = build_directory_rel( build_config ); + + // Build hip inside of the build environment + docker_build_inside_image( hip_build_image, inside_args, hcc_ver, '', build_config, source_hip_rel, build_hip_rel ) + + // After a successful build, upload a docker image of the results + String hip_image_name = docker_upload_artifactory( hcc_ver, job_name, from_image, source_hip_rel, build_hip_rel ) + + if( params.push_image_to_docker_hub ) + { + docker_upload_dockerhub( job_name, hip_image_name, 'rocm' ) + docker_clean_images( 'rocm', hip_image_name ) + } + docker_clean_images( job_name, hip_image_name ) + } +}, +hcc_1_6: +{ + node('docker && rocm') + { + String hcc_ver = 'hcc-1.6' + String from_image = 'compute-artifactory:5001/radeonopencompute/hcc/roc-1.6.x/hcc-lc-ubuntu-16.04:latest' + String inside_args = '--device=/dev/kfd' + + // Checkout source code, dependencies and version files + String source_hip_rel = checkout_and_version( hcc_ver ) + + // Create/reuse a docker image that represents the hip build environment + def hip_build_image = docker_build_image( hcc_ver, 'hip', ' --pull', source_hip_rel, from_image ) + + // Print system information for the log + hip_build_image.inside( inside_args ) + { + sh """#!/usr/bin/env bash + set -x + /opt/rocm/bin/rocm_agent_enumerator -t ALL + /opt/rocm/bin/hcc --version + """ + } + + // Construct a binary directory path based on build config + String build_hip_rel = build_directory_rel( build_config ); + + // Build hip inside of the build environment + docker_build_inside_image( hip_build_image, inside_args, hcc_ver, '', build_config, source_hip_rel, build_hip_rel ) + + // Not pushing hip-hcc-1.6 builds at this time; saves a minute and nobody needs? 
+ // String hip_image_name = docker_upload_artifactory( hcc_ver, job_name, from_image, source_hip_rel, build_hip_rel ) + // docker_clean_images( job_name, hip_image_name ) + } +}, +nvcc: +{ + node('docker && cuda') + { + //////////////////////////////////////////////////////////////////////// + // Block of string constants customizing behavior for cuda + String nvcc_ver = 'nvcc-8.0' + String from_image = 'nvidia/cuda:8.0-devel' + + // This unfortunately hardcodes the driver version nvidia_driver_375.74 in the volume mount. Research if a way + // exists to get volume driver to customize the volume names to leave out driver version + String inside_args = '''--device=/dev/nvidiactl --device=/dev/nvidia0 --device=/dev/nvidia-uvm --device=/dev/nvidia-uvm-tools + --volume-driver=nvidia-docker --volume=nvidia_driver_375.74:/usr/local/nvidia:ro'''; + + // Checkout source code, dependencies and version files + String source_hip_rel = checkout_and_version( nvcc_ver ) + + // We pull public nvidia images + def hip_build_image = docker_build_image( nvcc_ver, 'hip', ' --pull', source_hip_rel, from_image ) + + // Print system information for the log + hip_build_image.inside( inside_args ) + { + sh """#!/usr/bin/env bash + set -x + nvidia-smi + nvcc --version + """ + } + + // Construct a binary directory path based on build config + String build_hip_rel = build_directory_rel( build_config ); + + // Build hip inside of the build environment + docker_build_inside_image( hip_build_image, inside_args, nvcc_ver, "-DHIP_NVCC_FLAGS=--Wno-deprecated-gpu-targets", build_config, source_hip_rel, build_hip_rel ) + } +} diff --git a/README.md b/README.md index 565fd6a36d..4f7f1a0123 100644 --- a/README.md +++ b/README.md @@ -29,9 +29,10 @@ HIP releases are typically of two types. 
The tag naming convention is different - [Installation](INSTALL.md) - [HIP FAQ](docs/markdown/hip_faq.md) - [HIP Kernel Language](docs/markdown/hip_kernel_language.md) -- [HIP Runtime API (Doxygen)](http://gpuopen-professionalcompute-tools.github.io/HIP) +- [HIP Runtime API (Doxygen)](http://rocm-developer-tools.github.io/HIP) - [HIP Porting Guide](docs/markdown/hip_porting_guide.md) - [HIP Porting Driver Guide](docs/markdown/hip_porting_driver_api.md) +- [HIP Programming Guide](docs/markdown/hip_programming_guide.md) - [HIP Profiling ](docs/markdown/hip_profiling.md) - [HIP Debugging](docs/markdown/hip_debugging.md) - [HIP Terminology](docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) @@ -122,8 +123,8 @@ make ## More Examples -The GitHub repository [HIP-Examples](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP-Examples.git) contains a hipified version of the popular Rodinia benchmark suite. -The README with the procedures and tips the team used during this porting effort is here: [Rodinia Porting Guide](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP-Examples/blob/master/rodinia_3.0/hip/README.hip_porting) +The GitHub repository [HIP-Examples](https://github.com/ROCm-Developer-Tools/HIP-Examples.git) contains a hipified version of the popular Rodinia benchmark suite. +The README with the procedures and tips the team used during this porting effort is here: [Rodinia Porting Guide](https://github.com/ROCm-Developer-Tools/HIP-Examples/blob/master/rodinia_3.0/hip/README.hip_porting) ## Tour of the HIP Directories * **include**: @@ -141,6 +142,6 @@ The README with the procedures and tips the team used during this porting effort * **doc**: Documentation - markdown and doxygen info ## Reporting an issue -Use the [GitHub issue tracker] (https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/issues). +Use the [GitHub issue tracker](https://github.com/ROCm-Developer-Tools/HIP/issues). 
If reporting a bug, include the output of "hipconfig --full" and samples/1_hipInfo/hipInfo (if possible). diff --git a/RELEASE.md b/RELEASE.md index a1e580b7b0..452ac54ee1 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,12 +1,25 @@ # Release notes -We have attempted to document known bugs and limitations - in particular the [HIP Kernel Language](docs/markdown/hip_kernel_language.md) document uses the phrase "Under Development", and the [HIP Runtime API bug list](http://gpuopen-professionalcompute-tools.github.io/HIP/bug.html) lists known bugs. +We have attempted to document known bugs and limitations - in particular the [HIP Kernel Language](docs/markdown/hip_kernel_language.md) document uses the phrase "Under Development", and the [HIP Runtime API bug list](http://rocm-developer-tools.github.io/HIP/bug.html) lists known bugs. =================================================================================================== + ## Revision History: +=================================================================================================== +Release: 1.3 +Date: 2017.08.16 +- hipcc now auto-detects amdgcn arch. No need to specify the arch when building for same system. +- HIP texture support +- Implemented __threadfence_support +- Improvements in HIP context management logic +- Bug fixes in several APIs including hipDeviceGetPCIBusId, hipEventDestroy, hipMemcpy2DAsync +- Updates to hipify-clang and documentation +- HIP development now fully open and on GitHub. Developers should submit pull requests. + + =================================================================================================== Release: 1.2 Date: 2017.06.29 @@ -113,7 +126,7 @@ Date: 2016.06.06 - Add cross-linking support between G++ and HCC, in particular for interfaces that use standard C++ libraries (ie std::vectors, std::strings). HIPCC now uses libstdc++ by default on the HCC compilation path. -- More samples including gpu-burn, SHOC, nbody, rtm. 
See [HIP-Examples](https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP-Examples) +- More samples including gpu-burn, SHOC, nbody, rtm. See [HIP-Examples](https://github.com/ROCm-Developer-Tools/HIP-Examples) =================================================================================================== diff --git a/bin/hipcc b/bin/hipcc index 2dceaa295f..8f18ab0d8e 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -47,7 +47,7 @@ sub parse_config_file { } $verbose = $ENV{'HIPCC_VERBOSE'} // 0; -# Verbose: 0x1=commands, 0x2=paths, 0x4=hippc args +# Verbose: 0x1=commands, 0x2=paths, 0x4=hipcc args $HIP_PATH=$ENV{'HIP_PATH'} // dirname (dirname $0); # use parent directory of hipcc @@ -75,6 +75,7 @@ $target_gfx801 = 0; $target_gfx802 = 0; $target_gfx803 = 0; $target_gfx900 = 0; +$default_amdgpu_target = 1; if ($HIP_PLATFORM eq "hcc") { $HSA_PATH=$ENV{'HSA_PATH'} // "/opt/rocm/hsa"; @@ -103,31 +104,9 @@ if ($HIP_PLATFORM eq "hcc") { $HIPLDFLAGS = `${HCC_HOME}/bin/hcc-config --ldflags`; - $ROCM_AGENT_ENUM = "${ROCM_PATH}/bin/rocm_agent_enumerator"; - - my $myAgents = `${ROCM_AGENT_ENUM} -t GPU`; - my @agentsLine = split('\n', $myAgents); - - foreach my $val (@agentsLine) { - if($val eq "gfx701") { - $target_gfx701 = 1; - } - if($val eq "gfx801") { - $target_gfx801 = 1; - } - if($val eq "gfx802") { - $target_gfx802 = 1; - } - if($val eq "gfx803") { - $target_gfx803 = 1; - } - if($val eq "gfx900") { - $target_gfx900 = 1; - } - } - #### GCC system includes workaround #### $HCC_WA_FLAGS = " "; + $HOST_OSNAME= `cat /etc/os-release | grep "^ID\=" | cut -d= -f2 | tr -d '\n'`; if ($HCC_VERSION_MAJOR eq 1) { my $GCC_CUR_VER = `gcc -dumpversion`; my $GPP_CUR_VER = `g++ -dumpversion`; @@ -138,7 +117,8 @@ if ($HIP_PLATFORM eq "hcc") { # Only include the libstdc++ headers and libraries flags explicitly if the g++ is older than version 5. 
# That's because HCC already uses libstdc++ by default if a newer g++/libstdc++ is available - if (${GCC_CUR_VER} eq ${GPP_CUR_VER} and $GPP_VER_FIELDS[0] < 5) { + # Cent OS 7 and RHEL 7.4 cannot use libstdc++ for compilation, default to libc++ + if (${GCC_CUR_VER} eq ${GPP_CUR_VER} and $GPP_VER_FIELDS[0] < 5 and ($HOST_OSNAME ne "\"centos\"") and ($HOST_OSNAME ne "\"rhel\"")) { $HCC_WA_FLAGS .= " -stdlib=libstdc++ -I/usr/include/x86_64-linux-gnu -I/usr/include/x86_64-linux-gnu/c++/${GCC_CUR_VER} -I/usr/include/c++/${GCC_CUR_VER} "; # Add C++ libs for GCC. $HIPLDFLAGS .= " -lstdc++"; @@ -146,7 +126,6 @@ if ($HIP_PLATFORM eq "hcc") { } # Force -stdlib=libc++ on UB14.04 - $HOST_OSNAME= `cat /etc/os-release | grep "^ID\=" | cut -d= -f2 | tr -d '\n'`; $HOST_OSVER= `cat /etc/os-release | grep "^VERSION_ID\=" | cut -d= -f2 | tr -d '\n'`; if ($HOST_OSNAME eq "ubuntu" and $HOST_OSVER eq "\"14.04\"") { $HIPCXXFLAGS .= " -stdlib=libc++"; @@ -278,25 +257,32 @@ foreach $arg (@ARGV) $HIPCXXFLAGS .= " -stdlib=libc++"; $setStdLib = 1; } + + # TODO: Add support for comma separated list like HCC_AMDGPU_TARGET if($arg eq '--amdgpu-target=gfx701') { $target_gfx701 = 1; + $default_amdgpu_target = 0; } if($arg eq '--amdgpu-target=gfx801') { $target_gfx801 = 1; + $default_amdgpu_target = 0; } if($arg eq '--amdgpu-target=gfx802') { $target_gfx802 = 1; + $default_amdgpu_target = 0; } if($arg eq '--amdgpu-target=gfx803') { $target_gfx803 = 1; + $default_amdgpu_target = 0; } if($arg eq '--amdgpu-target=gfx900') { $target_gfx900 = 1; + $default_amdgpu_target = 0; } if(($trimarg eq '-stdlib=libstdc++') and ($setStdLib eq 0)) @@ -358,38 +344,76 @@ foreach $arg (@ARGV) } $toolArgs .= " $arg" unless $swallowArg; } -if(defined $ENV{HCC_AMDGPU_TARGET}) -{ - foreach my $target (split(/,/, $ENV{HCC_AMDGPU_TARGET})) - { - if($target eq 'gfx701') - { - $target_gfx701 = 1; - } - if($target eq 'gfx801') - { - $target_gfx801 = 1; - } - if($target eq 'gfx802') - { - $target_gfx802 = 1; - } - if($target eq 
'gfx803') - { - $target_gfx803 = 1; - } - if($target eq 'gfx900') - { - $target_gfx900 = 1; - } - } -} -if ($target_gfx701 eq 0 and $target_gfx801 eq 0 and $target_gfx802 eq 0 and $target_gfx803 eq 0 and $target_gfx900 eq 0) -{ - $target_gfx803 = 1; -} if($HIP_PLATFORM eq "hcc"){ + # No AMDGPU target specified at commandline. So look for HCC_AMDGPU_TARGET + if($default_amdgpu_target eq 1 and defined $ENV{HCC_AMDGPU_TARGET}) + { + foreach my $target (split(/,/, $ENV{HCC_AMDGPU_TARGET})) + { + if($target eq 'gfx701') + { + $target_gfx701 = 1; + $default_amdgpu_target = 0; + } + if($target eq 'gfx801') + { + $target_gfx801 = 1; + $default_amdgpu_target = 0; + } + if($target eq 'gfx802') + { + $target_gfx802 = 1; + $default_amdgpu_target = 0; + } + if($target eq 'gfx803') + { + $target_gfx803 = 1; + $default_amdgpu_target = 0; + } + if($target eq 'gfx900') + { + $target_gfx900 = 1; + $default_amdgpu_target = 0; + } + } + } + # Else try using rocm_agent_enumerator + if($default_amdgpu_target eq 1) + { + $ROCM_AGENT_ENUM = "${ROCM_PATH}/bin/rocm_agent_enumerator"; + + my $myAgents = `${ROCM_AGENT_ENUM} -t GPU`; + my @agentsLine = split('\n', $myAgents); + + foreach my $val (@agentsLine) { + if($val eq "gfx701") { + $target_gfx701 = 1; + $default_amdgpu_target = 0; + } + if($val eq "gfx801") { + $target_gfx801 = 1; + $default_amdgpu_target = 0; + } + if($val eq "gfx802") { + $target_gfx802 = 1; + $default_amdgpu_target = 0; + } + if($val eq "gfx803") { + $target_gfx803 = 1; + $default_amdgpu_target = 0; + } + if($val eq "gfx900") { + $target_gfx900 = 1; + $default_amdgpu_target = 0; + } + } + } + # rocm_agent_enumerator failed! Throw an error and die if linking is required + if ($default_amdgpu_target eq 1 and $compileOnly eq 0) + { + print "No valid AMD GPU target was either specified or found. 
Please specify a valid target using --amdgpu-target=" and die(); + } $ENV{HCC_EXTRA_LIBRARIES}="$HIP_PATH/lib/hip_hc.ll\n"; diff --git a/bin/hipconfig b/bin/hipconfig index 39fdab5a99..c74d757fb5 100755 --- a/bin/hipconfig +++ b/bin/hipconfig @@ -1,7 +1,7 @@ #!/usr/bin/perl -w $HIP_BASE_VERSION_MAJOR = "1"; -$HIP_BASE_VERSION_MINOR = "2"; +$HIP_BASE_VERSION_MINOR = "3"; # Need perl > 5.10 to use logic-defined or use 5.006; use v5.10.1; diff --git a/bin/hipify-perl b/bin/hipify-perl index 27acc5bccc..ada017abf4 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -247,6 +247,7 @@ while (@ARGV) { $ft{'err'} += s/\bcudaErrorInvalidValue\b/hipErrorInvalidValue/g; $ft{'err'} += s/\bcudaErrorInvalidResourceHandle\b/hipErrorInvalidResourceHandle/g; $ft{'err'} += s/\bcudaErrorInvalidDevice\b/hipErrorInvalidDevice/g; + $ft{'err'} += s/\bcudaErrorInvalidDevicePointer\b/hipErrorInvalidDevicePointer/g; $ft{'err'} += s/\bcudaErrorNoDevice\b/hipErrorNoDevice/g; $ft{'err'} += s/\bcudaErrorNotReady\b/hipErrorNotReady/g; $ft{'err'} += s/\bcudaErrorUnknown\b/hipErrorUnknown/g; @@ -280,9 +281,11 @@ while (@ARGV) { $ft{'mem'} += s/\bcudaMemcpyKind\b/hipMemcpyKind/g; $ft{'mem'} += s/\bcudaPointerAttributes\b/hipPointerAttribute_t/g; + $ft{'mem'} += s/\bcudaPointerGetAttributes\b/hipPointerGetAttributes/g; $ft{'mem'} += s/\bcudaMemcpy2D\b/hipMemcpy2D/g; $ft{'mem'} += s/\bcudaMemcpy2DToArray\b/hipMemcpy2DToArray/g; + $ft{'mem'} += s/\bcudaMemcpyToArray\b/hipMemcpyToArray/g; #-------- # Memory management: @@ -302,6 +305,7 @@ while (@ARGV) { $ft{'mem'} += s/\bcudaHostGetDevicePointer\b/hipHostGetDevicePointer/g; $ft{'mem'} += s/\bcudaMallocArray\b/hipMallocArray/g; + $ft{'mem'} += s/\bcudaFreeArray\b/hipFreeArray/g; $ft{'mem'} += s/\bcudaMallocPitch\b/hipMallocPitch/g; @@ -323,9 +327,6 @@ while (@ARGV) { $ft{'coord_func'} += s/\bgridDim\.y\b/hipGridDim_y/g; $ft{'coord_func'} += s/\bgridDim\.z\b/hipGridDim_z/g; - # hack to avoid replacing hipDeviceProp.warpSize call - $ft{'special_func'} 
+= s/([^.])\bwarpSize\b/$1hipWarpSize/g; - #-------- # Events @@ -337,6 +338,7 @@ while (@ARGV) { $ft{'event'} += s/\bcudaEventElapsedTime\b/hipEventElapsedTime/g; $ft{'event'} += s/\bcudaEventSynchronize\b/hipEventSynchronize/g; $ft{'event'} += s/\bcudaEventDisableTiming\b/hipEventDisableTiming/g; + $ft{'event'} += s/\bcudaEventQuery\b/hipEventQuery/g; #-------- # Streams @@ -487,15 +489,15 @@ while (@ARGV) { my $kernelName; # Handle the <>> syntax: - $k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>([\s*\\]*)\(/hipLaunchKernel(HIP_KERNEL_NAME($1$2), dim3($3), dim3($4), $5, $6, /g; + $k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>([\s*\\]*)\(/hipLaunchKernelGGL(($1$2), dim3($3), dim3($4), $5, $6, /g; $kernelName = $1 if $k; # Handle the <>> syntax: - $k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>([\s*\\]*)\(/hipLaunchKernel(HIP_KERNEL_NAME($1$2), dim3($3), dim3($4), $5, 0, /g; + $k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>([\s*\\]*)\(/hipLaunchKernelGGL(($1$2), dim3($3), dim3($4), $5, 0, /g; $kernelName = $1 if $k; # Handle the <>> syntax: - $k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>([\s\\]*)\(/hipLaunchKernel(HIP_KERNEL_NAME($1$2), dim3($3), dim3($4), 0, 0, /g; + $k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>([\s\\]*)\(/hipLaunchKernelGGL(($1$2), dim3($3), dim3($4), 0, 0, /g; $kernelName = $1 if $k; $ft{'kern'} += $k; @@ -511,11 +513,14 @@ while (@ARGV) { $ft{'tex'} += s/\bcudaFilterModePoint\b/hipFilterModePoint/g; $ft{'tex'} += s/\bcudaReadModeElementType\b/hipReadModeElementType/g; - $ft{'tex'} += s/\bcudaArray\b/hipArrary/g; + $ft{'tex'} += s/\bcudaArray\b/hipArray/g; $ft{'tex'} += s/\bcudaCreateChannelDesc\b/hipCreateChannelDesc/g; $ft{'tex'} += s/\bcudaBindTexture\b/hipBindTexture/g; $ft{'tex'} += s/\bcudaBindTextureToArray\b/hipBindTextureToArray/g; $ft{'tex'} += s/\bcudaUnbindTexture\b/hipUnbindTexture/g; + $ft{'tex'} += 
s/\bcudaChannelFormatKindFloat\b/hipChannelFormatKindFloat/g; + $ft{'tex'} += s/\bcudaAddressMode/hipAddressMode/g; + $ft{'tex'} += s/\bcudaFilterMode/hipFilterMode/g; } diff --git a/cmake/FindHIP.cmake b/cmake/FindHIP.cmake index 5a5813ba0d..6668e60332 100644 --- a/cmake/FindHIP.cmake +++ b/cmake/FindHIP.cmake @@ -520,7 +520,7 @@ macro(HIP_ADD_EXECUTABLE hip_target) if("x${HCC_HOME}" STREQUAL "x") set(HCC_HOME "/opt/rocm/hcc") endif() - set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o ") + set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o ") add_executable(${hip_target} ${_cmake_options} ${_generated_files} ${_sources}) set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE HIP) endmacro() diff --git a/docker/dockerfile-build-ubuntu-16.04 b/docker/dockerfile-build-ubuntu-16.04 new file mode 100644 index 0000000000..031bf72437 --- /dev/null +++ b/docker/dockerfile-build-ubuntu-16.04 @@ -0,0 +1,26 @@ +# Parameters related to building hip +ARG base_image + +FROM ${base_image} +MAINTAINER Kent Knox + +ARG user_uid + +# Install Packages +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + sudo \ + build-essential \ + cmake \ + git \ + libelf-dev \ + rpm \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# docker pipeline runs containers with particular uid +# create a jenkins user with this specific uid so it can use sudo privileges +# Grant any member of sudo group password-less sudo privileges +RUN useradd --create-home -u ${user_uid} -G sudo --shell /bin/bash jenkins && \ + mkdir -p /etc/sudoers.d/ && \ + echo '%sudo ALL=(ALL) NOPASSWD:ALL' | tee /etc/sudoers.d/sudo-nopasswd diff --git a/docker/dockerfile-hip-ubuntu-16.04 b/docker/dockerfile-hip-ubuntu-16.04 new file mode 100644 index 0000000000..0852ae38c5 --- /dev/null +++ b/docker/dockerfile-hip-ubuntu-16.04 @@ -0,0 +1,19 @@ +# Parameters related to building hip +ARG base_image + +FROM 
${base_image} +MAINTAINER Kent Knox + +# Copy the debian package of hip into the container from host +COPY *.deb /tmp/ + +# Install the debian package +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y curl \ + && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends --allow-unauthenticated -y \ + /tmp/hip_base-*.deb \ + /tmp/hip_hcc-*.deb \ + /tmp/hip_doc-*.deb \ + /tmp/hip_samples-* \ + && rm -f /tmp/*.deb \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* diff --git a/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index d797b31832..9a4700b19c 100644 --- a/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -2,690 +2,859 @@ ## **1. Data types used by CUDA driver** -| **type** | **CUDA** | **HIP** | **CUDA description** | -|-------------:|---------------------------------------------------------------|------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| struct | `CUDA_ARRAY3D_DESCRIPTOR` | | | -| struct | `CUDA_ARRAY_DESCRIPTOR` | | | -| struct | `CUDA_MEMCPY2D` | | | -| struct | `CUDA_MEMCPY3D` | | | -| struct | `CUDA_MEMCPY3D_PEER` | | | -| struct | `CUDA_POINTER_ATTRIBUTE_P2P_TOKENS` | | | -| struct | `CUDA_RESOURCE_DESC` | | | -| struct | `CUDA_RESOURCE_VIEW_DESC` | | | -| struct | `CUdevprop` | `hipDeviceProp_t` | | -| struct | `CUipcEventHandle` | | | -| struct | `CUipcMemHandle` | | | -| enum |***`CUaddress_mode`*** | | Texture reference addressing modes | -| 0 |*`CU_TR_ADDRESS_MODE_WRAP`* | | Wrapping address mode | -| 1 |*`CU_TR_ADDRESS_MODE_CLAMP`* | | Clamp to edge address mode | -| 2 |*`CU_TR_ADDRESS_MODE_MIRROR`* | | Mirror address mode | -| 3 |*`CU_TR_ADDRESS_MODE_BORDER`* | | 
Border address mode | -| enum |***`CUarray_cubemap_face`*** | | Array indices for cube faces | -| 0x00 |*`CU_CUBEMAP_FACE_POSITIVE_X`* | | Positive X face of cubemap | -| 0x01 |*`CU_CUBEMAP_FACE_NEGATIVE_X`* | | Negative X face of cubemap | -| 0x02 |*`CU_CUBEMAP_FACE_POSITIVE_Y`* | | Positive Y face of cubemap | -| 0x03 |*`CU_CUBEMAP_FACE_NEGATIVE_Y`* | | Negative Y face of cubemap | -| 0x04 |*`CU_CUBEMAP_FACE_POSITIVE_Z`* | | Positive Z face of cubemap | -| 0x05 |*`CU_CUBEMAP_FACE_NEGATIVE_Z`* | | Negative Z face of cubemap | -| enum |***`CUarray_format`*** | | Array formats | -| 0x01 |*`CU_AD_FORMAT_UNSIGNED_INT8`* | | Unsigned 8-bit integers | -| 0x02 |*`CU_AD_FORMAT_UNSIGNED_INT16`* | | Unsigned 16-bit integers | -| 0x03 |*`CU_AD_FORMAT_UNSIGNED_INT32`* | | Unsigned 32-bit integers | -| 0x08 |*`CU_AD_FORMAT_SIGNED_INT8`* | | Signed 8-bit integers | -| 0x09 |*`CU_AD_FORMAT_SIGNED_INT16`* | | Signed 16-bit integers | -| 0x0a |*`CU_AD_FORMAT_SIGNED_INT32`* | | Signed 32-bit integers | -| 0x10 |*`CU_AD_FORMAT_HALF`* | | 16-bit floating point | -| 0x20 |*`CU_AD_FORMAT_FLOAT`* | | 32-bit floating point | -| enum |***`CUctx_flags`*** | | Context creation flags | -| 0x00 |*`CU_CTX_SCHED_AUTO`* | | Automatic scheduling | -| 0x01 |*`CU_CTX_SCHED_SPIN`* | | Set spin as default scheduling | -| 0x02 |*`CU_CTX_SCHED_YIELD`* | | Set yield as default scheduling | -| 0x04 |*`CU_CTX_SCHED_BLOCKING_SYNC`* | | Set blocking synchronization as default scheduling | -| 0x04 |*`CU_CTX_BLOCKING_SYNC`* | | Set blocking synchronization as default scheduling Deprecated. 
This flag was deprecated as of CUDA 4.0 and was replaced with CU_CTX_SCHED_BLOCKING_SYNC.| -| 0x07 |*`CU_CTX_SCHED_MASK`* | | | -| 0x08 |*`CU_CTX_MAP_HOST`* | | Support mapped pinned allocations | -| 0x10 |*`CU_CTX_LMEM_RESIZE_TO_MAX`* | | Keep local memory allocation after launch | -| 0x1f |*`CU_CTX_FLAGS_MASK`* | | | -| enum |***`CUdevice_attribute`*** | | Device properties | -| 1 |*`CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK`* |*`hipDeviceAttributeMaxThreadsPerBlock`* | Maximum number of threads per block | -| 2 |*`CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X`* |*`hipDeviceAttributeMaxBlockDimX`* | Maximum block dimension X | -| 3 |*`CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y`* |*`hipDeviceAttributeMaxBlockDimY`* | Maximum block dimension Y | -| 4 |*`CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z`* |*`hipDeviceAttributeMaxBlockDimZ`* | Maximum block dimension Z | -| 5 |*`CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X`* |*`hipDeviceAttributeMaxGridDimX`* | Maximum grid dimension X | -| 6 |*`CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y`* |*`hipDeviceAttributeMaxGridDimY`* | Maximum grid dimension Y | -| 7 |*`CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z`* |*`hipDeviceAttributeMaxGridDimZ`* | Maximum grid dimension Y | -| 8 |*`CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK`* |*`hipDeviceAttributeMaxSharedMemoryPerBlock`* | Maximum shared memory available per block in bytes | -| 8 |*`CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK`* |*`hipDeviceAttributeMaxSharedMemoryPerBlock`* | Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK | -| 9 |*`CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY`* |*`hipDeviceAttributeTotalConstantMemory`* | Memory available on device for __constant__ variables in a CUDA C kernel in bytes | -| 10 |*`CU_DEVICE_ATTRIBUTE_WARP_SIZE`* |*`hipDeviceAttributeWarpSize`* | Warp size in threads | -| 11 |*`CU_DEVICE_ATTRIBUTE_MAX_PITCH`* | | Maximum pitch in bytes allowed by memory copies | -| 12 |*`CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK`* |*`hipDeviceAttributeMaxRegistersPerBlock`* | Maximum number of 
32-bit registers available per block | -| 12 |*`CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK`* |*`hipDeviceAttributeMaxRegistersPerBlock`* | Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK | -| 13 |*`CU_DEVICE_ATTRIBUTE_CLOCK_RATE`* |*`hipDeviceAttributeClockRate`* | Typical clock frequency in kilohertz | -| 14 |*`CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT`* | | Alignment requirement for textures | -| 15 |*`CU_DEVICE_ATTRIBUTE_GPU_OVERLAP`* | | Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT| -| 16 |*`CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT`* |*`hipDeviceAttributeMultiprocessorCount`* | Number of multiprocessors on device | -| 17 |*`CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT`* | | Specifies whether there is a run time limit on kernels | -| 18 |*`CU_DEVICE_ATTRIBUTE_INTEGRATED`* | | Device is integrated with host memory | -| 19 |*`CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY`* | | Device can map host memory into CUDA address space | -| 20 |*`CU_DEVICE_ATTRIBUTE_COMPUTE_MODE`* |*`hipDeviceAttributeComputeMode`* | Compute mode (See CUcomputemode for details) | -| 21 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH`* | | Maximum 1D texture width | -| 22 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH`* | | Maximum 2D texture width | -| 23 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT`* | | Maximum 2D texture height | -| 24 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH`* | | Maximum 3D texture width | -| 25 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT`* | | Maximum 3D texture height | -| 26 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH`* | | Maximum 3D texture depth | -| 27 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH`* | | Maximum 2D layered texture width | -| 28 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT`* | | Maximum 2D layered texture height | -| 29 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS`* | | Maximum layers in a 2D layered texture | -| 27 
|*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH`* | | Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH | -| 28 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT`* | | Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT | -| 29 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES`* | | Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS | -| 30 |*`CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT`* | | Alignment requirement for surfaces | -| 31 |*`CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS`* |*`hipDeviceAttributeConcurrentKernels`* | Device can possibly execute multiple kernels concurrently | -| 32 |*`CU_DEVICE_ATTRIBUTE_ECC_ENABLED`* | | Device has ECC support enabled | -| 33 |*`CU_DEVICE_ATTRIBUTE_PCI_BUS_ID`* |*`hipDeviceAttributePciBusId`* | PCI bus ID of the device | -| 34 |*`CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID`* |*`hipDeviceAttributePciDeviceId`* | PCI device ID of the device | -| 35 |*`CU_DEVICE_ATTRIBUTE_TCC_DRIVER`* | | Device is using TCC driver model | -| 36 |*`CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE`* |*`hipDeviceAttributeMemoryClockRate`* | Peak memory clock frequency in kilohertz | -| 37 |*`CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH`* |*`hipDeviceAttributeMemoryBusWidth`* | Global memory bus width in bits | -| 38 |*`CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE`* |*`hipDeviceAttributeL2CacheSize`* | Size of L2 cache in bytes | -| 39 |*`CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR`* |*`hipDeviceAttributeMaxThreadsPerMultiProcessor`* | Maximum resident threads per multiprocessor | -| 40 |*`CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT`* | | Number of asynchronous engines | -| 41 |*`CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING`* | | Device shares a unified address space with the host | -| 42 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH`* | | Maximum 1D layered texture width | -| 43 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS`* | | Maximum layers in a 1D layered texture | -| 44 
|*`CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER`* | | Deprecated, do not use | -| 45 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH`* | | Maximum 2D texture width if CUDA_ARRAY3D_TEXTURE_GATHER is set | -| 46 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT`* | | Maximum 2D texture height if CUDA_ARRAY3D_TEXTURE_GATHER is set | -| 47 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE`* | | Alternate maximum 3D texture width | -| 48 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE`* | | Alternate maximum 3D texture height | -| 49 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE`* | | Alternate maximum 3D texture depth | -| 50 |*`CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID`* | | PCI domain ID of the device | -| 51 |*`CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT`* | | Pitch alignment requirement for textures | -| 52 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH`* | | Maximum cubemap texture width/height | -| 53 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH`* | | Maximum cubemap layered texture width/height | -| 54 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS`* | | Maximum layers in a cubemap layered texture | -| 55 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH`* | | Maximum 1D surface width | -| 56 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH`* | | Maximum 2D surface width | -| 57 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT`* | | Maximum 2D surface height | -| 58 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH`* | | Maximum 3D surface width | -| 59 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT`* | | Maximum 3D surface height | -| 60 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH`* | | Maximum 3D surface depth | -| 61 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH`* | | Maximum 1D layered surface width | -| 62 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS`* | | Maximum layers in a 1D layered surface | -| 63 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH`* | | Maximum 2D layered 
surface width | -| 64 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT`* | | Maximum 2D layered surface height | -| 65 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS`* | | Maximum layers in a 2D layered surface | -| 66 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH`* | | Maximum cubemap surface width | -| 67 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH`* | | Maximum cubemap layered surface width | -| 68 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS`* | | Maximum layers in a cubemap layered surface | -| 69 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH`* | | Maximum 1D linear texture width | -| 70 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH`* | | Maximum 2D linear texture width | -| 71 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT`* | | Maximum 2D linear texture height | -| 72 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH`* | | Maximum 2D linear texture pitch in bytes | -| 73 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH`* | | Maximum mipmapped 2D texture width | -| 74 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT`* | | Maximum mipmapped 2D texture height | -| 75 |*`CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR`* |*`hipDeviceAttributeComputeCapabilityMajor`* | Major compute capability version number | -| 76 |*`CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR`* |*`hipDeviceAttributeComputeCapabilityMinor`* | Minor compute capability version number | -| 77 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH`* | | Maximum mipmapped 1D texture width | -| 78 |*`CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED`* | | Device supports stream priorities | -| 79 |*`CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED`* | | Device supports caching globals in L1 | -| 80 |*`CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED`* | | Device supports caching locals in L1 | -| 81 |*`CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR`* 
|*`hipDeviceAttributeMaxSharedMemoryPerMultiprocessor`* | Maximum shared memory available per multiprocessor in bytes | -| 82 |*`CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR`* | | Maximum number of 32-bit registers available per multiprocessor | -| 83 |*`CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY`* |*`hipDeviceAttributeManagedMemory`* | Device can allocate managed memory on this system | -| 84 |*`CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD`* | | Device is on a multi-GPU board | -| 85 |*`CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID`* | | Unique id for a group of devices on the same multi-GPU board | -| 86 |*`CU_DEVICE_ATTRIBUTE_MAX`* | | | -| enum |***`CUevent_flags`*** | | Event creation flags | -| 0x00 |*`CU_EVENT_DEFAULT`* |*`hipEventDefault`* | Default event flag | -| 0x01 |*`CU_EVENT_BLOCKING_SYNC`* |*`hipEventBlockingSync`* | Event uses blocking synchronization | -| 0x02 |*`CU_EVENT_DISABLE_TIMING`* |*`hipEventDisableTiming`* | Event will not record timing data | -| 0x04 |*`CU_EVENT_INTERPROCESS`* |*`hipEventInterprocess`* | Event is suitable for interprocess use. 
CU_EVENT_DISABLE_TIMING must be set | -| enum |***`CUfilter_mode`*** |***`hipTextureFilterMode`*** | Texture reference filtering modes | -| 0 |*`CU_TR_FILTER_MODE_POINT`* |*`hipFilterModePoint`* | Point filter mode | -| 1 |*`CU_TR_FILTER_MODE_LINEAR`* |*`hipFilterModeLinear`* | Linear filter mode | -| enum |***`CUfunc_cache`*** |***`hipFuncCache`*** | Function cache configurations | -| 0x00 |*`CU_FUNC_CACHE_PREFER_NONE`* |*`hipFuncCachePreferNone`* | no preference for shared memory or L1 (default) | -| 0x01 |*`CU_FUNC_CACHE_PREFER_SHARED`* |*`hipFuncCachePreferShared`* | prefer larger shared memory and smaller L1 cache | -| 0x02 |*`CU_FUNC_CACHE_PREFER_L1`* |*`hipFuncCachePreferL1`* | prefer larger L1 cache and smaller shared memory | -| 0x03 |*`CU_FUNC_CACHE_PREFER_EQUAL`* |*`hipFuncCachePreferEqual`* | prefer equal sized L1 cache and shared memory | -| enum |***`CUfunction_attribute`*** | | Function properties | -| 0 |*`CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK`* | | The maximum number of threads per block, beyond which a launch of the function would fail. This number depends on both the function and the device on which the function is currently loaded. | -| 1 |*`CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES`* | | The size in bytes of statically-allocated shared memory required by this function. This does not include dynamically-allocated shared memory requested by the user at runtime. | -| 2 |*`CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES`* | | The size in bytes of user-allocated constant memory required by this function. | -| 3 |*`CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES`* | | The size in bytes of local memory used by each thread of this function. | -| 4 |*`CU_FUNC_ATTRIBUTE_NUM_REGS`* | | The number of registers used by each thread of this function. | -| 5 |*`CU_FUNC_ATTRIBUTE_PTX_VERSION`* | | The PTX virtual architecture version for which the function was compiled. This value is the major PTX version * 10 + the minor PTX version, so a PTX version 1.3 function would return the value 13. 
Note that this may return the undefined value of 0 for cubins compiled prior to CUDA 3.0. | -| 6 |*`CU_FUNC_ATTRIBUTE_BINARY_VERSION`* | | The binary architecture version for which the function was compiled. This value is the major binary version * 10 + the minor binary version, so a binary version 1.3 function would return the value 13. Note that this will return a value of 10 for legacy cubins that do not have a properly-encoded binary architecture version. | -| 7 |*`CU_FUNC_ATTRIBUTE_CACHE_MODE_CA`* | | The attribute to indicate whether the function has been compiled with user specified option "-Xptxas --dlcm=ca" set. | -| 8 |*`CU_FUNC_ATTRIBUTE_MAX`* | | | -| enum |***`CUgraphicsMapResourceFlags`*** | | Flags for mapping and unmapping interop resources | -| 0x00 |*`CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE`* | | | -| 0x01 |*`CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY`* | | | -| 0x02 |*`CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD`* | | | -| enum |***`CUgraphicsRegisterFlags`*** | | Flags to register a graphics resource | -| 0x00 |*`CU_GRAPHICS_REGISTER_FLAGS_NONE`* | | | -| 0x01 |*`CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY`* | | | -| 0x02 |*`CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD`* | | | -| 0x04 |*`CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST`* | | | -| 0x08 |*`CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER`* | | | -| enum |***`CUipcMem_flags`*** | | CUDA Ipc Mem Flags | -| 0x1 |*`CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS`* |*`hipIpcMemLazyEnablePeerAccess`* | Automatically enable peer access between remote devices as needed | -| enum |***`CUjit_cacheMode`*** | | Caching modes for dlcm | -| 0 |*`CU_JIT_CACHE_OPTION_NONE`* | | Compile with no -dlcm flag specified | -| |*`CU_JIT_CACHE_OPTION_CG`* | | Compile with L1 cache disabled | -| |*`CU_JIT_CACHE_OPTION_CA`* | | Compile with L1 cache enabled | -| enum |***`CUjit_fallback`*** | | Cubin matching fallback strategies | -| 0 |*`CU_PREFER_PTX`* | | Prefer to compile ptx if exact binary match not found | -| |*`CU_PREFER_BINARY`* | | Prefer to 
fall back to compatible binary code if exact match not found | -| enum |***`CUjit_option`*** | | Online compiler and linker options | -| 0 |*`CU_JIT_MAX_REGISTERS`* | | Max number of registers that a thread may use. Option type: unsigned int Applies to: compiler only. | -| |*`CU_JIT_THREADS_PER_BLOCK`* | | IN: Specifies minimum number of threads per block to target compilation for OUT: Returns the number of threads the compiler actually targeted. This restricts the resource utilization fo the compiler (e.g. max registers) such that a block with the given number of threads should be able to launch based on register limitations. Note, this option does not currently take into account any other resource limitations, such as shared memory utilization. Cannot be combined with CU_JIT_TARGET. Option type: unsigned int Applies to: compiler only. | -| |*`CU_JIT_WALL_TIME`* | | Overwrites the option value with the total wall clock time, in milliseconds, spent in the compiler and linker Option type: float Applies to: compiler and linker. | -| |*`CU_JIT_INFO_LOG_BUFFER`* | | Pointer to a buffer in which to print any log messages that are informational in nature (the buffer size is specified via option CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES) Option type: char * Applies to: compiler and linker. | -| |*`CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES`* | | IN: Log buffer size in bytes. Log messages will be capped at this size (including null terminator) OUT: Amount of log buffer filled with messages Option type: unsigned int Applies to: compiler and linker. | -| |*`CU_JIT_OPTIMIZATION_LEVEL`* | | Level of optimizations to apply to generated code (0 - 4), with 4 being the default and highest level of optimizations. Option type: unsigned int Applies to: compiler only. | -| |*`CU_JIT_TARGET_FROM_CUCONTEXT`* | | No option value required. Determines the target based on the current attached context (default) Option type: No option value needed Applies to: compiler and linker. 
| -| |*`CU_JIT_TARGET`* | | Target is chosen based on supplied CUjit_target. Cannot be combined with CU_JIT_THREADS_PER_BLOCK. Option type: unsigned int for enumerated type CUjit_target Applies to: compiler and linker. | -| |*`CU_JIT_FALLBACK_STRATEGY`* | | Specifies choice of fallback strategy if matching cubin is not found. Choice is based on supplied CUjit_fallback. This option cannot be used with cuLink* APIs as the linker requires exact matches. Option type: unsigned int for enumerated type CUjit_fallback Applies to: compiler only. | -| |*`CU_JIT_GENERATE_DEBUG_INFO`* | | Specifies whether to create debug information in output (-g) (0: false, default) Option type: int Applies to: compiler and linker. | -| |*`CU_JIT_LOG_VERBOSE`* | | Generate verbose log messages (0: false, default) Option type: int Applies to: compiler and linker. | -| |*`CU_JIT_GENERATE_LINE_INFO`* | | Generate line number information (-lineinfo) (0: false, default) Option type: int Applies to: compiler only. | -| |*`CU_JIT_CACHE_MODE`* | | Specifies whether to enable caching explicitly (-dlcm) Choice is based on supplied CUjit_cacheMode_enum. Option type: unsigned int for enumerated type CUjit_cacheMode_enum Applies to: compiler only. | -| |*`CU_JIT_NUM_OPTIONS`* | | | -| enum |***`CUjit_target`*** | | Online compilation targets | -| 10 |*`CU_TARGET_COMPUTE_10`* | | Compute device class 1.0. | -| 11 |*`CU_TARGET_COMPUTE_11`* | | Compute device class 1.1. | -| 12 |*`CU_TARGET_COMPUTE_12`* | | Compute device class 1.2. | -| 13 |*`CU_TARGET_COMPUTE_13`* | | Compute device class 1.3. | -| 20 |*`CU_TARGET_COMPUTE_20`* | | Compute device class 2.0. | -| 21 |*`CU_TARGET_COMPUTE_21`* | | Compute device class 2.1. | -| 30 |*`CU_TARGET_COMPUTE_30`* | | Compute device class 3.0. | -| 32 |*`CU_TARGET_COMPUTE_32`* | | Compute device class 3.2. | -| 35 |*`CU_TARGET_COMPUTE_35`* | | Compute device class 3.5. | -| 37 |*`CU_TARGET_COMPUTE_37`* | | Compute device class 3.7. 
| -| 50 |*`CU_TARGET_COMPUTE_50`* | | Compute device class 5.0. | -| 52 |*`CU_TARGET_COMPUTE_52`* | | Compute device class 5.2. | -| enum |***`CUjitInputType`*** | | Device code formats | -| 0 |*`CU_JIT_INPUT_CUBIN`* | | Compiled device-class-specific device code Applicable options: none. | -| |*`CU_JIT_INPUT_PTX`* | | PTX source code Applicable options: PTX compiler options. | -| |*`CU_JIT_INPUT_FATBINARY`* | | Bundle of multiple cubins and/or PTX of some device code Applicable options: PTX compiler options, CU_JIT_FALLBACK_STRATEGY. | -| |*`CU_JIT_INPUT_OBJECT`* | | Host object with embedded device code Applicable options: PTX compiler options, CU_JIT_FALLBACK_STRATEGY. | -| |*`CU_JIT_INPUT_LIBRARY`* | | Archive of host objects with embedded device code Applicable options: PTX compiler options, CU_JIT_FALLBACK_STRATEGY. | -| |*`CU_JIT_NUM_INPUT_TYPES`* | | | -| enum |***`CUlimit`*** |***`hipLimit_t`*** | Limits | -| 0x00 |*`CU_LIMIT_STACK_SIZE`* | | GPU thread stack size. | -| 0x01 |*`CU_LIMIT_PRINTF_FIFO_SIZE`* | | GPU printf FIFO size. | -| 0x02 |*`CU_LIMIT_MALLOC_HEAP_SIZE`* |*`hipLimitMallocHeapSize`* | GPU malloc heap size. | -| 0x03 |*`CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH`* | | GPU device runtime launch synchronize depth. | -| 0x04 |*`CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT`* | | GPU device runtime pending launch count. | -| |*`CU_LIMIT_MAX`* | | | -| enum |***`CUmemAttach_flags`*** | | CUDA Mem Attach Flags | -| 0x1 |*`CU_MEM_ATTACH_GLOBAL`* | | Memory can be accessed by any stream on any device. | -| 0x2 |*`CU_MEM_ATTACH_HOST`* | | Memory cannot be accessed by any stream on any device. | -| 0x4 |*`CU_MEM_ATTACH_SINGLE`* | | Memory can only be accessed by a single stream on the associated device. 
| -| enum |***`CUmemorytype`*** | | Memory types | -| 0x01 |*`CU_MEMORYTYPE_HOST`* | | Host memory | -| 0x02 |*`CU_MEMORYTYPE_DEVICE`* | | Device memory | -| 0x03 |*`CU_MEMORYTYPE_ARRAY`* | | Array memory | -| 0x04 |*`CU_MEMORYTYPE_UNIFIED`* | | Unified device or host memory | -| enum |***`CUoccupancy_flags`*** | | Occupancy calculator flag | -| 0x00 |*`CU_OCCUPANCY_DEFAULT`* | | Default behavior | -| 0x01 |*`CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE`* | | Assume global caching is enabled and cannot be automatically turned off | -| enum |***`CUpointer_attribute`*** | | Pointer information | -| 1 |*`CU_POINTER_ATTRIBUTE_CONTEXT`* | | The CUcontext on which a pointer was allocated or registered | -| 2 |*`CU_POINTER_ATTRIBUTE_MEMORY_TYPE`* | | The CUmemorytype describing the physical location of a pointer | -| 3 |*`CU_POINTER_ATTRIBUTE_DEVICE_POINTER`* | | The address at which a pointer's memory may be accessed on the device | -| 4 |*`CU_POINTER_ATTRIBUTE_HOST_POINTER`* | | The address at which a pointer's memory may be accessed on the host | -| 5 |*`CU_POINTER_ATTRIBUTE_P2P_TOKENS`* | | A pair of tokens for use with the nv-p2p.h Linux kernel interface | -| 6 |*`CU_POINTER_ATTRIBUTE_SYNC_MEMOPS`* | | Synchronize every synchronous memory operation initiated on this region | -| 7 |*`CU_POINTER_ATTRIBUTE_BUFFER_ID`* | | A process-wide unique ID for an allocated memory region | -| 8 |*`CU_POINTER_ATTRIBUTE_IS_MANAGED`* | | Indicates if the pointer points to managed memory | -| enum |***`CUmemorytype`*** | | Resource types | -| 0x00 |*`CU_RESOURCE_TYPE_ARRAY`* | | Array resoure | -| 0x01 |*`CU_RESOURCE_TYPE_MIPMAPPED_ARRAY`* | | Mipmapped array resource | -| 0x02 |*`CU_RESOURCE_TYPE_LINEAR`* | | Linear resource | -| 0x03 |*`CU_RESOURCE_TYPE_PITCH2D`* | | Pitch 2D resource | -| enum |***`CUresourceViewFormat`*** | | Resource view format | -| 0x00 |*`CU_RES_VIEW_FORMAT_NONE`* | | No resource view format (use underlying resource format) | -| 0x01 |*`CU_RES_VIEW_FORMAT_UINT_1X8`* 
| | 1 channel unsigned 8-bit integers | -| 0x02 |*`CU_RES_VIEW_FORMAT_UINT_2X8`* | | 2 channel unsigned 8-bit integers | -| 0x03 |*`CU_RES_VIEW_FORMAT_UINT_4X8`* | | 4 channel unsigned 8-bit integers | -| 0x04 |*`CU_RES_VIEW_FORMAT_SINT_1X8`* | | 1 channel signed 8-bit integers | -| 0x05 |*`CU_RES_VIEW_FORMAT_SINT_2X8`* | | 2 channel signed 8-bit integers | -| 0x06 |*`CU_RES_VIEW_FORMAT_SINT_4X8`* | | 4 channel signed 8-bit integers | -| 0x07 |*`CU_RES_VIEW_FORMAT_UINT_1X16`* | | 1 channel unsigned 16-bit integers | -| 0x08 |*`CU_RES_VIEW_FORMAT_UINT_2X16`* | | 2 channel unsigned 16-bit integers | -| 0x09 |*`CU_RES_VIEW_FORMAT_UINT_4X16`* | | 4 channel unsigned 16-bit integers | -| 0x0a |*`CU_RES_VIEW_FORMAT_SINT_1X16`* | | 1 channel signed 16-bit integers | -| 0x0b |*`CU_RES_VIEW_FORMAT_SINT_2X16`* | | 2 channel signed 16-bit integers | -| 0x0c |*`CU_RES_VIEW_FORMAT_SINT_4X16`* | | 4 channel signed 16-bit integers | -| 0x0d |*`CU_RES_VIEW_FORMAT_UINT_1X32`* | | 1 channel unsigned 32-bit integers | -| 0x0e |*`CU_RES_VIEW_FORMAT_UINT_2X32`* | | 2 channel unsigned 32-bit integers | -| 0x0f |*`CU_RES_VIEW_FORMAT_UINT_4X32`* | | 4 channel unsigned 32-bit integers | -| 0x10 |*`CU_RES_VIEW_FORMAT_SINT_1X32`* | | 1 channel signed 32-bit integers | -| 0x11 |*`CU_RES_VIEW_FORMAT_SINT_2X32`* | | 2 channel signed 32-bit integers | -| 0x12 |*`CU_RES_VIEW_FORMAT_SINT_4X32`* | | 4 channel signed 32-bit integers | -| 0x13 |*`CU_RES_VIEW_FORMAT_FLOAT_1X16`* | | 1 channel 16-bit floating point | -| 0x14 |*`CU_RES_VIEW_FORMAT_FLOAT_2X16`* | | 2 channel 16-bit floating point | -| 0x15 |*`CU_RES_VIEW_FORMAT_FLOAT_4X16`* | | 4 channel 16-bit floating point | -| 0x16 |*`CU_RES_VIEW_FORMAT_FLOAT_1X32`* | | 1 channel 32-bit floating point | -| 0x17 |*`CU_RES_VIEW_FORMAT_FLOAT_2X32`* | | 2 channel 32-bit floating point | -| 0x18 |*`CU_RES_VIEW_FORMAT_FLOAT_4X32`* | | 4 channel 32-bit floating point | -| 0x19 |*`CU_RES_VIEW_FORMAT_UNSIGNED_BC1`* | | Block compressed 1 | -| 0x1a 
|*`CU_RES_VIEW_FORMAT_UNSIGNED_BC3`* | | Block compressed 2 | -| 0x1b |*`CU_RES_VIEW_FORMAT_UNSIGNED_BC3`* | | Block compressed 3 | -| 0x1c |*`CU_RES_VIEW_FORMAT_UNSIGNED_BC4`* | | Block compressed 4 unsigned | -| 0x1d |*`CU_RES_VIEW_FORMAT_SIGNED_BC4`* | | Block compressed 4 signed | -| 0x1e |*`CU_RES_VIEW_FORMAT_UNSIGNED_BC5`* | | Block compressed 5 unsigned | -| 0x1f |*`CU_RES_VIEW_FORMAT_SIGNED_BC5`* | | Block compressed 5 signed | -| 0x20 |*`CU_RES_VIEW_FORMAT_UNSIGNED_BC6H`* | | Block compressed 6 unsigned half-float | -| 0x21 |*`CU_RES_VIEW_FORMAT_SIGNED_BC6H`* | | Block compressed 6 signed half-float | -| 0x22 |*`CU_RES_VIEW_FORMAT_UNSIGNED_BC7`* | | Block compressed 7 | -| enum |***`CUresult`*** |***`hipError_t`*** | Error codes | -| 0 |*`CUDA_SUCCESS`* |*`hipSuccess`* | The API call returned with no errors. In the case of query calls, this can also mean that the operation being queried is complete (see cuEventQuery() and cuStreamQuery()). | -| 1 |*`CUDA_ERROR_INVALID_VALUE`* |*`hipErrorInvalidValue`* | This indicates that one or more of the parameters passed to the API call is not within an acceptable range of values. | -| 2 |*`CUDA_ERROR_OUT_OF_MEMORY`* |*`hipErrorMemoryAllocation`* | The API call failed because it was unable to allocate enough memory to perform the requested operation. | -| 3 |*`CUDA_ERROR_NOT_INITIALIZED`* |*`hipErrorNotInitialized`* | This indicates that the CUDA driver has not been initialized with cuInit() or that initialization has failed. | -| 4 |*`CUDA_ERROR_DEINITIALIZED`* |*`hipErrorDeinitialized`* | This indicates that the CUDA driver is in the process of shutting down. | -| 5 |*`CUDA_ERROR_PROFILER_DISABLED`* |*`hipErrorProfilerDisabled`* | This indicates profiler is not initialized for this run. This can happen when the application is running with external profiling tools like visual profiler. 
| -| 6 |*`CUDA_ERROR_PROFILER_NOT_INITIALIZED`* |*`hipErrorProfilerNotInitialized`* | Deprecated This error return is deprecated as of CUDA 5.0. It is no longer an error to attempt to enable/disable the profiling via cuProfilerStart or cuProfilerStop without initialization. | -| 7 |*`CUDA_ERROR_PROFILER_ALREADY_STARTED`* |*`hipErrorProfilerAlreadyStarted`* | Deprecated This error return is deprecated as of CUDA 5.0. It is no longer an error to call cuProfilerStart() when profiling is already enabled. | -| 8 |*`CUDA_ERROR_PROFILER_ALREADY_STOPPED`* |*`hipErrorProfilerAlreadyStopped`* | Deprecated This error return is deprecated as of CUDA 5.0. It is no longer an error to call cuProfilerStop() when profiling is already disabled. | -| 100 |*`CUDA_ERROR_NO_DEVICE`* |*`hipErrorNoDevice`* | This indicates that no CUDA-capable devices were detected by the installed CUDA driver. | -| 101 |*`CUDA_ERROR_INVALID_DEVICE`* |*`hipErrorInvalidDevice`* | This indicates that the device ordinal supplied by the user does not correspond to a valid CUDA device. | -| 200 |*`CUDA_ERROR_INVALID_IMAGE`* |*`hipErrorInvalidImage`* | This indicates that the device kernel image is invalid. This can also indicate an invalid CUDA module. | -| 201 |*`CUDA_ERROR_INVALID_CONTEXT`* |*`hipErrorInvalidContext`* | This most frequently indicates that there is no context bound to the current thread. This can also be returned if the context passed to an API call is not a valid handle (such as a context that has had cuCtxDestroy() invoked on it). This can also be returned if a user mixes different API versions (i.e. 3010 context with 3020 API calls). See cuCtxGetApiVersion() for more details. | -| 202 |*`CUDA_ERROR_CONTEXT_ALREADY_CURRENT`* |*`hipErrorContextAlreadyCurrent`* | This indicated that the context being supplied as a parameter to the API call was already the active context. Deprecated This error return is deprecated as of CUDA 3.2. 
It is no longer an error to attempt to push the active context via cuCtxPushCurrent(). | -| 205 |*`CUDA_ERROR_MAP_FAILED`* |*`hipErrorMapFailed`* | This indicates that a map or register operation has failed. | -| 206 |*`CUDA_ERROR_UNMAP_FAILED`* |*`hipErrorUnmapFailed`* | This indicates that an unmap or unregister operation has failed. | -| 207 |*`CUDA_ERROR_ARRAY_IS_MAPPED`* |*`hipErrorArrayIsMapped`* | This indicates that the specified array is currently mapped and thus cannot be destroyed. | -| 208 |*`CUDA_ERROR_ALREADY_MAPPED`* |*`hipErrorAlreadyMapped`* | This indicates that the resource is already mapped. | -| 209 |*`CUDA_ERROR_NO_BINARY_FOR_GPU`* |*`hipErrorNoBinaryForGpu* | This indicates that there is no kernel image available that is suitable for the device. This can occur when a user specifies code generation options for a particular CUDA source file that do not include the corresponding device configuration. | -| 210 |*`CUDA_ERROR_ALREADY_ACQUIRED`* |*`hipErrorAlreadyAcquired* | This indicates that a resource has already been acquired. | -| 211 |*`CUDA_ERROR_NOT_MAPPED`* |*`hipErrorNotMapped`* | This indicates that a resource is not mapped. | -| 212 |*`CUDA_ERROR_NOT_MAPPED_AS_ARRAY`* |*`hipErrorNotMappedAsArray`* | This indicates that a mapped resource is not available for access as an array. | -| 213 |*`CUDA_ERROR_NOT_MAPPED_AS_POINTER`* |*`hipErrorNotMappedAsPointer`* | This indicates that a mapped resource is not available for access as a pointer. | -| 214 |*`CUDA_ERROR_ECC_UNCORRECTABLE`* |*`hipErrorECCNotCorrectable`* | This indicates that an uncorrectable ECC error was detected during execution. | -| 215 |*`CUDA_ERROR_UNSUPPORTED_LIMIT`* |*`hipErrorUnsupportedLimit`* | This indicates that the CUlimit passed to the API call is not supported by the active device. 
| -| 216 |*`CUDA_ERROR_CONTEXT_ALREADY_IN_USE`* |*`hipErrorContextAlreadyInUse`* | This indicates that the CUcontext passed to the API call can only be bound to a single CPU thread at a time but is already bound to a CPU thread. | -| 217 |*`CUDA_ERROR_PEER_ACCESS_UNSUPPORTED`* |*`hipErrorPeerAccessUnsupported`* | This indicates that peer access is not supported across the given devices. | -| 218 |*`CUDA_ERROR_INVALID_PTX`* |*`hipErrorInvalidKernelFile`* | This indicates that a PTX JIT compilation failed. | -| 219 |*`CUDA_ERROR_INVALID_GRAPHICS_CONTEXT`* |*`hipErrorInvalidGraphicsContext`* | This indicates an error with OpenGL or DirectX context. | -| 300 |*`CUDA_ERROR_INVALID_SOURCE`* |*`hipErrorInvalidSource`* | This indicates that the device kernel source is invalid. | -| 301 |*`CUDA_ERROR_FILE_NOT_FOUND`* |*`hipErrorFileNotFound`* | This indicates that the file specified was not found. | -| 302 |*`CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND`* |*`hipErrorSharedObjectSymbolNotFound`* | This indicates that a link to a shared object failed to resolve. | -| 303 |*`CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`* |*`hipErrorSharedObjectInitFailed`* | This indicates that initialization of a shared object failed. | -| 304 |*`CUDA_ERROR_OPERATING_SYSTEM`* |*`hipErrorOperatingSystem`* | This indicates that an OS call failed. | -| 400 |*`CUDA_ERROR_INVALID_HANDLE`* |*`hipErrorInvalidResourceHandle`* | This indicates that a resource handle passed to the API call was not valid. Resource handles are opaque types like CUstream and CUevent. | -| 500 |*`CUDA_ERROR_NOT_FOUND`* |*`hipErrorNotFound`* | This indicates that a named symbol was not found. Examples of symbols are global/constant variable names, texture names, and surface names. | -| 600 |*`CUDA_ERROR_NOT_READY`* |*`hipErrorNotReady`* | This indicates that asynchronous operations issued previously have not completed yet. 
This result is not actually an error, but must be indicated differently than CUDA_SUCCESS (which indicates completion). Calls that may return this value include cuEventQuery() and cuStreamQuery(). | -| 700 |*`CUDA_ERROR_ILLEGAL_ADDRESS`* |*`hipErrorIllegalAddress`* | While executing a kernel, the device encountered a load or store instruction on an invalid memory address. The context cannot be used, so it must be destroyed (and a new one should be created). All existing device memory allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA. | -| 701 |*`CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES`* |*`hipErrorLaunchOutOfResources`* | This indicates that a launch did not occur because it did not have appropriate resources. This error usually indicates that the user has attempted to pass too many arguments to the device kernel, or the kernel launch specifies too many threads for the kernel's register count. Passing arguments of the wrong size (i.e. a 64-bit pointer when a 32-bit int is expected) is equivalent to passing too many arguments and can also result in this error. | -| 702 |*`CUDA_ERROR_LAUNCH_TIMEOUT`* |*`hipErrorLaunchTimeOut`* | This indicates that the device kernel took too long to execute. This can only occur if timeouts are enabled - see the device attribute CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information. The context cannot be used (and must be destroyed similar to CUDA_ERROR_LAUNCH_FAILED). All existing device memory allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA. | -| 703 |*`CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING`* | | This error indicates a kernel launch that uses an incompatible texturing mode. 
| -| 704 |*`CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED`* |*`hipErrorPeerAccessAlreadyEnabled`* | This error indicates that a call to cuCtxEnablePeerAccess() is trying to re-enable peer access to a context which has already had peer access to it enabled. | -| 705 |*`CUDA_ERROR_PEER_ACCESS_NOT_ENABLED`* |*`hipErrorPeerAccessNotEnabled`* | This error indicates that cuCtxDisablePeerAccess() is trying to disable peer access which has not been enabled yet via cuCtxEnablePeerAccess(). | -| 708 |*`CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE`* | | This error indicates that cuCtxDisablePeerAccess() is trying to disable peer access which has not been enabled yet via cuCtxEnablePeerAccess(). | -| 709 |*`CUDA_ERROR_CONTEXT_IS_DESTROYED`* | | This error indicates that the context current to the calling thread has been destroyed using cuCtxDestroy, or is a primary context which has not yet been initialized. | -| 710 |*`CUDA_ERROR_ASSERT`* | | A device-side assert triggered during kernel execution. The context cannot be used anymore, and must be destroyed. All existing device memory allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA. | -| 711 |*`CUDA_ERROR_TOO_MANY_PEERS`* | | This error indicates that the hardware resources required to enable peer access have been exhausted for one or more of the devices passed to cuCtxEnablePeerAccess(). | -| 712 |*`CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED`* |*`hipErrorHostMemoryAlreadyRegistered`* | This error indicates that the memory range passed to cuMemHostRegister() has already been registered. | -| 713 |*`CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED`* |*`hipErrorHostMemoryNotRegistered`* | This error indicates that the pointer passed to cuMemHostUnregister() does not correspond to any currently registered memory region. | -| 714 |*`CUDA_ERROR_HARDWARE_STACK_ERROR`* | | While executing a kernel, the device encountered a stack error. This can be due to stack corruption or exceeding the stack size limit. 
The context cannot be used, so it must be destroyed (and a new one should be created). All existing device memory allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA. | -| 715 |*`CUDA_ERROR_ILLEGAL_INSTRUCTION`* | | While executing a kernel, the device encountered an illegal instruction. The context cannot be used, so it must be destroyed (and a new one should be created). All existing device memory allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA. | -| 716 |*`CUDA_ERROR_MISALIGNED_ADDRESS`* | | While executing a kernel, the device encountered a load or store instruction on a memory address which is not aligned. The context cannot be used, so it must be destroyed (and a new one should be created). All existing device memory allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA. | -| 717 |*`CUDA_ERROR_INVALID_ADDRESS_SPACE`* | | While executing a kernel, the device encountered an instruction which can only operate on memory locations in certain address spaces (global, shared, or local), but was supplied a memory address not belonging to an allowed address space. The context cannot be used, so it must be destroyed (and a new one should be created). All existing device memory allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA. | -| 718 |*`CUDA_ERROR_INVALID_PC`* | | While executing a kernel, the device program counter wrapped its address space. The context cannot be used, so it must be destroyed (and a new one should be created). All existing device memory allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA. | -| 719 |*`CUDA_ERROR_LAUNCH_FAILED`* | | An exception occurred on the device while executing a kernel. 
Common causes include dereferencing an invalid device pointer and accessing out of bounds shared memory. The context cannot be used, so it must be destroyed (and a new one should be created). All existing device memory allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA. | -| 800 |*`CUDA_ERROR_NOT_PERMITTED`* | | This error indicates that the attempted operation is not permitted. | -| 801 |*`CUDA_ERROR_NOT_SUPPORTED`* | | This error indicates that the attempted operation is not supported on the current system or device. | -| 999 |*`CUDA_ERROR_UNKNOWN`* | | This indicates that an unknown internal error has occurred. | -| enum |***`CUstream_flags`*** |***`hipStreamFlags`*** | Stream creation flags | -| 0x0 |*`CU_STREAM_DEFAULT`* |*`hipStreamDefault`* | Default stream flag | -| 0x1 |*`CU_STREAM_NON_BLOCKING`* |*`hipStreamNonBlocking`* | Stream does not synchronize with stream 0 (the NULL stream) | -| typedef | `CUarray` | `hipArray *` | CUDA array | -| struct | `CUarray_st` | `hipArray` | CUDA array | -| typedef | `CUcontext` | `hipCtx_t` | CUDA context | -| typedef | `CUdevice` | `hipDevice_t` | CUDA device | -| typedef | `CUdeviceptr` | `hipDeviceptr_t` | CUDA device pointer CUdeviceptr is defined as an unsigned integer type whose size matches the size of a pointer on the target platform. 
| -| typedef | `CUevent` | `hipEvent_t` | CUDA event | -| typedef | `CUfunction` | `hipFunction_t` | CUDA function | -| typedef | `CUgraphicsResource` | | CUDA graphics interop resource | -| typedef | `CUmipmappedArray` | | CUDA mipmapped array | -| typedef | `CUmodule` | `hipModule_t` | CUDA module | -| typedef | `CUstream` | `hipStream_t` | CUDA module | -| typedef | `CUstreamCallback` | `hipStreamCallback_t` | CUDA stream callback | -| typedef | `CUsurfObject` | | An opaque value that represents a CUDA surface object | -| typedef | `CUsurfref` | | CUDA surface reference | -| typedef | `CUtexObject` | | An opaque value that represents a CUDA texture object | -| typedef | `CUtexref` | | CUDA texture reference | -| define |`CU_IPC_HANDLE_SIZE` | | CUDA IPC handle size. | -| define |`CU_LAUNCH_PARAM_BUFFER_POINTER` | `HIP_LAUNCH_PARAM_BUFFER_POINTER` | Indicator that the next value in the extra parameter to cuLaunchKernel will be a pointer to a buffer containing all kernel parameters used for launching kernel f. This buffer needs to honor all alignment/padding requirements of the individual parameters. If CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the extra array, then CU_LAUNCH_PARAM_BUFFER_POINTER will have no effect. | -| define |`CU_LAUNCH_PARAM_BUFFER_SIZE` | `HIP_LAUNCH_PARAM_BUFFER_SIZE` | Indicator that the next value in the extra parameter to cuLaunchKernel will be a pointer to a size_t which contains the size of the buffer specified with CU_LAUNCH_PARAM_BUFFER_POINTER. It is required that CU_LAUNCH_PARAM_BUFFER_POINTER also be specified in the extra array if the value associated with CU_LAUNCH_PARAM_BUFFER_SIZE is not zero. | -| define |`CU_LAUNCH_PARAM_END` | `HIP_LAUNCH_PARAM_END` | End of array terminator for the extra parameter to cuLaunchKernel. | -| define |`CU_MEMHOSTALLOC_DEVICEMAP` | | If set, host memory is mapped into CUDA address space and cuMemHostGetDevicePointer() may be called on the host pointer. Flag for cuMemHostAlloc(). 
| -| define |`CU_MEMHOSTALLOC_PORTABLE` | | If set, host memory is portable between CUDA contexts. Flag for cuMemHostAlloc(). | -| define |`CU_MEMHOSTALLOC_WRITECOMBINED` | | If set, host memory is allocated as write-combined - fast to write, faster to DMA, slow to read except via SSE4 streaming load instruction (MOVNTDQA). Flag for cuMemHostAlloc(). | -| define |`CU_MEMHOSTREGISTER_DEVICEMAP` | | If set, host memory is mapped into CUDA address space and cuMemHostGetDevicePointer() may be called on the host pointer. Flag for cuMemHostRegister(). | -| define |`CU_MEMHOSTREGISTER_IOMEMORY` | | If set, the passed memory pointer is treated as pointing to some memory-mapped I/O space, e.g. belonging to a third-party PCIe device. On Windows the flag is a no-op. On Linux that memory is marked as non cache-coherent for the GPU and is expected to be physically contiguous. It may return CUDA_ERROR_NOT_PERMITTED if run as an unprivileged user, CUDA_ERROR_NOT_SUPPORTED on older Linux kernel versions. On all other platforms, it is not supported and CUDA_ERROR_NOT_SUPPORTED is returned. Flag for cuMemHostRegister(). | -| define |`CU_MEMHOSTREGISTER_PORTABLE` | | If set, host memory is portable between CUDA contexts. Flag for cuMemHostRegister(). | -| define |`CU_PARAM_TR_DEFAULT` | | For texture references loaded into the module, use default texunit from texture reference. | -| define |`CU_STREAM_LEGACY` | | Legacy stream handle. Stream handle that can be passed as a CUstream to use an implicit stream with legacy synchronization behavior. See details of the synchronization behavior. | -| define |`CU_STREAM_PER_THREAD` | | Per-thread stream handle. Stream handle that can be passed as a CUstream to use an implicit stream with perthread synchronization behavior. See details of the synchronization behavior. | -| define |`CU_TRSA_OVERRIDE_FORMAT` | | Override the texref format with a format inferred from the array. Flag for cuTexRefSetArray(). 
| -| define |`CU_TRSF_NORMALIZED_COORDINATES` | | Use normalized texture coordinates in the range [0,1) instead of [0,dim). Flag for cuTexRefSetFlags(). | -| define |`CU_TRSF_SRGB` | | Perform sRGB->linear conversion during texture read. Flag for cuTexRefSetFlags(). | -| define |`CUDA_ARRAY3D_2DARRAY` | | Deprecated, use CUDA_ARRAY3D_LAYERED. | -| define |`CUDA_ARRAY3D_CUBEMAP` | | If set, the CUDA array is a collection of six 2D arrays, representing faces of a cube. The width of such a CUDA array must be equal to its height, and Depth must be six. If CUDA_ARRAY3D_LAYERED flag is also set, then the CUDA array is a collection of cubemaps and Depth must be a multiple of six. | -| define |`CUDA_ARRAY3D_DEPTH_TEXTURE` | | This flag if set indicates that the CUDA array is a DEPTH_TEXTURE. | -| define |`CUDA_ARRAY3D_LAYERED` | | If set, the CUDA array is a collection of layers, where each layer is either a 1D or a 2D array and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies the number of layers, not the depth of a 3D array. | -| define |`CUDA_ARRAY3D_SURFACE_LDST` | | This flag must be set in order to bind a surface reference to the CUDA array. | -| define |`CUDA_ARRAY3D_TEXTURE_GATHER` | | This flag must be set in order to perform texture gather operations on a CUDA array. | -| define |`CUDA_VERSION` | | CUDA API version number. 
| +| **type** | **CUDA** | **HIP** | +|-------------:|---------------------------------------------------------------|------------------------------------------------------------| +| struct | `CUDA_ARRAY3D_DESCRIPTOR` | | +| struct | `CUDA_ARRAY_DESCRIPTOR` | | +| struct | `CUDA_MEMCPY2D` | | +| struct | `CUDA_MEMCPY3D` | | +| struct | `CUDA_MEMCPY3D_PEER` | | +| struct | `CUDA_POINTER_ATTRIBUTE_P2P_TOKENS` | | +| struct | `CUDA_RESOURCE_DESC` | | +| struct | `CUDA_RESOURCE_VIEW_DESC` | | +| struct | `CUdevprop` | `hipDeviceProp_t` | +| struct | `CUipcEventHandle` | | +| struct | `CUipcMemHandle` | | +| enum |***`CUaddress_mode`*** | | +| 0 |*`CU_TR_ADDRESS_MODE_WRAP`* | | +| 1 |*`CU_TR_ADDRESS_MODE_CLAMP`* | | +| 2 |*`CU_TR_ADDRESS_MODE_MIRROR`* | | +| 3 |*`CU_TR_ADDRESS_MODE_BORDER`* | | +| enum |***`CUarray_cubemap_face`*** | | +| 0x00 |*`CU_CUBEMAP_FACE_POSITIVE_X`* | | +| 0x01 |*`CU_CUBEMAP_FACE_NEGATIVE_X`* | | +| 0x02 |*`CU_CUBEMAP_FACE_POSITIVE_Y`* | | +| 0x03 |*`CU_CUBEMAP_FACE_NEGATIVE_Y`* | | +| 0x04 |*`CU_CUBEMAP_FACE_POSITIVE_Z`* | | +| 0x05 |*`CU_CUBEMAP_FACE_NEGATIVE_Z`* | | +| enum |***`CUarray_format`*** | | +| 0x01 |*`CU_AD_FORMAT_UNSIGNED_INT8`* | | +| 0x02 |*`CU_AD_FORMAT_UNSIGNED_INT16`* | | +| 0x03 |*`CU_AD_FORMAT_UNSIGNED_INT32`* | | +| 0x08 |*`CU_AD_FORMAT_SIGNED_INT8`* | | +| 0x09 |*`CU_AD_FORMAT_SIGNED_INT16`* | | +| 0x0a |*`CU_AD_FORMAT_SIGNED_INT32`* | | +| 0x10 |*`CU_AD_FORMAT_HALF`* | | +| 0x20 |*`CU_AD_FORMAT_FLOAT`* | | +| enum |***`CUctx_flags`*** | | +| 0x00 |*`CU_CTX_SCHED_AUTO`* | | +| 0x01 |*`CU_CTX_SCHED_SPIN`* | | +| 0x02 |*`CU_CTX_SCHED_YIELD`* | | +| 0x04 |*`CU_CTX_SCHED_BLOCKING_SYNC`* | | +| 0x04 |*`CU_CTX_BLOCKING_SYNC`* | | +| 0x07 |*`CU_CTX_SCHED_MASK`* | | +| 0x08 |*`CU_CTX_MAP_HOST`* | | +| 0x10 |*`CU_CTX_LMEM_RESIZE_TO_MAX`* | | +| 0x1f |*`CU_CTX_FLAGS_MASK`* | | +| enum |***`CUdevice_attribute`*** | | +| 1 |*`CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK`* |*`hipDeviceAttributeMaxThreadsPerBlock`* | +| 2 
|*`CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X`* |*`hipDeviceAttributeMaxBlockDimX`* | +| 3 |*`CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y`* |*`hipDeviceAttributeMaxBlockDimY`* | +| 4 |*`CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z`* |*`hipDeviceAttributeMaxBlockDimZ`* | +| 5 |*`CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X`* |*`hipDeviceAttributeMaxGridDimX`* | +| 6 |*`CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y`* |*`hipDeviceAttributeMaxGridDimY`* | +| 7 |*`CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z`* |*`hipDeviceAttributeMaxGridDimZ`* | +| 8 |*`CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK`* |*`hipDeviceAttributeMaxSharedMemoryPerBlock`* | +| 8 |*`CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK`* |*`hipDeviceAttributeMaxSharedMemoryPerBlock`* | +| 9 |*`CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY`* |*`hipDeviceAttributeTotalConstantMemory`* | +| 10 |*`CU_DEVICE_ATTRIBUTE_WARP_SIZE`* |*`hipDeviceAttributeWarpSize`* | +| 11 |*`CU_DEVICE_ATTRIBUTE_MAX_PITCH`* | | +| 12 |*`CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK`* |*`hipDeviceAttributeMaxRegistersPerBlock`* | +| 12 |*`CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK`* |*`hipDeviceAttributeMaxRegistersPerBlock`* | +| 13 |*`CU_DEVICE_ATTRIBUTE_CLOCK_RATE`* |*`hipDeviceAttributeClockRate`* | +| 14 |*`CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT`* | | +| 15 |*`CU_DEVICE_ATTRIBUTE_GPU_OVERLAP`* | | +| 16 |*`CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT`* |*`hipDeviceAttributeMultiprocessorCount`* | +| 17 |*`CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT`* | | +| 18 |*`CU_DEVICE_ATTRIBUTE_INTEGRATED`* | | +| 19 |*`CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY`* | | +| 20 |*`CU_DEVICE_ATTRIBUTE_COMPUTE_MODE`* |*`hipDeviceAttributeComputeMode`* | +| 21 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH`* | | +| 22 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH`* | | +| 23 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT`* | | +| 24 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH`* | | +| 25 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT`* | | +| 26 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH`* | | +| 27 
|*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH`* | | +| 28 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT`* | | +| 29 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS`* | | +| 27 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH`* | | +| 28 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT`* | | +| 29 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES`* | | +| 30 |*`CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT`* | | +| 31 |*`CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS`* |*`hipDeviceAttributeConcurrentKernels`* | +| 32 |*`CU_DEVICE_ATTRIBUTE_ECC_ENABLED`* | | +| 33 |*`CU_DEVICE_ATTRIBUTE_PCI_BUS_ID`* |*`hipDeviceAttributePciBusId`* | +| 34 |*`CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID`* |*`hipDeviceAttributePciDeviceId`* | +| 35 |*`CU_DEVICE_ATTRIBUTE_TCC_DRIVER`* | | +| 36 |*`CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE`* |*`hipDeviceAttributeMemoryClockRate`* | +| 37 |*`CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH`* |*`hipDeviceAttributeMemoryBusWidth`* | +| 38 |*`CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE`* |*`hipDeviceAttributeL2CacheSize`* | +| 39 |*`CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR`* |*`hipDeviceAttributeMaxThreadsPerMultiProcessor`* | +| 40 |*`CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT`* | | +| 41 |*`CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING`* | | +| 42 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH`* | | +| 43 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS`* | | +| 44 |*`CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER`* | | +| 45 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH`* | | +| 46 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT`* | | +| 47 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE`* | | +| 48 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE`* | | +| 49 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE`* | | +| 50 |*`CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID`* | | +| 51 |*`CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT`* | | +| 52 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH`* | | +| 53 
|*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH`* | | +| 54 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS`* | | +| 55 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH`* | | +| 56 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH`* | | +| 57 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT`* | | +| 58 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH`* | | +| 59 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT`* | | +| 60 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH`* | | +| 61 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH`* | | +| 62 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS`* | | +| 63 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH`* | | +| 64 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT`* | | +| 65 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS`* | | +| 66 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH`* | | +| 67 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH`* | | +| 68 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS`* | | +| 69 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH`* | | +| 70 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH`* | | +| 71 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT`* | | +| 72 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH`* | | +| 73 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH`* | | +| 74 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT`* | | +| 75 |*`CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR`* |*`hipDeviceAttributeComputeCapabilityMajor`* | +| 76 |*`CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR`* |*`hipDeviceAttributeComputeCapabilityMinor`* | +| 77 |*`CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH`* | | +| 78 |*`CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED`* | | +| 79 |*`CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED`* | | +| 80 |*`CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED`* | | +| 81 |*`CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR`* 
|*`hipDeviceAttributeMaxSharedMemoryPerMultiprocessor`* | +| 82 |*`CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR`* | | +| 83 |*`CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY`* |*`hipDeviceAttributeManagedMemory`* | +| 84 |*`CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD`* | | +| 85 |*`CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID`* | | +| 86 |*`CU_DEVICE_ATTRIBUTE_MAX`* | | +| enum |***`CUevent_flags`*** | | +| 0x00 |*`CU_EVENT_DEFAULT`* |*`hipEventDefault`* | +| 0x01 |*`CU_EVENT_BLOCKING_SYNC`* |*`hipEventBlockingSync`* | +| 0x02 |*`CU_EVENT_DISABLE_TIMING`* |*`hipEventDisableTiming`* | +| 0x04 |*`CU_EVENT_INTERPROCESS`* |*`hipEventInterprocess`* | +| enum |***`CUfilter_mode`*** |***`hipTextureFilterMode`*** | +| 0 |*`CU_TR_FILTER_MODE_POINT`* |*`hipFilterModePoint`* | +| 1 |*`CU_TR_FILTER_MODE_LINEAR`* |*`hipFilterModeLinear`* | +| enum |***`CUfunc_cache`*** |***`hipFuncCache`*** | +| 0x00 |*`CU_FUNC_CACHE_PREFER_NONE`* |*`hipFuncCachePreferNone`* | +| 0x01 |*`CU_FUNC_CACHE_PREFER_SHARED`* |*`hipFuncCachePreferShared`* | +| 0x02 |*`CU_FUNC_CACHE_PREFER_L1`* |*`hipFuncCachePreferL1`* | +| 0x03 |*`CU_FUNC_CACHE_PREFER_EQUAL`* |*`hipFuncCachePreferEqual`* | +| enum |***`CUfunction_attribute`*** | | +| 0 |*`CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK`* | | +| 1 |*`CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES`* | | +| 2 |*`CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES`* | | +| 3 |*`CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES`* | | +| 4 |*`CU_FUNC_ATTRIBUTE_NUM_REGS`* | | +| 5 |*`CU_FUNC_ATTRIBUTE_PTX_VERSION`* | | +| 6 |*`CU_FUNC_ATTRIBUTE_BINARY_VERSION`* | | +| 7 |*`CU_FUNC_ATTRIBUTE_CACHE_MODE_CA`* | | +| 8 |*`CU_FUNC_ATTRIBUTE_MAX`* | | +| enum |***`CUgraphicsMapResourceFlags`*** | | +| 0x00 |*`CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE`* | | +| 0x01 |*`CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY`* | | +| 0x02 |*`CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD`* | | +| enum |***`CUgraphicsRegisterFlags`*** | | +| 0x00 |*`CU_GRAPHICS_REGISTER_FLAGS_NONE`* | | +| 0x01 |*`CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY`* | | +| 0x02 
|*`CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD`* | | +| 0x04 |*`CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST`* | | +| 0x08 |*`CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER`* | | +| enum |***`CUipcMem_flags`*** | | +| 0x1 |*`CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS`* |*`hipIpcMemLazyEnablePeerAccess`* | +| enum |***`CUjit_cacheMode`*** | | +| 0 |*`CU_JIT_CACHE_OPTION_NONE`* | | +| |*`CU_JIT_CACHE_OPTION_CG`* | | +| |*`CU_JIT_CACHE_OPTION_CA`* | | +| enum |***`CUjit_fallback`*** | | +| 0 |*`CU_PREFER_PTX`* | | +| |*`CU_PREFER_BINARY`* | | +| enum |***`CUjit_option`*** | | +| 0 |*`CU_JIT_MAX_REGISTERS`* | | +| |*`CU_JIT_THREADS_PER_BLOCK`* | | +| |*`CU_JIT_WALL_TIME`* | | +| |*`CU_JIT_INFO_LOG_BUFFER`* | | +| |*`CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES`* | | +| |*`CU_JIT_OPTIMIZATION_LEVEL`* | | +| |*`CU_JIT_TARGET_FROM_CUCONTEXT`* | | +| |*`CU_JIT_TARGET`* | | +| |*`CU_JIT_FALLBACK_STRATEGY`* | | +| |*`CU_JIT_GENERATE_DEBUG_INFO`* | | +| |*`CU_JIT_LOG_VERBOSE`* | | +| |*`CU_JIT_GENERATE_LINE_INFO`* | | +| |*`CU_JIT_CACHE_MODE`* | | +| |*`CU_JIT_NUM_OPTIONS`* | | +| enum |***`CUjit_target`*** | | +| 10 |*`CU_TARGET_COMPUTE_10`* | | +| 11 |*`CU_TARGET_COMPUTE_11`* | | +| 12 |*`CU_TARGET_COMPUTE_12`* | | +| 13 |*`CU_TARGET_COMPUTE_13`* | | +| 20 |*`CU_TARGET_COMPUTE_20`* | | +| 21 |*`CU_TARGET_COMPUTE_21`* | | +| 30 |*`CU_TARGET_COMPUTE_30`* | | +| 32 |*`CU_TARGET_COMPUTE_32`* | | +| 35 |*`CU_TARGET_COMPUTE_35`* | | +| 37 |*`CU_TARGET_COMPUTE_37`* | | +| 50 |*`CU_TARGET_COMPUTE_50`* | | +| 52 |*`CU_TARGET_COMPUTE_52`* | | +| enum |***`CUjitInputType`*** | | +| 0 |*`CU_JIT_INPUT_CUBIN`* | | +| |*`CU_JIT_INPUT_PTX`* | | +| |*`CU_JIT_INPUT_FATBINARY`* | | +| |*`CU_JIT_INPUT_OBJECT`* | | +| |*`CU_JIT_INPUT_LIBRARY`* | | +| |*`CU_JIT_NUM_INPUT_TYPES`* | | +| enum |***`CUlimit`*** |***`hipLimit_t`*** | +| 0x00 |*`CU_LIMIT_STACK_SIZE`* | | +| 0x01 |*`CU_LIMIT_PRINTF_FIFO_SIZE`* | | +| 0x02 |*`CU_LIMIT_MALLOC_HEAP_SIZE`* |*`hipLimitMallocHeapSize`* | +| 0x03 |*`CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH`* | | +| 
0x04 |*`CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT`* | | +| |*`CU_LIMIT_MAX`* | | +| enum |***`CUmemAttach_flags`*** | | +| 0x1 |*`CU_MEM_ATTACH_GLOBAL`* | | +| 0x2 |*`CU_MEM_ATTACH_HOST`* | | +| 0x4 |*`CU_MEM_ATTACH_SINGLE`* | | +| enum |***`CUmemorytype`*** | | +| 0x01 |*`CU_MEMORYTYPE_HOST`* | | +| 0x02 |*`CU_MEMORYTYPE_DEVICE`* | | +| 0x03 |*`CU_MEMORYTYPE_ARRAY`* | | +| 0x04 |*`CU_MEMORYTYPE_UNIFIED`* | | +| enum |***`CUoccupancy_flags`*** | | +| 0x00 |*`CU_OCCUPANCY_DEFAULT`* | | +| 0x01 |*`CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE`* | | +| enum |***`CUpointer_attribute`*** | | +| 1 |*`CU_POINTER_ATTRIBUTE_CONTEXT`* | | +| 2 |*`CU_POINTER_ATTRIBUTE_MEMORY_TYPE`* | | +| 3 |*`CU_POINTER_ATTRIBUTE_DEVICE_POINTER`* | | +| 4 |*`CU_POINTER_ATTRIBUTE_HOST_POINTER`* | | +| 5 |*`CU_POINTER_ATTRIBUTE_P2P_TOKENS`* | | +| 6 |*`CU_POINTER_ATTRIBUTE_SYNC_MEMOPS`* | | +| 7 |*`CU_POINTER_ATTRIBUTE_BUFFER_ID`* | | +| 8 |*`CU_POINTER_ATTRIBUTE_IS_MANAGED`* | | +| enum |***`CUresourcetype`*** | | +| 0x00 |*`CU_RESOURCE_TYPE_ARRAY`* | | +| 0x01 |*`CU_RESOURCE_TYPE_MIPMAPPED_ARRAY`* | | +| 0x02 |*`CU_RESOURCE_TYPE_LINEAR`* | | +| 0x03 |*`CU_RESOURCE_TYPE_PITCH2D`* | | +| enum |***`CUresourceViewFormat`*** | | +| 0x00 |*`CU_RES_VIEW_FORMAT_NONE`* | | +| 0x01 |*`CU_RES_VIEW_FORMAT_UINT_1X8`* | | +| 0x02 |*`CU_RES_VIEW_FORMAT_UINT_2X8`* | | +| 0x03 |*`CU_RES_VIEW_FORMAT_UINT_4X8`* | | +| 0x04 |*`CU_RES_VIEW_FORMAT_SINT_1X8`* | | +| 0x05 |*`CU_RES_VIEW_FORMAT_SINT_2X8`* | | +| 0x06 |*`CU_RES_VIEW_FORMAT_SINT_4X8`* | | +| 0x07 |*`CU_RES_VIEW_FORMAT_UINT_1X16`* | | +| 0x08 |*`CU_RES_VIEW_FORMAT_UINT_2X16`* | | +| 0x09 |*`CU_RES_VIEW_FORMAT_UINT_4X16`* | | +| 0x0a |*`CU_RES_VIEW_FORMAT_SINT_1X16`* | | +| 0x0b |*`CU_RES_VIEW_FORMAT_SINT_2X16`* | | +| 0x0c |*`CU_RES_VIEW_FORMAT_SINT_4X16`* | | +| 0x0d |*`CU_RES_VIEW_FORMAT_UINT_1X32`* | | +| 0x0e |*`CU_RES_VIEW_FORMAT_UINT_2X32`* | | +| 0x0f |*`CU_RES_VIEW_FORMAT_UINT_4X32`* | | +| 0x10 |*`CU_RES_VIEW_FORMAT_SINT_1X32`* | | +| 0x11
|*`CU_RES_VIEW_FORMAT_SINT_2X32`* | | +| 0x12 |*`CU_RES_VIEW_FORMAT_SINT_4X32`* | | +| 0x13 |*`CU_RES_VIEW_FORMAT_FLOAT_1X16`* | | +| 0x14 |*`CU_RES_VIEW_FORMAT_FLOAT_2X16`* | | +| 0x15 |*`CU_RES_VIEW_FORMAT_FLOAT_4X16`* | | +| 0x16 |*`CU_RES_VIEW_FORMAT_FLOAT_1X32`* | | +| 0x17 |*`CU_RES_VIEW_FORMAT_FLOAT_2X32`* | | +| 0x18 |*`CU_RES_VIEW_FORMAT_FLOAT_4X32`* | | +| 0x19 |*`CU_RES_VIEW_FORMAT_UNSIGNED_BC1`* | | +| 0x1a |*`CU_RES_VIEW_FORMAT_UNSIGNED_BC2`* | | +| 0x1b |*`CU_RES_VIEW_FORMAT_UNSIGNED_BC3`* | | +| 0x1c |*`CU_RES_VIEW_FORMAT_UNSIGNED_BC4`* | | +| 0x1d |*`CU_RES_VIEW_FORMAT_SIGNED_BC4`* | | +| 0x1e |*`CU_RES_VIEW_FORMAT_UNSIGNED_BC5`* | | +| 0x1f |*`CU_RES_VIEW_FORMAT_SIGNED_BC5`* | | +| 0x20 |*`CU_RES_VIEW_FORMAT_UNSIGNED_BC6H`* | | +| 0x21 |*`CU_RES_VIEW_FORMAT_SIGNED_BC6H`* | | +| 0x22 |*`CU_RES_VIEW_FORMAT_UNSIGNED_BC7`* | | +| enum |***`CUresult`*** |***`hipError_t`*** | +| 0 |*`CUDA_SUCCESS`* |*`hipSuccess`* | +| 1 |*`CUDA_ERROR_INVALID_VALUE`* |*`hipErrorInvalidValue`* | +| 2 |*`CUDA_ERROR_OUT_OF_MEMORY`* |*`hipErrorMemoryAllocation`* | +| 3 |*`CUDA_ERROR_NOT_INITIALIZED`* |*`hipErrorNotInitialized`* | +| 4 |*`CUDA_ERROR_DEINITIALIZED`* |*`hipErrorDeinitialized`* | +| 5 |*`CUDA_ERROR_PROFILER_DISABLED`* |*`hipErrorProfilerDisabled`* | +| 6 |*`CUDA_ERROR_PROFILER_NOT_INITIALIZED`* |*`hipErrorProfilerNotInitialized`* | +| 7 |*`CUDA_ERROR_PROFILER_ALREADY_STARTED`* |*`hipErrorProfilerAlreadyStarted`* | +| 8 |*`CUDA_ERROR_PROFILER_ALREADY_STOPPED`* |*`hipErrorProfilerAlreadyStopped`* | +| 100 |*`CUDA_ERROR_NO_DEVICE`* |*`hipErrorNoDevice`* | +| 101 |*`CUDA_ERROR_INVALID_DEVICE`* |*`hipErrorInvalidDevice`* | +| 200 |*`CUDA_ERROR_INVALID_IMAGE`* |*`hipErrorInvalidImage`* | +| 201 |*`CUDA_ERROR_INVALID_CONTEXT`* |*`hipErrorInvalidContext`* | +| 202 |*`CUDA_ERROR_CONTEXT_ALREADY_CURRENT`* |*`hipErrorContextAlreadyCurrent`* | +| 205 |*`CUDA_ERROR_MAP_FAILED`* |*`hipErrorMapFailed`* | +| 206 |*`CUDA_ERROR_UNMAP_FAILED`* |*`hipErrorUnmapFailed`* | +| 207 
|*`CUDA_ERROR_ARRAY_IS_MAPPED`* |*`hipErrorArrayIsMapped`* | +| 208 |*`CUDA_ERROR_ALREADY_MAPPED`* |*`hipErrorAlreadyMapped`* | +| 209 |*`CUDA_ERROR_NO_BINARY_FOR_GPU`* |*`hipErrorNoBinaryForGpu`* | +| 210 |*`CUDA_ERROR_ALREADY_ACQUIRED`* |*`hipErrorAlreadyAcquired`* | +| 211 |*`CUDA_ERROR_NOT_MAPPED`* |*`hipErrorNotMapped`* | +| 212 |*`CUDA_ERROR_NOT_MAPPED_AS_ARRAY`* |*`hipErrorNotMappedAsArray`* | +| 213 |*`CUDA_ERROR_NOT_MAPPED_AS_POINTER`* |*`hipErrorNotMappedAsPointer`* | +| 214 |*`CUDA_ERROR_ECC_UNCORRECTABLE`* |*`hipErrorECCNotCorrectable`* | +| 215 |*`CUDA_ERROR_UNSUPPORTED_LIMIT`* |*`hipErrorUnsupportedLimit`* | +| 216 |*`CUDA_ERROR_CONTEXT_ALREADY_IN_USE`* |*`hipErrorContextAlreadyInUse`* | +| 217 |*`CUDA_ERROR_PEER_ACCESS_UNSUPPORTED`* |*`hipErrorPeerAccessUnsupported`* | +| 218 |*`CUDA_ERROR_INVALID_PTX`* |*`hipErrorInvalidKernelFile`* | +| 219 |*`CUDA_ERROR_INVALID_GRAPHICS_CONTEXT`* |*`hipErrorInvalidGraphicsContext`* | +| 300 |*`CUDA_ERROR_INVALID_SOURCE`* |*`hipErrorInvalidSource`* | +| 301 |*`CUDA_ERROR_FILE_NOT_FOUND`* |*`hipErrorFileNotFound`* | +| 302 |*`CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND`* |*`hipErrorSharedObjectSymbolNotFound`* | +| 303 |*`CUDA_ERROR_SHARED_OBJECT_INIT_FAILED`* |*`hipErrorSharedObjectInitFailed`* | +| 304 |*`CUDA_ERROR_OPERATING_SYSTEM`* |*`hipErrorOperatingSystem`* | +| 400 |*`CUDA_ERROR_INVALID_HANDLE`* |*`hipErrorInvalidResourceHandle`* | +| 500 |*`CUDA_ERROR_NOT_FOUND`* |*`hipErrorNotFound`* | +| 600 |*`CUDA_ERROR_NOT_READY`* |*`hipErrorNotReady`* | +| 700 |*`CUDA_ERROR_ILLEGAL_ADDRESS`* |*`hipErrorIllegalAddress`* | +| 701 |*`CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES`* |*`hipErrorLaunchOutOfResources`* | +| 702 |*`CUDA_ERROR_LAUNCH_TIMEOUT`* |*`hipErrorLaunchTimeOut`* | +| 703 |*`CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING`* | | +| 704 |*`CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED`* |*`hipErrorPeerAccessAlreadyEnabled`* | +| 705 |*`CUDA_ERROR_PEER_ACCESS_NOT_ENABLED`* |*`hipErrorPeerAccessNotEnabled`* | +| 708 
|*`CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE`* | | +| 709 |*`CUDA_ERROR_CONTEXT_IS_DESTROYED`* | | +| 710 |*`CUDA_ERROR_ASSERT`* | | +| 711 |*`CUDA_ERROR_TOO_MANY_PEERS`* | | +| 712 |*`CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED`* |*`hipErrorHostMemoryAlreadyRegistered`* | +| 713 |*`CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED`* |*`hipErrorHostMemoryNotRegistered`* | +| 714 |*`CUDA_ERROR_HARDWARE_STACK_ERROR`* | | +| 715 |*`CUDA_ERROR_ILLEGAL_INSTRUCTION`* | | +| 716 |*`CUDA_ERROR_MISALIGNED_ADDRESS`* | | +| 717 |*`CUDA_ERROR_INVALID_ADDRESS_SPACE`* | | +| 718 |*`CUDA_ERROR_INVALID_PC`* | | +| 719 |*`CUDA_ERROR_LAUNCH_FAILED`* | | +| 800 |*`CUDA_ERROR_NOT_PERMITTED`* | | +| 801 |*`CUDA_ERROR_NOT_SUPPORTED`* | | +| 999 |*`CUDA_ERROR_UNKNOWN`* | | +| enum |***`CUstream_flags`*** |***`hipStreamFlags`*** | +| 0x0 |*`CU_STREAM_DEFAULT`* |*`hipStreamDefault`* | +| 0x1 |*`CU_STREAM_NON_BLOCKING`* |*`hipStreamNonBlocking`* | +| enum |***`CUGLDeviceList`*** | | +| 0x01 |*`CU_GL_DEVICE_LIST_ALL`* | | +| 0x02 |*`CU_GL_DEVICE_LIST_CURRENT_FRAME`* | | +| 0x03 |*`CU_GL_DEVICE_LIST_NEXT_FRAME`* | | +| enum |***`CUGLmap_flags`*** | | +| 0x00 |*`CU_GL_MAP_RESOURCE_FLAGS_NONE`* | | +| 0x01 |*`CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY`* | | +| 0x02 |*`CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD`* | | +| enum |***`CUd3d9DeviceList`*** | | +| 0x01 |*`CU_D3D9_DEVICE_LIST_ALL`* | | +| 0x02 |*`CU_D3D9_DEVICE_LIST_CURRENT_FRAME`* | | +| 0x03 |*`CU_D3D9_DEVICE_LIST_NEXT_FRAME`* | | +| enum |***`CUd3d9map_flags`*** | | +| 0x00 |*`CU_D3D9_MAPRESOURCE_FLAGS_NONE`* | | +| 0x01 |*`CU_D3D9_MAPRESOURCE_FLAGS_READONLY`* | | +| 0x02 |*`CU_D3D9_MAPRESOURCE_FLAGS_WRITEDISCARD`* | | +| enum |***`CUd3d9register_flags`*** | | +| 0x00 |*`CU_D3D9_REGISTER_FLAGS_NONE`* | | +| 0x01 |*`CU_D3D9_REGISTER_FLAGS_ARRAY`* | | +| enum |***`CUd3d10DeviceList`*** | | +| 0x01 |*`CU_D3D10_DEVICE_LIST_ALL`* | | +| 0x02 |*`CU_D3D10_DEVICE_LIST_CURRENT_FRAME`* | | +| 0x03 |*`CU_D3D10_DEVICE_LIST_NEXT_FRAME`* | | +| enum |***`CUd3d10map_flags`*** | | 
+| 0x00 |*`CU_D3D10_MAPRESOURCE_FLAGS_NONE`* | | +| 0x01 |*`CU_D3D10_MAPRESOURCE_FLAGS_READONLY`* | | +| 0x02 |*`CU_D3D10_MAPRESOURCE_FLAGS_WRITEDISCARD`* | | +| enum |***`CUd3d10register_flags`*** | | +| 0x00 |*`CU_D3D10_REGISTER_FLAGS_NONE`* | | +| 0x01 |*`CU_D3D10_REGISTER_FLAGS_ARRAY`* | | +| enum |***`CUd3d11DeviceList`*** | | +| 0x01 |*`CU_D3D11_DEVICE_LIST_ALL`* | | +| 0x02 |*`CU_D3D11_DEVICE_LIST_CURRENT_FRAME`* | | +| 0x03 |*`CU_D3D11_DEVICE_LIST_NEXT_FRAME`* | | +| typedef | `CUarray` | `hipArray *` | +| struct | `CUarray_st` | `hipArray` | +| typedef | `CUcontext` | `hipCtx_t` | +| typedef | `CUdevice` | `hipDevice_t` | +| typedef | `CUdeviceptr` | `hipDeviceptr_t` | +| typedef | `CUevent` | `hipEvent_t` | +| typedef | `CUfunction` | `hipFunction_t` | +| typedef | `CUgraphicsResource` | | +| typedef | `CUmipmappedArray` | | +| typedef | `CUmodule` | `hipModule_t` | +| typedef | `CUstream` | `hipStream_t` | +| typedef | `CUstreamCallback` | `hipStreamCallback_t` | +| typedef | `CUsurfObject` | | +| typedef | `CUsurfref` | | +| typedef | `CUtexObject` | | +| typedef | `CUtexref` | | +| define |`CU_IPC_HANDLE_SIZE` | | +| define |`CU_LAUNCH_PARAM_BUFFER_POINTER` | `HIP_LAUNCH_PARAM_BUFFER_POINTER` | +| define |`CU_LAUNCH_PARAM_BUFFER_SIZE` | `HIP_LAUNCH_PARAM_BUFFER_SIZE` | +| define |`CU_LAUNCH_PARAM_END` | `HIP_LAUNCH_PARAM_END` | +| define |`CU_MEMHOSTALLOC_DEVICEMAP` | | +| define |`CU_MEMHOSTALLOC_PORTABLE` | | +| define |`CU_MEMHOSTALLOC_WRITECOMBINED` | | +| define |`CU_MEMHOSTREGISTER_DEVICEMAP` | | +| define |`CU_MEMHOSTREGISTER_IOMEMORY` | | +| define |`CU_MEMHOSTREGISTER_PORTABLE` | | +| define |`CU_PARAM_TR_DEFAULT` | | +| define |`CU_STREAM_LEGACY` | | +| define |`CU_STREAM_PER_THREAD` | | +| define |`CU_TRSA_OVERRIDE_FORMAT` | | +| define |`CU_TRSF_NORMALIZED_COORDINATES` | | +| define |`CU_TRSF_SRGB` | | +| define |`CUDA_ARRAY3D_2DARRAY` | | +| define |`CUDA_ARRAY3D_CUBEMAP` | | +| define |`CUDA_ARRAY3D_DEPTH_TEXTURE` | | +| define 
|`CUDA_ARRAY3D_LAYERED` | | +| define |`CUDA_ARRAY3D_SURFACE_LDST` | | +| define |`CUDA_ARRAY3D_TEXTURE_GATHER` | | +| define |`CUDA_VERSION` | | ## **2. Error Handling** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cuGetErrorName` | | Gets the string representation of an error code enum name. | -| `cuGetErrorString` | | Gets the string description of an error code. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuGetErrorName` | | +| `cuGetErrorString` | | ## **3. Initialization** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cuInit` | `hipInit` | Initialize the CUDA driver API. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuInit` | `hipInit` | ## **4. Version Management** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cuDriverGetVersion` | `hipDriverGetVersion` | Returns the CUDA driver version. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuDriverGetVersion` | `hipDriverGetVersion` | ## **5. 
Device Management** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cuDriverGetVersion` | `hipGetDevice` | Returns a handle to a compute device. | -| `cuDeviceGetAttribute` | `hipDeviceGetAttribute` | Returns information about the device. | -| `cuDeviceGetCount` | `hipGetDeviceCount` | Returns the number of compute-capable devices. | -| `cuDeviceGetName` | `hipDeviceGetName` | Returns an identifer string for the device. | -| `cuDeviceTotalMem` | `hipDeviceTotalMem` | Returns the total amount of memory on the device. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuDeviceGet` | `hipGetDevice` | +| `cuDeviceGetAttribute` | `hipDeviceGetAttribute` | +| `cuDeviceGetCount` | `hipGetDeviceCount` | +| `cuDeviceGetName` | `hipDeviceGetName` | +| `cuDeviceTotalMem` | `hipDeviceTotalMem` | ## **6. Device Management [DEPRECATED]** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cuDeviceComputeCapability` | `hipDeviceComputeCapability` | Returns the compute capability of the device. | -| `cuDeviceGetProperties` | `hipGetDeviceProperties` | Returns properties for a selected device. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuDeviceComputeCapability` | `hipDeviceComputeCapability` | +| `cuDeviceGetProperties` | `hipGetDeviceProperties` | ## **7. 
Primary Context Management** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cuDevicePrimaryCtxGetState` | `hipDevicePrimaryCtxGetState` | Get the state of the primary context. | -| `cuDevicePrimaryCtxRelease` | `hipDevicePrimaryCtxRelease` | Release the primary context on the GPU. | -| `cuDevicePrimaryCtxReset` | `hipDevicePrimaryCtxReset` | Destroy all allocations and reset all state on the primary context. | -| `cuDevicePrimaryCtxRetain` | `hipDevicePrimaryCtxRetain` | Retain the primary context on the GPU. | -| `cuDevicePrimaryCtxSetFlags` | `hipDevicePrimaryCtxSetFlags` | Set flags for the primary context. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuDevicePrimaryCtxGetState` | `hipDevicePrimaryCtxGetState` | +| `cuDevicePrimaryCtxRelease` | `hipDevicePrimaryCtxRelease` | +| `cuDevicePrimaryCtxReset` | `hipDevicePrimaryCtxReset` | +| `cuDevicePrimaryCtxRetain` | `hipDevicePrimaryCtxRetain` | +| `cuDevicePrimaryCtxSetFlags` | `hipDevicePrimaryCtxSetFlags` | ## **8. Context Management** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cuCtxCreate` | `hipCtxCreate` | Create a CUDA context. | -| `cuCtxDestroy` | `hipCtxDestroy` | Destroy a CUDA context. | -| `cuCtxGetApiVersion` | `hipCtxGetApiVersion` | Gets the context's API version. | -| `cuCtxGetCacheConfig` | `hipCtxGetCacheConfig` | Returns the preferred cache configuration for the current context. 
| -| `cuCtxGetCurrent` | `hipCtxGetCurrent` | Returns the CUDA context bound to the calling CPU thread. | -| `cuCtxGetDevice` | `hipCtxGetDevice` | Returns the device ID for the current context. | -| `cuCtxGetFlags` | `hipCtxGetFlags` | Returns the flags for the current context. | -| `cuCtxGetLimit` | | Returns resource limits. | -| `cuCtxGetSharedMemConfig` | `hipCtxGetSharedMemConfig` | Returns the current shared memory configuration for the current context. | -| `cuCtxGetStreamPriorityRange` | | Returns numerical values that correspond to the least and greatest stream priorities. | -| `cuCtxPopCurrent` | `hipCtxPopCurrent` | Pops the current CUDA context from the current CPU thread. | -| `cuCtxPushCurrent` | `hipCtxPushCurrent` | Pushes a context on the current CPU thread. | -| `cuCtxSetCacheConfig` | `hipCtxSetCacheConfig` | Sets the preferred cache configuration for the current context. | -| `cuCtxSetCurrent` | `hipCtxSetCurrent` | Binds the specified CUDA context to the calling CPU thread. | -| `cuCtxSetLimit` | | Set resource limits. | -| `cuCtxSetSharedMemConfig` | `hipCtxSetSharedMemConfig` | Sets the shared memory configuration for the current context. | -| `cuCtxSynchronize` | `hipCtxSynchronize` | Block for a context's tasks to complete. 
| +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuCtxCreate` | `hipCtxCreate` | +| `cuCtxDestroy` | `hipCtxDestroy` | +| `cuCtxGetApiVersion` | `hipCtxGetApiVersion` | +| `cuCtxGetCacheConfig` | `hipCtxGetCacheConfig` | +| `cuCtxGetCurrent` | `hipCtxGetCurrent` | +| `cuCtxGetDevice` | `hipCtxGetDevice` | +| `cuCtxGetFlags` | `hipCtxGetFlags` | +| `cuCtxGetLimit` | | +| `cuCtxGetSharedMemConfig` | `hipCtxGetSharedMemConfig` | +| `cuCtxGetStreamPriorityRange` | | +| `cuCtxPopCurrent` | `hipCtxPopCurrent` | +| `cuCtxPushCurrent` | `hipCtxPushCurrent` | +| `cuCtxSetCacheConfig` | `hipCtxSetCacheConfig` | +| `cuCtxSetCurrent` | `hipCtxSetCurrent` | +| `cuCtxSetLimit` | | +| `cuCtxSetSharedMemConfig` | `hipCtxSetSharedMemConfig` | +| `cuCtxSynchronize` | `hipCtxSynchronize` | ## **9. Context Management [DEPRECATED]** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cuCtxAttach` | | Increment a context's usage-count. | -| `cuCtxDetach` | | Decrement a context's usage-count. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuCtxAttach` | | +| `cuCtxDetach` | | ## **10. Module Management** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cuLinkAddData` | | Add an input to a pending linker invocation. | -| `cuLinkAddFile` | | Add a file input to a pending linker invocation. | -| `cuLinkComplete` | | Complete a pending linker invocation. 
| -| `cuLinkCreate` | | Creates a pending JIT linker invocation. | -| `cuLinkDestroy` | | Destroys state for a JIT linker invocation. | -| `cuModuleGetFunction` | `hipModuleGetFunction` | Returns a function handle. | -| `cuModuleGetGlobal` | `hipModuleGetGlobal` | Returns a global pointer from a module. | -| `cuModuleGetSurfRef` | | Returns a handle to a surface reference. | -| `cuModuleGetTexRef` | | Returns a handle to a texture reference. | -| `cuModuleLoad` | `hipModuleLoad` | Loads a compute module. | -| `cuModuleLoadData` | `hipModuleLoadData` | Load a module's data. | -| `cuModuleLoadDataEx` | `hipModuleLoadDataEx` | Load a module's data with options. | -| `cuModuleLoadFatBinary` | | Load a module's data. | -| `cuModuleUnload` | `hipModuleUnload` | Unloads a module. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuLinkAddData` | | +| `cuLinkAddFile` | | +| `cuLinkComplete` | | +| `cuLinkCreate` | | +| `cuLinkDestroy` | | +| `cuModuleGetFunction` | `hipModuleGetFunction` | +| `cuModuleGetGlobal` | `hipModuleGetGlobal` | +| `cuModuleGetSurfRef` | | +| `cuModuleGetTexRef` | | +| `cuModuleLoad` | `hipModuleLoad` | +| `cuModuleLoadData` | `hipModuleLoadData` | +| `cuModuleLoadDataEx` | `hipModuleLoadDataEx` | +| `cuModuleLoadFatBinary` | | +| `cuModuleUnload` | `hipModuleUnload` | ## **11. Memory Management** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cuArray3DCreate` | | Creates a 3D CUDA array. | -| `cuArray3DGetDescriptor` | | Get a 3D CUDA array descriptor. | -| `cuArrayCreate` | | Creates a 1D or 2D CUDA array. | -| `cuArrayDestroy` | | Destroys a CUDA array. | -| `cuArrayGetDescriptor` | | Get a 1D or 2D CUDA array descriptor. 
| -| `cuDeviceGetByPCIBusId` | `hipDeviceGetByPCIBusId` | Returns a handle to a compute device. | -| `cuDeviceGetPCIBusId` | `hipDeviceGetPCIBusId` | Returns a PCI Bus Id string for the device. | -| `cuIpcCloseMemHandle` | | Close memory mapped with cuIpcOpenMemHandle. | -| `cuIpcGetEventHandle` | | Gets an interprocess handle for a previously allocated event. | -| `cuIpcGetMemHandle` | | Gets an interprocess memory handle for an existing device memory allocation. | -| `cuIpcOpenEventHandle` | | Opens an interprocess event handle for use in the current process. | -| `cuIpcOpenMemHandle` | | Opens an interprocess memory handle exported from another process and returns a device pointer usable in the local process. | -| `cuMemAlloc` | `hipMalloc` | Allocates device memory. | -| `cuMemAllocHost` | | Allocates page-locked host memory. | -| `cuMemAllocManaged` | | Allocates memory that will be automatically managed by the Unified Memory system. | -| `cuMemAllocPitch` | | Allocates pitched device memory. | -| `cuMemcpy` | | Copies memory. | -| `cuMemcpy2D` | | Copies memory for 2D arrays. | -| `cuMemcpy2DAsync` | | Copies memory for 2D arrays. | -| `cuMemcpy2DUnaligned` | | Copies memory for 2D arrays. | -| `cuMemcpy3D` | | Copies memory for 3D arrays. | -| `cuMemcpy3DAsync` | | Copies memory for 3D arrays. | -| `cuMemcpy3DPeer` | | Copies memory between contexts. | -| `cuMemcpy3DPeerAsync` | | Copies memory between contexts asynchronously. | -| `cuMemcpyAsync` | | Copies memory asynchronously. | -| `cuMemcpyAtoA` | | Copies memory from Array to Array. | -| `cuMemcpyAtoD` | | Copies memory from Array to Device. | -| `cuMemcpyAtoH` | | Copies memory from Array to Host. | -| `cuMemcpyAtoHAsync` | | Copies memory from Array to Host. | -| `cuMemcpyDtoA` | | Copies memory from Device to Array. | -| `cuMemcpyDtoD` | `hipMemcpyDtoD` | Copies memory from Device to Device. | -| `cuMemcpyDtoDAsync` | `hipMemcpyDtoDAsync` | Copies memory from Device to Device. 
| -| `cuMemcpyDtoH` | `hipMemcpyDtoH` | Copies memory from Device to Host. | -| `cuMemcpyDtoHAsync` | `hipMemcpyDtoHAsync` | Copies memory from Device to Host. | -| `cuMemcpyHtoA` | | Copies memory from Host to Array. | -| `cuMemcpyHtoAAsync` | | Copies memory from Host to Array. | -| `cuMemcpyHtoD` | `hipMemcpyHtoD` | Copies memory from Host to Device. | -| `cuMemcpyHtoDAsync` | `hipMemcpyHtoDAsync` | Copies memory from Host to Device. | -| `cuMemcpyPeer` | | Copies device memory between two contexts. | -| `cuMemcpyPeerAsync` | | Copies device memory between two contexts asynchronously. | -| `cuMemFree` | `hipFree` | Frees device memory. | -| `cuMemFreeHost` | `hipFreeHost` | Frees page-locked host memory. | -| `cuMemGetAddressRange` | | Get information on memory allocations. | -| `cuMemGetInfo` | `hipMemGetInfo` | Gets free and total memory. | -| `cuMemHostAlloc` | `hipHostMalloc` | Allocates page-locked host memory. | -| `cuMemHostGetDevicePointer` | | Passes back device pointer of mapped pinned memory. | -| `cuMemHostGetFlags` | | Passes back flags that were used for a pinned allocation. | -| `cuMemHostRegister` | `hipHostRegister` | Registers an existing host memory range for use by CUDA. | -| `cuMemHostUnregister` | `hipHostUnregister` | Unregisters a memory range that was registered with cuMemHostRegister. | -| `cuMemsetD16` | | Initializes device memory. | -| `cuMemsetD16Async` | | Sets device memory. | -| `cuMemsetD2D16` | | Initializes device memory. | -| `cuMemsetD2D16Async` | | Sets device memory. | -| `cuMemsetD2D32` | | Initializes device memory. | -| `cuMemsetD2D32Async` | | Sets device memory. | -| `cuMemsetD2D8` | | Initializes device memory. | -| `cuMemsetD2D8Async` | | Sets device memory. | -| `cuMemsetD32` | `hipMemset` | Initializes device memory. | -| `cuMemsetD32Async` | `hipMemsetAsync` | Sets device memory. | -| `cuMemsetD2D8` | | Initializes device memory. | -| `cuMemsetD2D8Async` | | Sets device memory. 
| -| `cuMipmappedArrayCreate` | | Creates a CUDA mipmapped array. | -| `cuMipmappedArrayDestroy` | | Destroys a CUDA mipmapped array. | -| `cuMipmappedArrayGetLevel` | | Gets a mipmap level of a CUDA mipmapped array. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuArray3DCreate` | | +| `cuArray3DGetDescriptor` | | +| `cuArrayCreate` | | +| `cuArrayDestroy` | | +| `cuArrayGetDescriptor` | | +| `cuDeviceGetByPCIBusId` | `hipDeviceGetByPCIBusId` | +| `cuDeviceGetPCIBusId` | `hipDeviceGetPCIBusId` | +| `cuIpcCloseMemHandle` | | +| `cuIpcGetEventHandle` | | +| `cuIpcGetMemHandle` | | +| `cuIpcOpenEventHandle` | | +| `cuIpcOpenMemHandle` | | +| `cuMemAlloc` | `hipMalloc` | +| `cuMemAllocHost` | | +| `cuMemAllocManaged` | | +| `cuMemAllocPitch` | | +| `cuMemcpy` | | +| `cuMemcpy2D` | | +| `cuMemcpy2DAsync` | | +| `cuMemcpy2DUnaligned` | | +| `cuMemcpy3D` | | +| `cuMemcpy3DAsync` | | +| `cuMemcpy3DPeer` | | +| `cuMemcpy3DPeerAsync` | | +| `cuMemcpyAsync` | | +| `cuMemcpyAtoA` | | +| `cuMemcpyAtoD` | | +| `cuMemcpyAtoH` | | +| `cuMemcpyAtoHAsync` | | +| `cuMemcpyDtoA` | | +| `cuMemcpyDtoD` | `hipMemcpyDtoD` | +| `cuMemcpyDtoDAsync` | `hipMemcpyDtoDAsync` | +| `cuMemcpyDtoH` | `hipMemcpyDtoH` | +| `cuMemcpyDtoHAsync` | `hipMemcpyDtoHAsync` | +| `cuMemcpyHtoA` | | +| `cuMemcpyHtoAAsync` | | +| `cuMemcpyHtoD` | `hipMemcpyHtoD` | +| `cuMemcpyHtoDAsync` | `hipMemcpyHtoDAsync` | +| `cuMemcpyPeer` | | +| `cuMemcpyPeerAsync` | | +| `cuMemFree` | `hipFree` | +| `cuMemFreeHost` | `hipFreeHost` | +| `cuMemGetAddressRange` | | +| `cuMemGetInfo` | `hipMemGetInfo` | +| `cuMemHostAlloc` | `hipHostMalloc` | +| `cuMemHostGetDevicePointer` | | +| `cuMemHostGetFlags` | | +| `cuMemHostRegister` | `hipHostRegister` | +| `cuMemHostUnregister` | `hipHostUnregister` | +| `cuMemsetD16` | | +| `cuMemsetD16Async` | | +| `cuMemsetD2D16` | | +| `cuMemsetD2D16Async` | | +| `cuMemsetD2D32` | | +| `cuMemsetD2D32Async` | | +| 
`cuMemsetD2D8` | | +| `cuMemsetD2D8Async` | | +| `cuMemsetD32` | `hipMemset` | +| `cuMemsetD32Async` | `hipMemsetAsync` | +| `cuMemsetD8` | | +| `cuMemsetD8Async` | | +| `cuMipmappedArrayCreate` | | +| `cuMipmappedArrayDestroy` | | +| `cuMipmappedArrayGetLevel` | | ## **12. Unified Addressing** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cuMemAdvise` | | Advise about the usage of a given memory range. | -| `cuMemPrefetchAsync` | | Prefetches memory to the specified destination device. | -| `cuMemRangeGetAttribute` | | Query an attribute of a given memory range. | -| `cuMemRangeGetAttributes` | | Query attributes of a given memory range. | -| `cuPointerGetAttribute` | | Returns information about a pointer. | -| `cuPointerGetAttributes` | | Returns information about a pointer. | -| `cuPointerSetAttribute` | | Set attributes on a previously allocated memory region. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuMemAdvise` | | +| `cuMemPrefetchAsync` | | +| `cuMemRangeGetAttribute` | | +| `cuMemRangeGetAttributes` | | +| `cuPointerGetAttribute` | | +| `cuPointerGetAttributes` | | +| `cuPointerSetAttribute` | | ## **13. Stream Management** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cuStreamAddCallback` | | Add a callback to a compute stream. | -| `cuStreamAttachMemAsync` | | Attach memory to a stream asynchronously. | -| `cuStreamCreate` | | Create a stream. 
| -| `cuStreamCreateWithPriority` | | Create a stream with the given priority. | -| `cuStreamDestroy` | `hipStreamDestroy` | Destroys a stream. | -| `cuStreamGetFlags` | `hipStreamGetFlags` | Query the flags of a given stream. | -| `cuStreamGetPriority` | `hipStreamGetPriority` | Query the priority of a given stream. | -| `cuStreamQuery` | `hipStreamQuery` | Determine status of a compute stream. | -| `cuStreamSynchronize` | `hipStreamSynchronize` | Wait until a stream's tasks are completed. | -| `cuStreamWaitEvent` | `hipStreamWaitEvent` | Make a compute stream wait on an event. | -| `cuStreamBatchMemOp` | | Batch operations to synchronize the stream via memory operations. | -| `cuStreamWaitValue32` | | Wait on a memory location. | -| `cuStreamWriteValue32` | | Write a value to memory. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuStreamAddCallback` | `hipStreamAddCallback` | +| `cuStreamAttachMemAsync` | | +| `cuStreamCreate` | | +| `cuStreamCreateWithPriority` | | +| `cuStreamDestroy` | `hipStreamDestroy` | +| `cuStreamGetFlags` | `hipStreamGetFlags` | +| `cuStreamGetPriority` | `hipStreamGetPriority` | +| `cuStreamQuery` | `hipStreamQuery` | +| `cuStreamSynchronize` | `hipStreamSynchronize` | +| `cuStreamWaitEvent` | `hipStreamWaitEvent` | +| `cuStreamBatchMemOp` | | +| `cuStreamWaitValue32` | | +| `cuStreamWriteValue32` | | ## **14. Event Management** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cuEventCreate` | `hipEventCreate` | Creates an event. | -| `cuEventDestroy` | `hipEventDestroy` | Destroys an event. | -| `cuEventElapsedTime` | `hipEventElapsedTime` | Computes the elapsed time between two events. 
| -| `cuEventQuery` | `hipEventQuery` | Queries an event's status. | -| `cuEventRecord` | `hipEventRecord` | Records an event. | -| `cuEventSynchronize` | `hipEventSynchronize` | Waits for an event to complete. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuEventCreate` | `hipEventCreate` | +| `cuEventDestroy` | `hipEventDestroy` | +| `cuEventElapsedTime` | `hipEventElapsedTime` | +| `cuEventQuery` | `hipEventQuery` | +| `cuEventRecord` | `hipEventRecord` | +| `cuEventSynchronize` | `hipEventSynchronize` | ## **15. Execution Control** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cuFuncGetAttribute` | | Returns information about a function. | -| `cuFuncSetCacheConfig` | `hipFuncSetCacheConfig` | Sets the preferred cache configuration for a device function. | -| `cuFuncSetSharedMemConfig` | | Sets the shared memory configuration for a device function. | -| `cuLaunchKernel` | `hipModuleLaunchKernel` | Launches a CUDA function. | - +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuFuncGetAttribute` | | +| `cuFuncSetCacheConfig` | `hipFuncSetCacheConfig` | +| `cuFuncSetSharedMemConfig` | | +| `cuLaunchKernel` | `hipModuleLaunchKernel` | ## **16. 
Execution Control [DEPRECATED]** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| - +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuFuncSetBlockShape` | | +| `cuFuncSetSharedSize` | | +| `cuLaunch` | | +| `cuLaunchGrid` | | +| `cuLaunchGridAsync` | | +| `cuParamSetf` | | +| `cuParamSeti` | | +| `cuParamSetTexRef` | | +| `cuParamSetv` | | ## **17. Occupancy** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| - +| **CUDA** | **HIP** | +|-----------------------------------------------------------|---------------------------------------------------------| +| `cuOccupancyMaxActiveBlocksPerMultiprocessor` | `hipOccupancyMaxActiveBlocksPerMultiprocessor` | +| `cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | | +| `cuOccupancyMaxPotentialBlockSize` | `hipOccupancyMaxPotentialBlockSize` | +| `cuOccupancyMaxPotentialBlockSizeWithFlags` | | ## **18. 
Texture Reference Management** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| - +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuTexRefGetAddress` | | +| `cuTexRefGetAddressMode` | | +| `cuTexRefGetArray` | | +| `cuTexRefGetBorderColor` | | +| `cuTexRefGetFilterMode` | | +| `cuTexRefGetFlags` | | +| `cuTexRefGetFormat` | | +| `cuTexRefGetMaxAnisotropy` | | +| `cuTexRefGetMipmapFilterMode` | | +| `cuTexRefGetMipmapLevelBias` | | +| `cuTexRefGetMipmapLevelClamp` | | +| `cuTexRefGetMipmappedArray` | | +| `cuTexRefSetAddress` | | +| `cuTexRefSetAddress2D` | | +| `cuTexRefSetAddressMode` | | +| `cuTexRefSetArray` | | +| `cuTexRefSetBorderColor` | | +| `cuTexRefSetFilterMode` | | +| `cuTexRefSetFlags` | | +| `cuTexRefSetFormat` | | +| `cuTexRefSetMaxAnisotropy` | | +| `cuTexRefSetMipmapFilterMode` | | +| `cuTexRefSetMipmapLevelBias` | | +| `cuTexRefSetMipmapLevelClamp` | | +| `cuTexRefSetMipmappedArray` | | ## **19. Texture Reference Management [DEPRECATED]** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| - +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuTexRefCreate` | | +| `cuTexRefDestroy` | | ## **20. 
Surface Reference Management** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| - +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuSurfRefGetArray` | | +| `cuSurfRefSetArray` | | ## **21. Texture Object Management** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| - +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuTexObjectCreate` | | +| `cuTexObjectDestroy` | | +| `cuTexObjectGetResourceDesc` | | +| `cuTexObjectGetResourceViewDesc` | | +| `cuTexObjectGetTextureDesc` | | ## **22. Surface Object Management** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| - +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuSurfObjectCreate` | | +| `cuSurfObjectDestroy` | | +| `cuSurfObjectGetResourceDesc` | | ## **23. 
Peer Context Memory Access** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| - +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuCtxEnablePeerAccess` | `hipCtxEnablePeerAccess` | +| `cuCtxDisablePeerAccess` | `hipCtxDisablePeerAccess` | +| `cuDeviceCanAccessPeer` | `hipDeviceCanAccessPeer` | +| `cuDeviceGetP2PAttribute` | | ## **24. Graphics Interoperability** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| - +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuGraphicsMapResources` | | +| `cuGraphicsResourceGetMappedMipmappedArray` | | +| `cuGraphicsResourceGetMappedPointer` | | +| `cuGraphicsResourceSetMapFlags` | | +| `cuGraphicsSubResourceGetMappedArray` | | +| `cuGraphicsUnmapResources` | | +| `cuGraphicsUnregisterResource` | | ## **25. Profiler Control** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| - +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuProfilerInitialize` | | +| `cuProfilerStart` | `hipProfilerStart` | +| `cuProfilerStop` | `hipProfilerStop` | ## **26. 
OpenGL Interoperability** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuGLGetDevices` | | +| `cuGraphicsGLRegisterBuffer` | | +| `cuGraphicsGLRegisterImage` | | +| `cuWGLGetDevice` | | +## **26.1. OpenGL Interoperability [DEPRECATED]** +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuGLCtxCreate` | | +| `cuGLInit` | | +| `cuGLMapBufferObject` | | +| `cuGLMapBufferObjectAsync` | | +| `cuGLRegisterBufferObject` | | +| `cuGLSetBufferObjectMapFlags` | | +| `cuGLUnmapBufferObject` | | +| `cuGLUnmapBufferObjectAsync` | | +| `cuGLUnregisterBufferObject` | | ## **27. Direct3D 9 Interoperability** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuD3D9CtxCreate` | | +| `cuD3D9CtxCreateOnDevice` | | +| `cuD3D9GetDevice` | | +| `cuD3D9GetDevices` | | +| `cuD3D9GetDirect3DDevice` | | +| `cuGraphicsD3D9RegisterResource` | | +## **27.1. 
Direct3D 9 Interoperability [DEPRECATED]** +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuD3D9MapResources` | | +| `cuD3D9RegisterResource` | | +| `cuD3D9ResourceGetMappedArray` | | +| `cuD3D9ResourceGetMappedPitch` | | +| `cuD3D9ResourceGetMappedPointer` | | +| `cuD3D9ResourceGetMappedSize` | | +| `cuD3D9ResourceGetSurfaceDimensions` | | +| `cuD3D9ResourceSetMapFlags` | | +| `cuD3D9UnmapResources` | | +| `cuD3D9UnregisterResource` | | ## **28. Direct3D 10 Interoperability** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuD3D10GetDevice` | | +| `cuD3D10GetDevices` | | +| `cuGraphicsD3D10RegisterResource` | | +## **28.1. Direct3D 10 Interoperability [DEPRECATED]** +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuD3D10CtxCreate` | | +| `cuD3D10CtxCreateOnDevice` | | +| `cuD3D10GetDirect3DDevice` | | +| `cuD3D10MapResources` | | +| `cuD3D10RegisterResource` | | +| `cuD3D10ResourceGetMappedArray` | | +| `cuD3D10ResourceGetMappedPitch` | | +| `cuD3D10ResourceGetMappedPointer` | | +| `cuD3D10ResourceGetMappedSize` | | +| `cuD3D10ResourceGetSurfaceDimensions` | | +| `cuD3D10ResourceSetMapFlags` | | +| `cuD3D10UnmapResources` | | +| `cuD3D10UnregisterResource` | | ## **29. 
Direct3D 11 Interoperability** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuD3D11GetDevice` | | +| `cuD3D11GetDevices` | | +| `cuGraphicsD3D11RegisterResource` | | +## **29.1. Direct3D 11 Interoperability [DEPRECATED]** +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuD3D11CtxCreate` | | +| `cuD3D11CtxCreateOnDevice` | | +| `cuD3D11GetDirect3DDevice` | | ## **30. VDPAU Interoperability** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuGraphicsVDPAURegisterOutputSurface` | | +| `cuGraphicsVDPAURegisterVideoSurface` | | +| `cuVDPAUCtxCreate` | | +| `cuVDPAUGetDevice` | | +## **31. 
EGL Interoperability** + +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cuEGLStreamConsumerAcquireFrame` | | +| `cuEGLStreamConsumerConnect` | | +| `cuEGLStreamConsumerConnectWithFlags` | | +| `cuEGLStreamConsumerDisconnect` | | +| `cuEGLStreamConsumerReleaseFrame` | | +| `cuEGLStreamProducerConnect` | | +| `cuEGLStreamProducerDisconnect` | | +| `cuEGLStreamProducerPresentFrame` | | +| `cuEGLStreamProducerReturnFrame` | | +| `cuGraphicsEGLRegisterImage` | | +| `cuGraphicsResourceGetMappedEglFrame` | | diff --git a/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md b/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md index c5df7f6bcd..73994026e3 100644 --- a/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -2,636 +2,784 @@ ## **1. Device Management** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaChooseDevice` | `hipChooseDevice` | Select compute-device which best matches criteria. | -| `cudaDeviceGetAttribute` | `hipDeviceGetAttribute` | Returns information about the device. | -| `cudaDeviceGetByPCIBusId` | `hipDeviceGetByPCIBusId` | Returns a handle to a compute device. | -| `cudaDeviceGetCacheConfig` | `hipDeviceGetCacheConfig` | Returns the preferred cache configuration for the current device. | -| `cudaDeviceGetLimit` | `hipDeviceGetLimit` | Returns resource limits. | -| `cudaDeviceGetPCIBusId` | `hipDeviceGetPCIBusId` | Returns a PCI Bus Id string for the device. | -| `cudaDeviceGetSharedMemConfig` | `hipDeviceGetSharedMemConfig` | Returns the shared memory configuration for the current device. 
| -| `cudaDeviceGetStreamPriorityRange` | | Returns numerical values that correspond to the least and greatest stream priorities. | -| `cudaDeviceReset` | `hipDeviceReset` | Destroy all allocations and reset all state on the current device in the current process. | -| `cudaDeviceSetCacheConfig` | `hipDeviceSetCacheConfig` | Sets the preferred cache configuration for the current device. | -| `cudaDeviceSetLimit` | `hipDeviceSetLimit` | Set resource limits. | -| `cudaDeviceSetSharedMemConfig` | `hipDeviceSetSharedMemConfig` | Sets the shared memory configuration for the current device. | -| `cudaDeviceSynchronize` | `hipDeviceSynchronize` | Wait for compute device to finish. | -| `cudaGetDevice` | `hipGetDevice` | Returns which device is currently being used. | -| `cudaGetDeviceCount` | `hipGetDeviceCount` | Returns the number of compute-capable devices. | -| `cudaGetDeviceFlags` | | Gets the flags for the current device. | -| `cudaGetDeviceProperties` | `hipGetDeviceProperties` | Returns information about the compute-device. | -| `cudaIpcCloseMemHandle` | `hipIpcCloseMemHandle` | Close memory mapped with cudaIpcOpenMemHandle. | -| `cudaIpcGetEventHandle` | `hipIpcGetEventHandle` | Gets an interprocess handle for a previously allocated event. | -| `cudaIpcGetMemHandle` | `hipIpcGetMemHandle` | Gets an interprocess memory handle for an existing device memory allocation. | -| `cudaIpcOpenEventHandle` | `hipIpcOpenEventHandle` | Opens an interprocess event handle for use in the current process. | -| `cudaIpcOpenMemHandle` | `hipIpcOpenMemHandle` | Opens an interprocess memory handle exported from another process and returns a device pointer usable in the local process. | -| `cudaSetDevice` | `hipSetDevice` | Set device to be used for GPU executions. | -| `cudaSetDeviceFlags` | `hipSetDeviceFlags` | Sets flags to be used for device executions. | -| `cudaSetValidDevices` | | Set a list of devices that can be used for CUDA. 
| +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaChooseDevice` | `hipChooseDevice` | +| `cudaDeviceGetAttribute` | `hipDeviceGetAttribute` | +| `cudaDeviceGetByPCIBusId` | `hipDeviceGetByPCIBusId` | +| `cudaDeviceGetCacheConfig` | `hipDeviceGetCacheConfig` | +| `cudaDeviceGetLimit` | `hipDeviceGetLimit` | +| `cudaDeviceGetPCIBusId` | `hipDeviceGetPCIBusId` | +| `cudaDeviceGetSharedMemConfig` | `hipDeviceGetSharedMemConfig` | +| `cudaDeviceGetStreamPriorityRange` | | +| `cudaDeviceReset` | `hipDeviceReset` | +| `cudaDeviceSetCacheConfig` | `hipDeviceSetCacheConfig` | +| `cudaDeviceSetLimit` | `hipDeviceSetLimit` | +| `cudaDeviceSetSharedMemConfig` | `hipDeviceSetSharedMemConfig` | +| `cudaDeviceSynchronize` | `hipDeviceSynchronize` | +| `cudaGetDevice` | `hipGetDevice` | +| `cudaGetDeviceCount` | `hipGetDeviceCount` | +| `cudaGetDeviceFlags` | | +| `cudaGetDeviceProperties` | `hipGetDeviceProperties` | +| `cudaIpcCloseMemHandle` | `hipIpcCloseMemHandle` | +| `cudaIpcGetEventHandle` | `hipIpcGetEventHandle` | +| `cudaIpcGetMemHandle` | `hipIpcGetMemHandle` | +| `cudaIpcOpenEventHandle` | `hipIpcOpenEventHandle` | +| `cudaIpcOpenMemHandle` | `hipIpcOpenMemHandle` | +| `cudaSetDevice` | `hipSetDevice` | +| `cudaSetDeviceFlags` | `hipSetDeviceFlags` | +| `cudaSetValidDevices` | | -## **2. Error Handling** +## **2. Thread Management [DEPRECATED]** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaGetErrorName` | `hipGetErrorName` | Returns the string representation of an error code enum name. | -| `cudaGetErrorString` | `hipGetErrorString` | Returns the description string for an error code. 
| -| `cudaGetLastError` | `hipGetLastError` | Returns the last error from a runtime call. | -| `cudaPeekAtLastError` | `hipPeekAtLastError` | Returns the last error from a runtime call. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaThreadExit` | `hipDeviceReset` | +| `cudaThreadGetCacheConfig` | `hipDeviceGetCacheConfig` | +| `cudaThreadGetLimit` | | +| `cudaThreadSetCacheConfig` | `hipDeviceSetCacheConfig` | +| `cudaThreadSetLimit` | | +| `cudaThreadSynchronize` | `hipDeviceSynchronize` | -## **3. Stream Management** +## **3. Error Handling** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaStreamAddCallback` | `hipStreamAddCallback` | Add a callback to a compute stream. | -| `cudaStreamAttachMemAsync` | | Attach managed memory to a stream asynchronously. | -| `cudaStreamCreate` | `hipStreamCreate` | Create an asynchronous stream. | -| `cudaStreamCreateWithFlags` | `hipStreamCreateWithFlags` | Create an asynchronous stream. | -| `cudaStreamCreateWithPriority` | | Create an asynchronous stream with the specified priority. | -| `cudaStreamDestroy` | `hipStreamDestroy` | Destroys and cleans up an asynchronous stream. | -| `cudaStreamGetFlags` | `hipStreamGetFlags` | Query the flags of a stream. | -| `cudaStreamGetPriority` | | Query the priority of a stream. | -| `cudaStreamQuery` | `hipStreamQuery` | Queries an asynchronous stream for completion status. | -| `cudaStreamSynchronize` | `hipStreamSynchronize` | Waits for stream tasks to complete. | -| `cudaStreamWaitEvent` | `hipStreamWaitEvent` | Make a compute stream wait on an event. 
| +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaGetErrorName` | `hipGetErrorName` | +| `cudaGetErrorString` | `hipGetErrorString` | +| `cudaGetLastError` | `hipGetLastError` | +| `cudaPeekAtLastError` | `hipPeekAtLastError` | -## **4. Event Management** +## **4. Stream Management** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaEventCreate` | `hipEventCreate` | Creates an event object. | -| `cudaEventCreateWithFlags` | `hipEventCreateWithFlags` | Creates an event object with the specified flags. | -| `cudaEventDestroy` | `hipEventDestroy` | Destroys an event object. | -| `cudaEventElapsedTime` | `hipEventElapsedTime` | Computes the elapsed time between events. | -| `cudaEventQuery` | `hipEventQuery` | Queries an event's status. | -| `cudaEventRecord` | `hipEventRecord` | Records an event. | -| `cudaEventSynchronize` | `hipEventSynchronize` | Waits for an event to complete. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaStreamAddCallback` | `hipStreamAddCallback` | +| `cudaStreamAttachMemAsync` | | +| `cudaStreamCreate` | `hipStreamCreate` | +| `cudaStreamCreateWithFlags` | `hipStreamCreateWithFlags` | +| `cudaStreamCreateWithPriority` | | +| `cudaStreamDestroy` | `hipStreamDestroy` | +| `cudaStreamGetFlags` | `hipStreamGetFlags` | +| `cudaStreamGetPriority` | | +| `cudaStreamQuery` | `hipStreamQuery` | +| `cudaStreamSynchronize` | `hipStreamSynchronize` | +| `cudaStreamWaitEvent` | `hipStreamWaitEvent` | -## **5. Execution Control** +## **5. 
Event Management** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaFuncGetAttributes` | | Find out attributes for a given function. | -| `cudaFuncSetCacheConfig` | `hipFuncSetCacheConfig` | Sets the preferred cache configuration for a device function. | -| `cudaFuncSetSharedMemConfig` | | Sets the shared memory configuration for a device function. | -| `cudaGetParameterBuffer` | | Obtains a parameter buffer. | -| `cudaGetParameterBufferV2` | | Launches a specified kernel. | -| `cudaLaunchKernel` | `hipLaunchKernel` | Launches a device function. | -| `cudaSetDoubleForDevice` | | Converts a double argument to be executed on a device. | -| `cudaSetDoubleForHost` | | Converts a double argument after execution on a device. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaEventCreate` | `hipEventCreate` | +| `cudaEventCreateWithFlags` | `hipEventCreateWithFlags` | +| `cudaEventDestroy` | `hipEventDestroy` | +| `cudaEventElapsedTime` | `hipEventElapsedTime` | +| `cudaEventQuery` | `hipEventQuery` | +| `cudaEventRecord` | `hipEventRecord` | +| `cudaEventSynchronize` | `hipEventSynchronize` | -## **6. Occupancy** +## **6. Execution Control** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaOccupancyMaxActiveBlocksPerMultiprocessor` | `hipOccupancyMaxActiveBlocksPerMultiprocessor`| Returns occupancy for a device function. 
| -| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | | Returns occupancy for a device function with the specified flags. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaFuncGetAttributes` | | +| `cudaFuncSetCacheConfig` | `hipFuncSetCacheConfig` | +| `cudaFuncSetSharedMemConfig` | | +| `cudaGetParameterBuffer` | | +| `cudaGetParameterBufferV2` | | +| `cudaLaunchKernel` | `hipLaunchKernel` | +| `cudaSetDoubleForDevice` | | +| `cudaSetDoubleForHost` | | -## **7. Execution Control [deprecated since 7.0]** +## **7. Occupancy** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaConfigureCall` | | Configure a device-launch. | -| `cudaLaunch` | | Launches a device function. | -| `cudaSetupArgument` | | Configure a device launch. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaOccupancyMaxActiveBlocksPerMultiprocessor` | `hipOccupancyMaxActiveBlocksPerMultiprocessor`| +| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | | -## **8. Memory Management** +## **8. Execution Control [deprecated since 7.0]** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaArrayGetInfo` | | Gets info about the specified cudaArray. | -| `cudaFree` | `hipFree` | Frees memory on the device. | -| `cudaFreeArray` | `hipFreeArray` | Frees an array on the device. | -| `cudaFreeHost` | `hipHostFree` | Frees page-locked memory. 
| -| `cudaFreeMipmappedArray` | | Frees a mipmapped array on the device. | -| `cudaGetMipmappedArrayLevel` | | Gets a mipmap level of a CUDA mipmapped array. | -| `cudaGetSymbolAddress` | | Finds the address associated with a CUDA symbol. | -| `cudaGetSymbolSize` | | Finds the size of the object associated with a CUDA symbol. | -| `cudaHostAlloc` | `hipHostMalloc` | Allocates page-locked memory on the host. | -| `cudaHostGetDevicePointer` | `hipHostGetDevicePointer` | Passes back device pointer of mapped host memory allocated by cudaHostAlloc or registered by cudaHostRegister. | -| `cudaHostGetFlags` | `hipHostGetFlags` | Passes back flags used to allocate pinned host memory allocated by cudaHostAlloc. | -| `cudaHostRegister` | `hipHostRegister` | Registers an existing host memory range for use by CUDA. | -| `cudaHostUnregister` | `hipHostUnregister` | Unregisters a memory range that was registered with cudaHostRegister. | -| `cudaMalloc` | `hipMalloc` | Allocate memory on the device. | -| `cudaMalloc3D` | | Allocates logical 1D, 2D, or 3D memory objects on the device. | -| `cudaMalloc3DArray` | | Allocate an array on the device. | -| `cudaMallocArray` | `hipMallocArray` | Allocate an array on the device. | -| `cudaMallocHost` | `hipHostMalloc` | Allocates page-locked memory on the host. | -| `cudaMallocManaged` | | Allocates memory that will be automatically managed by the Unified Memory system. | -| `cudaMallocMipmappedArray` | | Allocate a mipmapped array on the device. | -| `cudaMallocPitch` | | Allocates pitched memory on the device. | -| `cudaMemGetInfo` | `hipMemGetInfo` | Gets free and total device memory. | -| `cudaMemcpy` | `hipMemcpy` | Copies data between host and device. | -| `cudaMemcpy2D` | `hipMemcpy2D` | Copies data between host and device. | -| `cudaMemcpy2DArrayToArray` | | Copies data between host and device. | -| `cudaMemcpy2DAsync` | | Copies data between host and device. | -| `cudaMemcpy2DFromArray` | | Copies data between host and device. 
| -| `cudaMemcpy2DFromArrayAsync` | | Copies data between host and device. | -| `cudaMemcpy2DToArray` | `hipMemcpy2DToArray` | Copies data between host and device. | -| `cudaMemcpy2DToArrayAsync` | | Copies data between host and device. | -| `cudaMemcpy3D` | | Copies data between 3D objects. | -| `cudaMemcpy3DAsync` | | Copies data between 3D objects. | -| `cudaMemcpy3DPeer` | | Copies memory between devices. | -| `cudaMemcpy3DPeerAsync` | | Copies memory between devices asynchronously. | -| `cudaMemcpyArrayToArray` | | Copies data between host and device. | -| `cudaMemcpyAsync` | `hipMemcpyAsync` | Copies data between host and device. | -| `cudaMemcpyFromArray` | `MemcpyFromArray` | Copies data between host and device. | -| `cudaMemcpyFromArrayAsync` | | Copies data between host and device. | -| `cudaMemcpyFromSymbol` | `hipMemcpyFromSymbol` | Copies data from the given symbol on the device. | -| `cudaMemcpyFromSymbolAsync` | | Copies data from the given symbol on the device. | -| `cudaMemcpyPeer` | `hipMemcpyPeer` | Copies memory between two devices. | -| `cudaMemcpyPeerAsync` | `hipMemcpyPeerAsync` | Copies memory between two devices asynchronously. | -| `cudaMemcpyToArray` | `hipMemcpyToArray` | Copies data between host and device. | -| `cudaMemcpyToArrayAsync` | | Copies data between host and device. | -| `cudaMemcpyToSymbol` | `hipMemcpyToSymbol` | Copies data to the given symbol on the device. | -| `cudaMemcpyToSymbolAsync` | `hipMemcpyToSymbolAsync` | Copies data to the given symbol on the device. | -| `cudaMemset` | `hipMemset` | Initializes or sets device memory to a value. | -| `cudaMemset2D` | | Initializes or sets device memory to a value. | -| `cudaMemset2DAsync` | | Initializes or sets device memory to a value. | -| `cudaMemset3D` | | Initializes or sets device memory to a value. | -| `cudaMemset3DAsync` | | Initializes or sets device memory to a value. | -| `cudaMemsetAsync` | `hipMemsetAsync` | Initializes or sets device memory to a value. 
| -| `make_cudaExtent` | | Returns a cudaExtent based on input parameters. | -| `make_cudaPitchedPtr` | | Returns a cudaPitchedPtr based on input parameters. | -| `make_cudaPos` | | Returns a cudaPos based on input parameters. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaConfigureCall` | | +| `cudaLaunch` | | +| `cudaSetupArgument` | | -## **9. Unified Addressing** +## **9. Memory Management** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaPointerGetAttributes` | `hipPointerGetAttributes` | Returns attributes about a specified pointer. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaArrayGetInfo` | | +| `cudaFree` | `hipFree` | +| `cudaFreeArray` | `hipFreeArray` | +| `cudaFreeHost` | `hipHostFree` | +| `cudaFreeMipmappedArray` | | +| `cudaGetMipmappedArrayLevel` | | +| `cudaGetSymbolAddress` | | +| `cudaGetSymbolSize` | | +| `cudaHostAlloc` | `hipHostMalloc` | +| `cudaHostGetDevicePointer` | `hipHostGetDevicePointer` | +| `cudaHostGetFlags` | `hipHostGetFlags` | +| `cudaHostRegister` | `hipHostRegister` | +| `cudaHostUnregister` | `hipHostUnregister` | +| `cudaMalloc` | `hipMalloc` | +| `cudaMalloc3D` | | +| `cudaMalloc3DArray` | `hipMalloc3DArray` | +| `cudaMallocArray` | `hipMallocArray` | +| `cudaMallocHost` | `hipHostMalloc` | +| `cudaMallocManaged` | | +| `cudaMallocMipmappedArray` | | +| `cudaMallocPitch` | | +| `cudaMemGetInfo` | `hipMemGetInfo` | +| `cudaMemcpy` | `hipMemcpy` | +| `cudaMemcpy2D` | `hipMemcpy2D` | +| `cudaMemcpy2DArrayToArray` | | +| `cudaMemcpy2DAsync` | | +| `cudaMemcpy2DFromArray` | | +| `cudaMemcpy2DFromArrayAsync` | | +| `cudaMemcpy2DToArray` | 
`hipMemcpy2DToArray` | +| `cudaMemcpy2DToArrayAsync` | | +| `cudaMemcpy3D` | `hipMemcpy3D` | +| `cudaMemcpy3DAsync` | | +| `cudaMemcpy3DPeer` | | +| `cudaMemcpy3DPeerAsync` | | +| `cudaMemcpyArrayToArray` | | +| `cudaMemcpyAsync` | `hipMemcpyAsync` | +| `cudaMemcpyFromArray` | `hipMemcpyFromArray` | +| `cudaMemcpyFromArrayAsync` | | +| `cudaMemcpyFromSymbol` | `hipMemcpyFromSymbol` | +| `cudaMemcpyFromSymbolAsync` | | +| `cudaMemcpyPeer` | `hipMemcpyPeer` | +| `cudaMemcpyPeerAsync` | `hipMemcpyPeerAsync` | +| `cudaMemcpyToArray` | `hipMemcpyToArray` | +| `cudaMemcpyToArrayAsync` | | +| `cudaMemcpyToSymbol` | `hipMemcpyToSymbol` | +| `cudaMemcpyToSymbolAsync` | `hipMemcpyToSymbolAsync` | +| `cudaMemset` | `hipMemset` | +| `cudaMemset2D` | `hipMemset2D` | +| `cudaMemset2DAsync` | | +| `cudaMemset3D` | | +| `cudaMemset3DAsync` | | +| `cudaMemsetAsync` | `hipMemsetAsync` | +| `make_cudaExtent` | `make_hipExtent` | +| `make_cudaPitchedPtr` | `make_hipPitchedPtr` | +| `make_cudaPos` | `make_hipPos` | -## **10. Peer Device Memory Access** +## **10. Unified Addressing** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaPointerGetAttributes` | `hipPointerGetAttributes` | Returns attributes about a specified pointer. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaPointerGetAttributes` | `hipPointerGetAttributes` | -## **11. 
Peer Device Memory Access** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaGLGetDevices` | | Gets the CUDA devices associated with the current OpenGL context. | -| `cudaGraphicsGLRegisterBuffer` | | Registers an OpenGL buffer object. | -| `cudaGraphicsGLRegisterImage` | | Register an OpenGL texture or renderbuffer object. | -| `cudaWGLGetDevice` | | Gets the CUDA device associated with hGpu. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaDeviceCanAccessPeer` | `hipDeviceCanAccessPeer` | +| `cudaDeviceDisablePeerAccess` | `hipDeviceDisablePeerAccess` | +| `cudaDeviceEnablePeerAccess` | `hipDeviceEnablePeerAccess` | -## **12. Graphics Interoperability** +## **12. OpenGL Interoperability** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaGraphicsMapResources` | | Map graphics resources for access by CUDA. | -| `cudaGraphicsResourceGetMappedMipmappedArray` | | Get a mipmapped array through which to access a mapped graphics resource. | -| `cudaGraphicsResourceGetMappedPointer` | | Get a device pointer through which to access a mapped graphics resource. | -| `cudaGraphicsResourceSetMapFlags` | | Set usage flags for mapping a graphics resource. | -| `cudaGraphicsSubResourceGetMappedArray` | | Get an array through which to access a subresource of a mapped graphics resource. | -| `cudaGraphicsUnmapResources` | | Unmap graphics resources. | -| `cudaGraphicsUnregisterResource` | | Unregisters a graphics resource for access by CUDA. 
| +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaGLGetDevices` | | +| `cudaGraphicsGLRegisterBuffer` | | +| `cudaGraphicsGLRegisterImage` | | +| `cudaWGLGetDevice` | | -## **13. Texture Reference Management** +## **13. OpenGL Interoperability [DEPRECATED]** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaBindTexture` | | Binds a memory area to a texture. | -| `cudaBindTexture2D` | | Binds a 2D memory area to a texture. | -| `cudaBindTextureToArray` | | Binds an array to a texture. | -| `cudaBindTextureToMipmappedArray` | | Binds a mipmapped array to a texture. | -| `cudaCreateChannelDesc` | | Returns a channel descriptor using the specified format. | -| `cudaGetChannelDesc` | | Get the channel descriptor of an array. | -| `cudaGetTextureAlignmentOffset` | | Get the alignment offset of a texture. | -| `cudaGetTextureReference` | | Get the texture reference associated with a symbol. | -| `cudaUnbindTexture` | | Unbinds a texture. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaGLMapBufferObject` | | +| `cudaGLMapBufferObjectAsync` | | +| `cudaGLRegisterBufferObject` | | +| `cudaGLSetBufferObjectMapFlags` | | +| `cudaGLSetGLDevice` | | +| `cudaGLUnmapBufferObject` | | +| `cudaGLUnmapBufferObjectAsync` | | +| `cudaGLUnregisterBufferObject` | | -## **14. Surface Reference Management** +## **14. 
Direct3D 9 Interoperability** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaBindSurfaceToArray` | | Binds an array to a surface. | -| `cudaGetSurfaceReference` | | Get the surface reference associated with a symbol. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaD3D9GetDevice` | | +| `cudaD3D9GetDevices` | | +| `cudaD3D9GetDirect3DDevice` | | +| `cudaD3D9SetDirect3DDevice` | | +| `cudaGraphicsD3D9RegisterResource` | | -## **15. Texture Object Management** +## **15. Direct3D 9 Interoperability [DEPRECATED]** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaCreateTextureObject` | | Creates a texture object. | -| `cudaDestroyTextureObject` | | Destroys a texture object. | -| `cudaGetTextureObjectResourceDesc` | | Returns a texture object's resource descriptor. | -| `cudaGetTextureObjectResourceViewDesc` | | Returns a texture object's resource view descriptor. | -| `cudaGetTextureObjectTextureDesc` | | Returns a texture object's texture descriptor. 
| +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaD3D9MapResources` | | +| `cudaD3D9RegisterResource` | | +| `cudaD3D9ResourceGetMappedArray` | | +| `cudaD3D9ResourceGetMappedPitch` | | +| `cudaD3D9ResourceGetMappedPointer` | | +| `cudaD3D9ResourceGetMappedSize` | | +| `cudaD3D9ResourceGetSurfaceDimensions` | | +| `cudaD3D9ResourceSetMapFlags` | | +| `cudaD3D9UnmapResources` | | +| `cudaD3D9UnregisterResource` | | -## **16. Surface Object Management** +## **16. Direct3D 10 Interoperability** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaCreateSurfaceObject` | | Creates a surface object. | -| `cudaDestroySurfaceObject` | | Destroys a surface object. | -| `cudaGetSurfaceObjectResourceDesc` | | Returns a surface object's resource descriptor Returns the resource descriptor for the surface object specified by surfObject. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaD3D10GetDevice` | | +| `cudaD3D10GetDevices` | | +| `cudaGraphicsD3D10RegisterResource` | | -## **17. Version Management** +## **17. Direct3D 10 Interoperability [DEPRECATED]** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaDriverGetVersion` | `hipDriverGetVersion` | Returns the CUDA driver version. | -| `cudaRuntimeGetVersion` | `hipRuntimeGetVersion` | Returns the CUDA Runtime version. 
| +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaD3D10GetDirect3DDevice` | | +| `cudaD3D10MapResources` | | +| `cudaD3D10RegisterResource` | | +| `cudaD3D10ResourceGetMappedArray` | | +| `cudaD3D10ResourceGetMappedPitch` | | +| `cudaD3D10ResourceGetMappedPointer` | | +| `cudaD3D10ResourceGetMappedSize` | | +| `cudaD3D10ResourceGetSurfaceDimensions` | | +| `cudaD3D10ResourceSetMapFlags` | | +| `cudaD3D10SetDirect3DDevice` | | +| `cudaD3D10UnmapResources` | | +| `cudaD3D10UnregisterResource` | | -## **18. C++ API Routines** +## **18. Direct3D 11 Interoperability** + +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaD3D11GetDevice` | | +| `cudaD3D11GetDevices` | | +| `cudaGraphicsD3D11RegisterResource` | | + +## **19. Direct3D 11 Interoperability [DEPRECATED]** + +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaD3D11GetDirect3DDevice` | | +| `cudaD3D11SetDirect3DDevice` | | + +## **20. VDPAU Interoperability** + +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaGraphicsVDPAURegisterOutputSurface` | | +| `cudaGraphicsVDPAURegisterVideoSurface` | | +| `cudaVDPAUGetDevice` | | +| `cudaVDPAUSetVDPAUDevice` | | + +## **21. 
EGL Interoperability** + +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaEGLStreamConsumerAcquireFrame` | | +| `cudaEGLStreamConsumerConnect` | | +| `cudaEGLStreamConsumerConnectWithFlags` | | +| `cudaEGLStreamConsumerReleaseFrame` | | +| `cudaEGLStreamProducerConnect` | | +| `cudaEGLStreamProducerDisconnect` | | +| `cudaEGLStreamProducerPresentFrame` | | +| `cudaEGLStreamProducerReturnFrame` | | +| `cudaGraphicsEGLRegisterImage` | | +| `cudaGraphicsResourceGetMappedEglFrame` | | + +## **22. Graphics Interoperability** + +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaGraphicsMapResources` | | +| `cudaGraphicsResourceGetMappedMipmappedArray` | | +| `cudaGraphicsResourceGetMappedPointer` | | +| `cudaGraphicsResourceSetMapFlags` | | +| `cudaGraphicsSubResourceGetMappedArray` | | +| `cudaGraphicsUnmapResources` | | +| `cudaGraphicsUnregisterResource` | | + +## **23. Texture Reference Management** + +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaBindTexture` | `hipBindTexture` | +| `cudaBindTexture2D` | `hipBindTexture2D` | +| `cudaBindTextureToArray` | `hipBindTextureToArray` | +| `cudaBindTextureToMipmappedArray` | | +| `cudaCreateChannelDesc` | `hipCreateChannelDesc` | +| `cudaGetChannelDesc` | `hipGetChannelDesc` | +| `cudaGetTextureAlignmentOffset` | | +| `cudaGetTextureReference` | | +| `cudaUnbindTexture` | `hipUnbindTexture` | + +## **24. Surface Reference Management** + +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaBindSurfaceToArray` | | +| `cudaGetSurfaceReference` | | + +## **25. 
Texture Object Management** + +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaCreateTextureObject` |`hipCreateTextureObject` | +| `cudaDestroyTextureObject` |`hipDestroyTextureObject` | +| `cudaGetTextureObjectResourceDesc` |`hipGetTextureObjectResourceDesc` | +| `cudaGetTextureObjectResourceViewDesc` |`hipGetTextureObjectResourceViewDesc` | +| `cudaGetTextureObjectTextureDesc` |`hipGetTextureObjectTextureDesc` | + +## **26. Surface Object Management** + +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaCreateSurfaceObject` | | +| `cudaDestroySurfaceObject` | | +| `cudaGetSurfaceObjectResourceDesc` | | + +## **27. Version Management** + +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaDriverGetVersion` | `hipDriverGetVersion` | +| `cudaRuntimeGetVersion` | `hipRuntimeGetVersion` | + +## **28. C++ API Routines** *(7.0 contains, 7.5 doesn’t)* -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaBindSurfaceToArray` | | Binds an array to a surface. | -| `cudaBindTexture` | `hipBindTexture` | Binds a memory area to a texture. | -| `cudaBindTexture2D` | | Binds a 2D memory area to a texture. | -| `cudaBindTextureToArray` | | Binds an array to a texture. | -| `cudaBindTextureToMipmappedArray` | | Binds a mipmapped array to a texture. | -| `cudaCreateChannelDesc` | `hipCreateChannelDesc` | Returns a channel descriptor using the specified format. | -| `cudaFuncGetAttributes` | | Find out attributes for a given function. | -| `cudaFuncSetCacheConfig` | | Sets the preferred cache configuration for a device function. 
| -| `cudaGetSymbolAddress` | | Finds the address associated with a CUDA symbol | -| `cudaGetSymbolSize` | | Finds the size of the object associated with a CUDA symbol. | -| `cudaGetTextureAlignmentOffset` | | Get the alignment offset of a texture. | -| `cudaLaunch` | | Launches a device function. | -| `cudaLaunchKernel` | | Launches a device function. | -| `cudaMallocHost` | | Allocates page-locked memory on the host | -| `cudaMallocManaged` | | Allocates memory that will be automatically managed by the Unified Memory system. | -| `cudaMemcpyFromSymbol` | | Copies data from the given symbol on the device. | -| `cudaMemcpyFromSymbolAsync` | | Copies data from the given symbol on the device. | -| `cudaMemcpyToSymbol` | | Copies data to the given symbol on the device. | -| `cudaMemcpyToSymbolAsync` | | Async copies data to the given symbol on the device. | -| `cudaOccupancyMaxActiveBlocksPerMultiprocessor` | `hipOccupancyMaxActiveBlocksPerMultiprocessor` | Returns occupancy for a device function. | -| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | | Returns occupancy for a device function with the specified flags. | -| `cudaOccupancyMaxPotentialBlockSize` | `hipOccupancyMaxPotentialBlockSize` | Returns grid and block size that achieves maximum potential occupancy for a device function. | -| `cudaOccupancyMaxPotentialBlockSizeVariableSMem` | | Returns grid and block size that achieves maximum potential occupancy for a device function. | -| `cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags` | | Returns grid and block size that achieves maximum potential occupancy for a device function. | -| `cudaOccupancyMaxPotentialBlockSizeWithFlags` | | Returns grid and block size that achived maximum potential occupancy for a device function with the specified flags. | -| `cudaSetupArgument` | | Configure a device launch. | -| `cudaStreamAttachMemAsync` | | Attach memory to a stream asynchronously. 
| -| `cudaUnbindTexture` | `hipUnbindTexture` | Unbinds a texture. | +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaBindSurfaceToArray` | | +| `cudaBindTexture` | `hipBindTexture` | +| `cudaBindTexture2D` | | +| `cudaBindTextureToArray` | | +| `cudaBindTextureToMipmappedArray` | | +| `cudaCreateChannelDesc` | `hipCreateChannelDesc` | +| `cudaFuncGetAttributes` | | +| `cudaFuncSetCacheConfig` | | +| `cudaGetSymbolAddress` | | +| `cudaGetSymbolSize` | | +| `cudaGetTextureAlignmentOffset` | | +| `cudaLaunch` | | +| `cudaLaunchKernel` | | +| `cudaMallocHost` | | +| `cudaMallocManaged` | | +| `cudaMemcpyFromSymbol` | | +| `cudaMemcpyFromSymbolAsync` | | +| `cudaMemcpyToSymbol` | | +| `cudaMemcpyToSymbolAsync` | | +| `cudaOccupancyMaxActiveBlocksPerMultiprocessor` | `hipOccupancyMaxActiveBlocksPerMultiprocessor` | +| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | | +| `cudaOccupancyMaxPotentialBlockSize` | `hipOccupancyMaxPotentialBlockSize` | +| `cudaOccupancyMaxPotentialBlockSizeVariableSMem` | | +| `cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags` | | +| `cudaOccupancyMaxPotentialBlockSizeWithFlags` | | +| `cudaSetupArgument` | | +| `cudaStreamAttachMemAsync` | | +| `cudaUnbindTexture` | `hipUnbindTexture` | -## **19. Profiler Control** +## **30. Profiler Control** -| **CUDA** | **HIP** | **CUDA description** | -|-----------------------------------------------------------|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `cudaProfilerInitialize` | | Initialize the CUDA profiler. | -| `cudaProfilerStart` | `hipProfilerStart` | Enable profiling. | -| `cudaProfilerStop` | `hipProfilerStop` | Disable profiling. 
| +| **CUDA** | **HIP** | +|-----------------------------------------------------------|-------------------------------| +| `cudaProfilerInitialize` | | +| `cudaProfilerStart` | `hipProfilerStart` | +| `cudaProfilerStop` | `hipProfilerStop` | # Data types used by CUDA Runtime API and supported by HIP -## **20. Data types** +## **31. Data types** -| **type** | **CUDA** | **HIP** | **CUDA description** | -|-------------:|-----------------------------------------------|------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| struct | `cudaChannelFormatDesc` | `hipChannelFormatDesc` | CUDA Channel format descriptor. | -| struct | `cudaDeviceProp` | `hipDeviceProp_t` | CUDA device properties. | -| struct | `cudaExtent` | | CUDA extent (width, height, depth). | -| struct | `cudaFuncAttributes` | | CUDA function attributes. | -| struct | `cudaIpcEventHandle_t` | `hipIpcEventHandle_t` | CUDA IPC event handle. | -| struct | `cudaIpcMemHandle_t` | `hipIpcMemHandle_t` | CUDA IPC memory handle. | -| struct | `cudaMemcpy3DParms` | | CUDA 3D memory copying parameters. | -| struct | `cudaMemcpy3DPeerParms` | | CUDA 3D cross-device memory copying parameters. | -| struct | `cudaPitchedPtr` | | CUDA Pitched memory pointer. | -| struct | `cudaPointerAttributes` | `hipPointerAttribute_t` | CUDA pointer attributes. | -| struct | `cudaPos` | | CUDA 3D position. | -| struct | `cudaResourceDesc` | | CUDA resource descriptor. | -| struct | `cudaResourceViewDesc` | | CUDA resource view descriptor. | -| struct | `cudaTextureDesc` | | CUDA texture descriptor. | -| struct | `surfaceReference` | | CUDA Surface reference. | -| struct | `textureReference` | `textureReference` | CUDA texture reference. | -| enum |***`cudaChannelFormatKind`*** |***`hipChannelFormatKind`*** | Channel format kind. 
| -| 0 |*`cudaChannelFormatKindSigned`* |*`hipChannelFormatKindSigned`* | Signed channel format. | -| 1 |*`cudaChannelFormatKindUnsigned`* |*`hipChannelFormatKindUnsigned`* | Unsigned channel format. | -| 2 |*`cudaChannelFormatKindFloat`* |*`hipChannelFormatKindFloat`* | Float channel format. | -| 3 |*`cudaChannelFormatKindNone`* |*`hipChannelFormatKindNone`* | No channel format. | -| enum |***`cudaComputeMode`*** | | CUDA device compute modes. | -| 0 |*`cudaComputeModeDefault`* | | Default compute mode (Multiple threads can use ::cudaSetDevice() with this device). | -| 1 |*`cudaComputeModeExclusive`* | | Compute-exclusive-thread mode (Only one thread in one process will be able to use ::cudaSetDevice() with this device). | -| 2 |*`cudaComputeModeProhibited`* | | Compute-prohibited mode (No threads can use ::cudaSetDevice() with this device). | -| 3 |*`cudaComputeModeExclusiveProcess`* | | Compute-exclusive-process mode (Many threads in one process will be able to use ::cudaSetDevice() with this device). | -| enum |***`cudaDeviceAttr`*** |***`hipDeviceAttribute_t`*** | CUDA device attributes. | -| 1 |*`cudaDevAttrMaxThreadsPerBlock`* |*`hipDeviceAttributeMaxThreadsPerBlock`* | Maximum number of threads per block. | -| 2 |*`cudaDevAttrMaxBlockDimX`* |*`hipDeviceAttributeMaxBlockDimX`* | Maximum block dimension X. | -| 3 |*`cudaDevAttrMaxBlockDimY`* |*`hipDeviceAttributeMaxBlockDimY`* | Maximum block dimension Y. | -| 4 |*`cudaDevAttrMaxBlockDimZ`* |*`hipDeviceAttributeMaxBlockDimZ`* | Maximum block dimension Y. | -| 5 |*`cudaDevAttrMaxGridDimX`* |*`hipDeviceAttributeMaxGridDimX`* | Maximum grid dimension X. | -| 6 |*`cudaDevAttrMaxGridDimY`* |*`hipDeviceAttributeMaxGridDimY`* | Maximum grid dimension Y. | -| 7 |*`cudaDevAttrMaxGridDimZ`* |*`hipDeviceAttributeMaxGridDimZ`* | Maximum grid dimension Y. | -| 8 |*`cudaDevAttrMaxSharedMemoryPerBlock`* |*`hipDeviceAttributeMaxSharedMemoryPerBlock`* | Maximum shared memory available per block in bytes. 
| -| 9 |*`cudaDevAttrTotalConstantMemory`* |*`hipDeviceAttributeTotalConstantMemory`* | Memory available on device for \__constant__ variables in a CUDA C kernel in bytes. | -| 10 |*`cudaDevAttrWarpSize`* |*`hipDeviceAttributeWarpSize`* | Warp size in threads. | -| 11 |*`cudaDevAttrMaxPitch`* | | Maximum pitch in bytes allowed by memory copies. | -| 12 |*`cudaDevAttrMaxRegistersPerBlock`* |*`hipDeviceAttributeMaxRegistersPerBlock`* | Maximum number of 32-bit registers available per block. | -| 13 |*`cudaDevAttrClockRate`* |*`hipDeviceAttributeClockRate`* | Peak clock frequency in kilohertz. | -| 14 |*`cudaDevAttrTextureAlignment`* | | Alignment requirement for textures. | -| 15 |*`cudaDevAttrGpuOverlap`* | | Device can possibly copy memory and execute a kernel concurrently. | -| 16 |*`cudaDevAttrMultiProcessorCount`* |*`hipDeviceAttributeMultiprocessorCount`* | Number of multiprocessors on device. | -| 17 |*`cudaDevAttrKernelExecTimeout`* | | Specifies whether there is a run time limit on kernels. | -| 18 |*`cudaDevAttrIntegrated`* | | Device is integrated with host memory. | -| 19 |*`cudaDevAttrCanMapHostMemory`* | | Device can map host memory into CUDA address space. | -| 20 |*`cudaDevAttrComputeMode`* |*`hipDeviceAttributeComputeMode`* | Compute mode (See cudaComputeMode for details). | -| 21 |*`cudaDevAttrMaxTexture1DWidth`* | | Maximum 1D texture width. | -| 22 |*`cudaDevAttrMaxTexture2DWidth`* | | Maximum 2D texture width. | -| 23 |*`cudaDevAttrMaxTexture2DHeight`* | | Maximum 2D texture height. | -| 24 |*`cudaDevAttrMaxTexture3DWidth`* | | Maximum 3D texture width. | -| 25 |*`cudaDevAttrMaxTexture3DHeight`* | | Maximum 3D texture height. | -| 26 |*`cudaDevAttrMaxTexture3DDepth`* | | Maximum 3D texture depth. | -| 27 |*`cudaDevAttrMaxTexture2DLayeredWidth`* | | Maximum 2D layered texture width. | -| 28 |*`cudaDevAttrMaxTexture2DLayeredHeight`* | | Maximum 2D layered texture height. 
| -| 29 |*`cudaDevAttrMaxTexture2DLayeredLayers`* | | Maximum layers in a 2D layered texture. | -| 30 |*`cudaDevAttrSurfaceAlignment`* | | Alignment requirement for surfaces. | -| 31 |*`cudaDevAttrConcurrentKernels`* |*`hipDeviceAttributeConcurrentKernels`* | Device can possibly execute multiple kernels concurrently. | -| 32 |*`cudaDevAttrEccEnabled`* | | Device has ECC support enabled. | -| 33 |*`cudaDevAttrPciBusId`* |*`hipDeviceAttributePciBusId`* | PCI bus ID of the device. | -| 34 |*`cudaDevAttrPciDeviceId`* |*`hipDeviceAttributePciDeviceId`* | PCI device ID of the device. | -| 35 |*`cudaDevAttrTccDriver`* | | Device is using TCC driver model. | -| 36 |*`cudaDevAttrMemoryClockRate`* |*`hipDeviceAttributeMemoryClockRate`* | Peak memory clock frequency in kilohertz. | -| 37 |*`cudaDevAttrGlobalMemoryBusWidth`* |*`hipDeviceAttributeMemoryBusWidth`* | Global memory bus width in bits. | -| 38 |*`cudaDevAttrL2CacheSize`* |*`hipDeviceAttributeL2CacheSize`* | Size of L2 cache in bytes. | -| 39 |*`cudaDevAttrMaxThreadsPerMultiProcessor`* |*`hipDeviceAttributeMaxThreadsPerMultiProcessor`* | Maximum resident threads per multiprocessor. | -| 40 |*`cudaDevAttrAsyncEngineCount`* | | Number of asynchronous engines. | -| 41 |*`cudaDevAttrUnifiedAddressing`* | | Device shares a unified address space with the host. | -| 42 |*`cudaDevAttrMaxTexture1DLayeredWidth`* | | Maximum 1D layered texture width. | -| 43 |*`cudaDevAttrMaxTexture1DLayeredLayers`* | | Maximum layers in a 1D layered texture. | -| 44 | | | *reserved* | -| 45 |*`cudaDevAttrMaxTexture2DGatherWidth`* | | Maximum 2D texture width if cudaArrayTextureGather is set. | -| 46 |*`cudaDevAttrMaxTexture2DGatherHeight`* | | Maximum 2D texture height if cudaArrayTextureGather is set. | -| 47 |*`cudaDevAttrMaxTexture3DWidthAlt`* | | Alternate maximum 3D texture width. | -| 48 |*`cudaDevAttrMaxTexture3DHeightAlt`* | | Alternate maximum 3D texture height. 
| -| 49 |*`cudaDevAttrMaxTexture3DDepthAlt`* | | Alternate maximum 3D texture depth. | -| 50 |*`cudaDevAttrPciDomainId`* | | PCI domain ID of the device. | -| 51 |*`cudaDevAttrTexturePitchAlignment`* | | Pitch alignment requirement for textures. | -| 52 |*`cudaDevAttrMaxTextureCubemapWidth`* | | Maximum cubemap texture width/height. | -| 53 |*`cudaDevAttrMaxTextureCubemapLayeredWidth`* | | Maximum cubemap layered texture width/height. | -| 54 |*`cudaDevAttrMaxTextureCubemapLayeredLayers`* | | Maximum layers in a cubemap layered texture. | -| 55 |*`cudaDevAttrMaxSurface1DWidth`* | | Maximum 1D surface width. | -| 56 |*`cudaDevAttrMaxSurface2DWidth`* | | Maximum 2D surface width. | -| 57 |*`cudaDevAttrMaxSurface2DHeight`* | | Maximum 2D surface height. | -| 58 |*`cudaDevAttrMaxSurface3DWidth`* | | Maximum 3D surface width. | -| 59 |*`cudaDevAttrMaxSurface3DHeight`* | | Maximum 3D surface height. | -| 60 |*`cudaDevAttrMaxSurface3DDepth`* | | Maximum 3D surface depth. | -| 61 |*`cudaDevAttrMaxSurface1DLayeredWidth`* | | Maximum 1D layered surface width. | -| 62 |*`cudaDevAttrMaxSurface1DLayeredLayers`* | | Maximum layers in a 1D layered surface. | -| 63 |*`cudaDevAttrMaxSurface2DLayeredWidth`* | | Maximum 2D layered surface width. | -| 64 |*`cudaDevAttrMaxSurface2DLayeredHeight`* | | Maximum 2D layered surface height. | -| 65 |*`cudaDevAttrMaxSurface2DLayeredLayers`* | | Maximum layers in a 2D layered surface. | -| 66 |*`cudaDevAttrMaxSurfaceCubemapWidth`* | | Maximum cubemap surface width. | -| 67 |*`cudaDevAttrMaxSurfaceCubemapLayeredWidth`* | | Maximum cubemap layered surface width. | -| 68 |*`cudaDevAttrMaxSurfaceCubemapLayeredLayers`* | | Maximum layers in a cubemap layered surface. | -| 69 |*`cudaDevAttrMaxTexture1DLinearWidth`* | | Maximum 1D linear texture width. | -| 70 |*`cudaDevAttrMaxTexture2DLinearWidth`* | | Maximum 2D linear texture width. | -| 71 |*`cudaDevAttrMaxTexture2DLinearHeight`* | | Maximum 2D linear texture height. 
| -| 72 |*`cudaDevAttrMaxTexture2DLinearPitch`* | | Maximum 2D linear texture pitch in bytes. | -| 73 |*`cudaDevAttrMaxTexture2DMipmappedWidth`* | | Maximum mipmapped 2D texture width. | -| 74 |*`cudaDevAttrMaxTexture2DMipmappedHeight`* | | Maximum mipmapped 2D texture height. | -| 75 |*`cudaDevAttrComputeCapabilityMajor`* |*`hipDeviceAttributeComputeCapabilityMajor`* | Major compute capability version number. | -| 76 |*`cudaDevAttrComputeCapabilityMinor`* |*`hipDeviceAttributeComputeCapabilityMinor`* | Minor compute capability version number. | -| 77 |*`cudaDevAttrMaxTexture1DMipmappedWidth`* | | Maximum mipmapped 1D texture width. | -| 78 |*`cudaDevAttrStreamPrioritiesSupported`* | | Device supports stream priorities. | -| 79 |*`cudaDevAttrGlobalL1CacheSupported`* | | Device supports caching globals in L1. | -| 80 |*`cudaDevAttrLocalL1CacheSupported`* | | Device supports caching locals in L1. | -| 81 |*`cudaDevAttrMaxSharedMemoryPerMultiprocessor`*|*`hipDeviceAttributeMaxSharedMemoryPerMultiprocessor`*| Maximum shared memory available per multiprocessor in bytes. | -| 82 |*`cudaDevAttrMaxRegistersPerMultiprocessor`* | | Maximum number of 32-bit registers available per multiprocessor. | -| 83 |*`cudaDevAttrManagedMemory`* | | Device can allocate managed memory on this system. | -| 84 |*`cudaDevAttrIsMultiGpuBoard`* |*`hipDeviceAttributeIsMultiGpuBoard`* | Device is on a multi-GPU board. | -| 85 |*`cudaDevAttrMultiGpuBoardGroupID`* | | Unique identifier for a group of devices on the same multi-GPU board. | -| enum |***`cudaError`*** |***`hipError_t`*** | CUDA Error types. | -| enum |***`cudaError_t`*** |***`hipError_t`*** | CUDA Error types. | -| 0 |*`cudaSuccess`* |*`hipSuccess`* | The API call returned with no errors. 
In the case of query calls, this can also mean that the operation being queried is complete.| -| 1 |*`cudaErrorMissingConfiguration`* | | The device function being invoked (usually via cudaLaunchKernel()) was not previously configured via the cudaConfigureCall() function.| -| 2 |*`cudaErrorMemoryAllocation`* |*`hipErrorMemoryAllocation`* | The API call failed because it was unable to allocate enough memory to perform the requested operation. | -| 3 |*`cudaErrorInitializationError`* |*`hipErrorInitializationError`* | The API call failed because the CUDA driver and runtime could not be initialized. | -| 4 |*`cudaErrorLaunchFailure`* | | An exception occurred on the device while executing a kernel. Common causes include dereferencing an invalid device pointer and accessing out of bounds shared memory. The device cannot be used until cudaThreadExit() is called. All existing device memory allocations are invalid and must be reconstructed if the program is to continue using CUDA. | -| 5 |*`cudaErrorPriorLaunchFailure`* | | This indicated that a previous kernel launch failed. This was previously used for device emulation of kernel launches. Deprecated This error return is deprecated as of CUDA 3.1. Device emulation mode was removed with the CUDA 3.1 release.| -| 6 |*`cudaErrorLaunchTimeout`* | | This indicates that the device kernel took too long to execute. This can only occur if timeouts are enabled - see the device property kernelExecTimeoutEnabled for more information. The device cannot be used until cudaThreadExit() is called. All existing device memory allocations are invalid and must be reconstructed if the program is to continue using CUDA.| -| 7 |*`cudaErrorLaunchOutOfResources`* |*`hipErrorLaunchOutOfResources`* | This indicates that a launch did not occur because it did not have appropriate resources. 
Although this error is similar to cudaErrorInvalidConfiguration, this error usually indicates that the user has attempted to pass too many arguments to the device kernel, or the kernel launch specifies too many threads for the kernel's register count.| -| 8 |*`cudaErrorInvalidDeviceFunction`* | | The requested device function does not exist or is not compiled for the proper device architecture. | -| 9 |*`cudaErrorInvalidConfiguration`* | | This indicates that a kernel launch is requesting resources that can never be satisfied by the current device. Requesting more shared memory per block than the device supports will trigger this error, as will requesting too many threads or blocks. See cudaDeviceProp for more device limitations.| -| 10 |*`cudaErrorInvalidDevice`* |*`hipErrorInvalidDevice`* | This indicates that the device ordinal supplied by the user does not correspond to a valid CUDA device. | -| 11 |*`cudaErrorInvalidValue`* |*`hipErrorInvalidValue`* | This indicates that one or more of the parameters passed to the API call is not within an acceptable range of values. | -| 12 |*`cudaErrorInvalidPitchValue`* | | This indicates that one or more of the pitch-related parameters passed to the API call is not within the acceptable range for pitch.| -| 13 |*`cudaErrorInvalidSymbol`* | | This indicates that the symbol name/identifier passed to the API call is not a valid name or identifier. | -| 14 |*`cudaErrorMapBufferObjectFailed`* | | This indicates that the buffer object could not be mapped. | -| 15 |*`cudaErrorUnmapBufferObjectFailed`* | | This indicates that the buffer object could not be unmapped. | -| 16 |*`cudaErrorInvalidHostPointer`* | | This indicates that at least one host pointer passed to the API call is not a valid host pointer. | -| 17 |*`cudaErrorInvalidDevicePointer`* |*`hipErrorInvalidDevicePointer`* | This indicates that at least one host pointer passed to the API call is not a valid host pointer. 
| -| 18 |*`cudaErrorInvalidTexture`* | | This indicates that the texture passed to the API call is not a valid texture. | -| 19 |*`cudaErrorInvalidTextureBinding`* | | This indicates that the texture binding is not valid. This occurs if you call cudaGetTextureAlignmentOffset() with an unbound texture.| -| 20 |*`cudaErrorInvalidChannelDescriptor`* | | This indicates that the channel descriptor passed to the API call is not valid. This occurs if the format is not one of the formats specified by cudaChannelFormatKind, or if one of the dimensions is invalid.| -| 21 |*`cudaErrorInvalidMemcpyDirection`* | | This indicates that the direction of the memcpy passed to the API call is not one of the types specified by cudaMemcpyKind. | -| 22 |*`cudaErrorAddressOfConstant`* | | This indicated that the user has taken the address of a constant variable, which was forbidden up until the CUDA 3.1 release. Deprecated This error return is deprecated as of CUDA 3.1. Variables in constant memory may now have their address taken by the runtime via cudaGetSymbolAddress().| -| 23 |*`cudaErrorTextureFetchFailed`* | | This indicated that a texture fetch was not able to be performed. This was previously used for device emulation of texture operations. Deprecated This error return is deprecated as of CUDA 3.1. Device emulation mode was removed with the CUDA 3.1 release.| -| 24 |*`cudaErrorTextureNotBound`* | | This indicated that a texture was not bound for access. This was previously used for device emulation of texture operations. Deprecated This error return is deprecated as of CUDA 3.1. Device emulation mode was removed with the CUDA 3.1 release.| -| 25 |*`cudaErrorSynchronizationError`* | | This indicated that a synchronization operation had failed. This was previously used for some device emulation functions. Deprecated This error return is deprecated as of CUDA 3.1. 
Device emulation mode was removed with the CUDA 3.1 release.| -| 26 |*`cudaErrorInvalidFilterSetting`* | | This indicates that a non-float texture was being accessed with linear filtering. This is not supported by CUDA. | -| 27 |*`cudaErrorInvalidNormSetting`* | | This indicates that an attempt was made to read a non-float texture as a normalized float. This is not supported by CUDA. | -| 28 |*`cudaErrorMixedDeviceExecution`* | | Mixing of device and device emulation code was not allowed. Deprecated This error return is deprecated as of CUDA 3.1. Device emulation mode was removed with the CUDA 3.1 release.| -| 29 |*`cudaErrorCudartUnloading`* | | This indicates that a CUDA Runtime API call cannot be executed because it is being called during process shut down, at a point in time after CUDA driver has been unloaded.| -| 30 |*`cudaErrorUnknown`* |*`hipErrorUnknown`* | This indicates that an unknown internal error has occurred. | -| 31 |*`cudaErrorNotYetImplemented`* | | This indicates that the API call is not yet implemented. Production releases of CUDA will never return this error. Deprecated This error return is deprecated as of CUDA 4.1.| -| 32 |*`cudaErrorMemoryValueTooLarge`* | | This indicated that an emulated device pointer exceeded the 32-bit address range. Deprecated This error return is deprecated as of CUDA 3.1. Device emulation mode was removed with the CUDA 3.1 release.| -| 33 |*`cudaErrorInvalidResourceHandle`* |*`hipErrorInvalidResourceHandle`* | This indicates that a resource handle passed to the API call was not valid. Resource handles are opaque types like cudaStream_t and cudaEvent_t.| -| 34 |*`cudaErrorNotReady`* |*`hipErrorNotReady`* | This indicates that asynchronous operations issued previously have not completed yet. This result is not actually an error, but must be indicated differently than cudaSuccess (which indicates completion). 
Calls that may return this value include cudaEventQuery() and cudaStreamQuery().| -| 35 |*`cudaErrorInsufficientDriver`* | | This indicates that the installed NVIDIA CUDA driver is older than the CUDA runtime library. This is not a supported configuration. Users should install an updated NVIDIA display driver to allow the application to run.| -| 36 |*`cudaErrorSetOnActiveProcess`* | | This indicates that the user has called cudaSetValidDevices(), cudaSetDeviceFlags(), cudaD3D9SetDirect3DDevice(), cudaD3D10SetDirect3DDevice, cudaD3D11SetDirect3DDevice(), or cudaVDPAUSetVDPAUDevice() after initializing the CUDA runtime by calling non-device management operations (allocating memory and launching kernels are examples of non-device management operations). This error can also be returned if using runtime/driver interoperability and there is an existing CUcontext active on the host thread.| -| 37 |*`cudaErrorInvalidSurface`* | | This indicates that the surface passed to the API call is not a valid surface. | -| 38 |*`cudaErrorNoDevice`* |*`hipErrorNoDevice`* | This indicates that no CUDA-capable devices were detected by the installed CUDA driver. | -| 39 |*`cudaErrorECCUncorrectable`* | | This indicates that an uncorrectable ECC error was detected during execution. | -| 40 |*`cudaErrorSharedObjectSymbolNotFound`* | | This indicates that a link to a shared object failed to resolve. | -| 41 |*`cudaErrorSharedObjectInitFailed`* | | This indicates that initialization of a shared object failed. | -| 42 |*`cudaErrorUnsupportedLimit`* |*`hipErrorUnsupportedLimit`* | This indicates that the cudaLimit passed to the API call is not supported by the active device. 
| -| 43 |*`cudaErrorDuplicateVariableName`* | | This indicates that multiple global or constant variables (across separate CUDA source files in the application) share the same string name.| -| 44 |*`cudaErrorDuplicateTextureName`* | | This indicates that multiple textures (across separate CUDA source files in the application) share the same string name. | -| 45 |*`cudaErrorDuplicateSurfaceName`* | | This indicates that multiple surfaces (across separate CUDA source files in the application) share the same string name. | -| 46 |*`cudaErrorDevicesUnavailable`* | | This indicates that all CUDA devices are busy or unavailable at the current time. Devices are often busy/unavailable due to use of cudaComputeModeExclusive, cudaComputeModeProhibited or when long running CUDA kernels have filled up the GPU and are blocking new work from starting. They can also be unavailable due to memory constraints on a device that already has active CUDA work being performed.| -| 47 |*`cudaErrorInvalidKernelImage`* | | This indicates that the device kernel image is invalid. | -| 48 |*`cudaErrorNoKernelImageForDevice`* | | This indicates that there is no kernel image available that is suitable for the device. This can occur when a user specifies code generation options for a particular CUDA source file that do not include the corresponding device configuration.| -| 49 |*`cudaErrorIncompatibleDriverContext`* | | This indicates that the current context is not compatible with this the CUDA Runtime. This can only occur if you are using CUDA Runtime/Driver interoperability and have created an existing Driver context using the driver API. The Driver context may be incompatible either because the Driver context was created using an older version of the API, because the Runtime API call expects a primary driver context and the Driver context is not primary, or because the Driver context has been destroyed. 
Please see Interactions with the CUDA Driver API" for more information.| -| 50 |*`cudaErrorPeerAccessAlreadyEnabled`* |*`hipErrorPeerAccessAlreadyEnabled`* | This error indicates that a call to cudaDeviceEnablePeerAccess() is trying to re-enable peer addressing on from a context which has already had peer addressing enabled.| -| 51 |*`cudaErrorPeerAccessNotEnabled`* |*`hipErrorPeerAccessNotEnabled`* | This error indicates that a call to cudaDeviceEnablePeerAccess() is trying to re-enable peer addressing on from a context which has already had peer addressing enabled.| -| 52 | | | *reserved* | -| 53 | | | *reserved* | -| 54 |*`cudaErrorDeviceAlreadyInUse`* | | This indicates that a call tried to access an exclusive-thread device that is already in use by a different thread. | -| 55 |*`cudaErrorProfilerDisabled`* | | This indicates profiler is not initialized for this run. This can happen when the application is running with external profiling tools like visual profiler.| -| 56 |*`cudaErrorProfilerNotInitialized`* | | Deprecated This error return is deprecated as of CUDA 5.0. It is no longer an error to attempt to enable/disable the profiling via cudaProfilerStart or cudaProfilerStop without initialization.| -| 57 |*`cudaErrorProfilerAlreadyStarted`* | | Deprecated This error return is deprecated as of CUDA 5.0. It is no longer an error to call cudaProfilerStart() when profiling is already enabled.| -| 58 |*`cudaErrorProfilerAlreadyStopped`* | | Deprecated This error return is deprecated as of CUDA 5.0. It is no longer an error to call cudaProfilerStop() when profiling is already disabled.| -| 59 |*`cudaErrorAssert`* | | An assert triggered in device code during kernel execution. The device cannot be used again until cudaThreadExit() is called. 
All existing allocations are invalid and must be reconstructed if the program is to continue using CUDA.| -| 60 |*`cudaErrorTooManyPeers`* | | This error indicates that the hardware resources required to enable peer access have been exhausted for one or more of the devices passed to cudaEnablePeerAccess().| -| 61 |*`cudaErrorHostMemoryAlreadyRegistered`* | *`hipErrorHostMemoryAlreadyRegistered`* | This error indicates that the memory range passed to cudaHostRegister() has already been registered. | -| 62 |*`cudaErrorHostMemoryNotRegistered`* | *`hipErrorHostMemoryNotRegistered`* | This error indicates that the pointer passed to cudaHostUnregister() does not correspond to any currently registered memory region.| -| 63 |*`cudaErrorOperatingSystem`* | | This error indicates that an OS call failed. | -| 64 |*`cudaErrorPeerAccessUnsupported`* | | This error indicates that P2P access is not supported across the given devices. | -| 65 |*`cudaErrorLaunchMaxDepthExceeded`* | | This error indicates that a device runtime grid launch did not occur because the depth of the child grid would exceed the maximum supported number of nested grid launches.| -| 66 |*`cudaErrorLaunchFileScopedTex`* | | This error indicates that a grid launch did not occur because the kernel uses filescoped textures which are unsupported by the device runtime. Kernels launched via the device runtime only support textures created with the Texture Object API's.| -| 67 |*`cudaErrorLaunchFileScopedSurf`* | | This error indicates that a grid launch did not occur because the kernel uses filescoped surfaces which are unsupported by the device runtime. 
Kernels launched via the device runtime only support surfaces created with the Surface Object API's.| -| 68 |*`cudaErrorSyncDepthExceeded`* | | This error indicates that a call to cudaDeviceSynchronize made from the device runtime failed because the call was made at grid depth greater than than either the default (2 levels of grids) or user specified device limit cudaLimitDevRuntimeSyncDepth. To be able to synchronize on launched grids at a greater depth successfully, the maximum nested depth at which cudaDeviceSynchronize will be called must be specified with the cudaLimitDevRuntimeSyncDepth limit to the cudaDeviceSetLimit api before the host-side launch of a kernel using the device runtime. Keep in mind that additional levels of sync depth require the runtime to reserve large amounts of device memory that cannot be used for user allocations.| -| 69 |*`cudaErrorLaunchPendingCountExceeded`* | | This error indicates that a device runtime grid launch failed because the launch would exceed the limit cudaLimitDevRuntimePendingLaunchCount. For this launch to proceed successfully, cudaDeviceSetLimit must be called to set the cudaLimitDevRuntimePendingLaunchCount to be higher than the upper bound of outstanding launches that can be issued to the device runtime. Keep in mind that raising the limit of pending device runtime launches will require the runtime to reserve device memory that cannot be used for user allocations.| -| 70 |*`cudaErrorNotPermitted`* | | This error indicates the attempted operation is not permitted. | -| 71 |*`cudaErrorNotSupported`* | | This error indicates the attempted operation is not supported on the current system or device. | -| 72 |*`cudaErrorHardwareStackError`* | | Device encountered an error in the call stack during kernel execution, possibly due to stack corruption or exceeding the stack size limit. The context cannot be used, so it must be destroyed (and a new one should be created). 
All existing device memory allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA.| -| 73 |*`cudaErrorIllegalInstruction`* | | The device encountered an illegal instruction during kernel execution The context cannot be used, so it must be destroyed (and a new one should be created). All existing device memory allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA.| -| 74 |*`cudaErrorMisalignedAddress`* | | The device encountered a load or store instruction on a memory address which is not aligned. The context cannot be used, so it must be destroyed (and a new one should be created). All existing device memory allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA.| -| 75 |*`cudaErrorInvalidAddressSpace`* | | While executing a kernel, the device encountered an instruction which can only operate on memory locations in certain address spaces (global, shared, or local), but was supplied a memory address not belonging to an allowed address space. The context cannot be used, so it must be destroyed (and a new one should be created). All existing device memory allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA.| -| 76 |*`cudaErrorInvalidPc`* | | The device encountered an invalid program counter. The context cannot be used, so it must be destroyed (and a new one should be created). All existing device memory allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA.| -| 77 |*`cudaErrorIllegalAddress`* | | The device encountered a load or store instruction on an invalid memory address. The context cannot be used, so it must be destroyed (and a new one should be created). 
All existing device memory allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA.| -| 78 |*`cudaErrorInvalidPtx`* | | A PTX compilation failed. The runtime may fall back to compiling PTX if an application does not contain a suitable binary for the current device.| -| 79 |*`cudaErrorInvalidGraphicsContext`* | | This indicates an error with the OpenGL or DirectX context. | -| 0x7f |*`cudaErrorStartupFailure`* | | This indicates an internal startup failure in the CUDA runtime. | -| 1000 |*`cudaErrorApiFailureBase`* | | Any unhandled CUDA driver error is added to this value and returned via the runtime. Production releases of CUDA should not return such errors. Deprecated This error return is deprecated as of CUDA 4.1.| -| enum |***`cudaFuncCache`*** |***`hipFuncCache_t`*** | CUDA function cache configurations. | -| 0 |*`cudaFuncCachePreferNone`* |*`hipFuncCachePreferNone`* | Default function cache configuration, no preference. | -| 1 |*`cudaFuncCachePreferShared`* |*`hipFuncCachePreferShared`* | Prefer larger shared memory and smaller L1 cache. | -| 2 |*`cudaFuncCachePreferL1`* |*`hipFuncCachePreferL1`* | Prefer larger L1 cache and smaller shared memory. | -| 3 |*`cudaFuncCachePreferEqual`* |*`hipFuncCachePreferEqual`* | Prefer equal size L1 cache and shared memory. | -| enum |***`cudaGraphicsCubeFace`*** | | CUDA graphics interop array indices for cube maps. | -| 0x00 |*`cudaGraphicsCubeFacePositiveX`* | | Positive X face of cubemap. | -| 0x01 |*`cudaGraphicsCubeFaceNegativeX`* | | Negative X face of cubemap. | -| 0x02 |*`cudaGraphicsCubeFacePositiveY`* | | Positive Y face of cubemap. | -| 0x03 |*`cudaGraphicsCubeFaceNegativeY`* | | Negative Y face of cubemap. | -| 0x04 |*`cudaGraphicsCubeFacePositiveZ`* | | Positive Z face of cubemap. | -| 0x05 |*`cudaGraphicsCubeFaceNegativeZ`* | | Negative Z face of cubemap. | -| enum |***`cudaGraphicsMapFlags`*** | | CUDA graphics interop map flags. 
| -| 0 |*`cudaGraphicsMapFlagsNone`* | | Default; Assume resource can be read/written. | -| 1 |*`cudaGraphicsMapFlagsReadOnly`* | | CUDA will not write to this resource. | -| 2 |*`cudaGraphicsMapFlagsWriteDiscard`* | | CUDA will only write to and will not read from this resource. | -| enum |***`cudaGraphicsRegisterFlags`*** | | CUDA graphics interop register flags. | -| 0 |*`cudaGraphicsRegisterFlagsNone`* | | Default. | -| 1 |*`cudaGraphicsRegisterFlagsReadOnly`* | | CUDA will not write to this resource. | -| 2 |*`cudaGraphicsRegisterFlagsWriteDiscard`* | | CUDA will only write to and will not read from this resource. | -| 4 |*`cudaGraphicsRegisterFlagsSurfaceLoadStore`* | | CUDA will bind this resource to a surface reference. | -| 8 |*`cudaGraphicsRegisterFlagsTextureGather`* | | CUDA will perform texture gather operations on this resource. | -| enum |***`cudaLimit`*** |***`hipLimit_t`*** | CUDA Limits. | -| 0x00 |*`cudaLimitStackSize`* | | GPU thread stack size. | -| 0x01 |*`cudaLimitPrintfFifoSize`* | | GPU printf/fprintf FIFO size. | -| 0x02 |*`cudaLimitMallocHeapSize`* |*`hipLimitMallocHeapSize`* | GPU malloc heap size. | -| 0x03 |*`cudaLimitDevRuntimeSyncDepth`* | | GPU device runtime synchronize depth. | -| 0x04 |*`cudaLimitDevRuntimePendingLaunchCount`* | | GPU device runtime pending launch count. | -| enum |***`cudaMemcpyKind`*** |***`hipMemcpyKind`*** | CUDA memory copy types. | -| 0 |*`cudaMemcpyHostToHost`* |*`hipMemcpyHostToHost`* | Host -> Host. | -| 1 |*`cudaMemcpyHostToDevice`* |*`hipMemcpyHostToDevice`* | Host -> Device. | -| 2 |*`cudaMemcpyDeviceToHost`* |*`hipMemcpyDeviceToHost`* | Device -> Host. | -| 3 |*`cudaMemcpyDeviceToDevice`* |*`hipMemcpyDeviceToDevice`* | Device -> Device. | -| 4 |*`cudaMemcpyDefault`* |*`hipMemcpyDefault`* | Default based unified virtual address space. | -| enum |***`cudaMemoryType`*** |***`hipMemoryType`*** | CUDA memory types. | -| 1 |*`cudaMemoryTypeHost`* |*`hipMemoryTypeHost`* | Host memory. 
| -| 2 |*`cudaMemoryTypeDevice`* |*`hipMemoryTypeDevice`* | Device memory. | -| enum |***`cudaResourceType`*** | | CUDA resource types. | -| 0 |*`cudaResourceTypeArray`* | | Array resource. | -| 1 |*`cudaResourceTypeMipmappedArray`* | | Mipmapped array resource. | -| 2 |*`cudaResourceTypeLinear`* | | Linear resource. | -| 3 |*`cudaResourceTypePitch2D`* | | Pitch 2D resource. | -| enum |***`cudaResourceViewFormat`*** | | CUDA texture resource view formats. | -| 0x00 |*`cudaResViewFormatNone`* | | No resource view format (use underlying resource format). | -| 0x01 |*`cudaResViewFormatUnsignedChar1`* | | 1 channel unsigned 8-bit integers. | -| 0x02 |*`cudaResViewFormatUnsignedChar2`* | | 2 channel unsigned 8-bit integers. | -| 0x03 |*`cudaResViewFormatUnsignedChar4`* | | 4 channel unsigned 8-bit integers. | -| 0x04 |*`cudaResViewFormatSignedChar1`* | | 1 channel signed 8-bit integers. | -| 0x05 |*`cudaResViewFormatSignedChar2`* | | 2 channel signed 8-bit integers. | -| 0x06 |*`cudaResViewFormatSignedChar4`* | | 4 channel signed 8-bit integers. | -| 0x07 |*`cudaResViewFormatUnsignedShort1`* | | 1 channel unsigned 16-bit integers. | -| 0x08 |*`cudaResViewFormatUnsignedShort2`* | | 2 channel unsigned 16-bit integers. | -| 0x09 |*`cudaResViewFormatUnsignedShort4`* | | 4 channel unsigned 16-bit integers. | -| 0x0a |*`cudaResViewFormatSignedShort1`* | | 1 channel signed 16-bit integers. | -| 0x0b |*`cudaResViewFormatSignedShort2`* | | 2 channel signed 16-bit integers. | -| 0x0c |*`cudaResViewFormatSignedShort4`* | | 4 channel signed 16-bit integers. | -| 0x0d |*`cudaResViewFormatUnsignedInt1`* | | 1 channel unsigned 32-bit integers. | -| 0x0e |*`cudaResViewFormatUnsignedInt2`* | | 2 channel unsigned 32-bit integers. | -| 0x0f |*`cudaResViewFormatUnsignedInt4`* | | 4 channel unsigned 32-bit integers. | -| 0x10 |*`cudaResViewFormatSignedInt1`* | | 1 channel signed 32-bit integers. | -| 0x11 |*`cudaResViewFormatSignedInt2`* | | 2 channel signed 32-bit integers. 
| -| 0x12 |*`cudaResViewFormatSignedInt4`* | | 4 channel signed 32-bit integers. | -| 0x13 |*`cudaResViewFormatHalf1`* | | 1 channel 16-bit floating point. | -| 0x14 |*`cudaResViewFormatHalf2`* | | 2 channel 16-bit floating point. | -| 0x15 |*`cudaResViewFormatHalf4`* | | 4 channel 16-bit floating point. | -| 0x16 |*`cudaResViewFormatFloat1`* | | 1 channel 32-bit floating point. | -| 0x17 |*`cudaResViewFormatFloat2`* | | 2 channel 32-bit floating point. | -| 0x18 |*`cudaResViewFormatFloat4`* | | 4 channel 32-bit floating point. | -| 0x19 |*`cudaResViewFormatUnsignedBlockCompressed1`* | | Block compressed 1. | -| 0x1a |*`cudaResViewFormatUnsignedBlockCompressed2`* | | Block compressed 2. | -| 0x1b |*`cudaResViewFormatUnsignedBlockCompressed3`* | | Block compressed 3. | -| 0x1c |*`cudaResViewFormatUnsignedBlockCompressed4`* | | Block compressed 4 unsigned. | -| 0x1d |*`cudaResViewFormatSignedBlockCompressed4`* | | Block compressed 4 signed. | -| 0x1e |*`cudaResViewFormatUnsignedBlockCompressed5`* | | Block compressed 5 unsigned. | -| 0x1f |*`cudaResViewFormatSignedBlockCompressed5`* | | Block compressed 5 signed. | -| 0x20 |*`cudaResViewFormatUnsignedBlockCompressed6H`* | | Block compressed 6 unsigned half-float. | -| 0x21 |*`cudaResViewFormatSignedBlockCompressed6H`* | | Block compressed 6 signed half-float. | -| 0x22 |*`cudaResViewFormatUnsignedBlockCompressed7`* | | Block compressed 7. | -| enum |***`cudaSharedMemConfig`*** |***`hipSharedMemConfig`*** | CUDA shared memory configuration. | -| 0 |*`cudaSharedMemBankSizeDefault`* |*`hipSharedMemBankSizeDefault`* | | -| 1 |*`cudaSharedMemBankSizeFourByte`* |*`hipSharedMemBankSizeFourByte`* | | -| 2 |*`cudaSharedMemBankSizeEightByte`* |*`hipSharedMemBankSizeEightByte`* | | -| enum |***`cudaSurfaceBoundaryMode`*** | | CUDA Surface boundary modes. | -| 0 |*`cudaBoundaryModeZero`* | | Zero boundary mode. | -| 1 |*`cudaBoundaryModeClamp`* | | Clamp boundary mode. | -| 2 |*`cudaBoundaryModeTrap`* | | Trap boundary mode. 
| -| enum |***`cudaSurfaceFormatMode`*** | | CUDA Surface format modes. | -| 0 |*`cudaFormatModeForced`* | | Forced format mode. | -| 1 |*`cudaFormatModeAuto`* | | Auto format mode. | -| enum |***`cudaTextureAddressMode`*** | | CUDA texture address modes. | -| 0 |*`cudaAddressModeWrap`* | | Wrapping address mode. | -| 1 |*`cudaAddressModeClamp`* | | Clamp to edge address mode. | -| 2 |*`cudaAddressModeMirror`* | | Mirror address mode. | -| 3 |*`cudaAddressModeBorder`* | | Border address mode. | -| enum |***`cudaTextureFilterMode`*** |***`hipTextureFilterMode`*** | Point filter mode. | -| 0 |*`cudaFilterModePoint`* |*`hipFilterModePoint`* | Linear filter mode. | -| 1 |*`cudaFilterModeLinear`* | | Clamp to edge address mode. | -| enum |***`cudaTextureReadMode`*** |***`hipTextureReadMode`*** | CUDA texture read modes. | -| 0 |*`cudaReadModeElementType`* |*`hipReadModeElementType`* | Read texture as specified element type. | -| 1 |*`cudaReadModeNormalizedFloat`* | | Read texture as normalized float. | -| struct | `cudaArray` | `hipArray` | CUDA array [opaque]. | -| typedef | `cudaArray_t` | `hipArray *` | CUDA array pointer. | -| typedef | `cudaArray_const_t` | `const hipArray *` | CUDA array (as source copy argument). | -| enum | `cudaError` | `hipError_t` | CUDA Error types. | -| typedef | `cudaError_t` | `hipError_t` | CUDA Error types. | -| typedef | `cudaEvent_t` | `hipEvent_t` | CUDA event types. | -| typedef | `cudaGraphicsResource_t` | | CUDA graphics resource types. | -| typedef | `cudaMipmappedArray_t` | | CUDA mipmapped array. | -| typedef | `cudaMipmappedArray_const_t` | | CUDA mipmapped array (as source argument). | -| enum |***`cudaOutputMode`*** | | CUDA Profiler Output modes. | -| 0x00 |*`cudaKeyValuePair`* | | Output mode Key-Value pair format. | -| 0x01 |*`cudaCSV`* | | Output mode Comma separated values format. | -| typedef | `cudaOutputMode_t` | | CUDA output file modes. | -| typedef | `cudaStream_t` | `hipStream_t` | CUDA stream. 
| -| typedef | `cudaSurfaceObject_t` | | An opaque value that represents a CUDA Surface object. | -| typedef | `cudaTextureObject_t` | | An opaque value that represents a CUDA texture object. | -| typedef | `CUuuid_stcudaUUID_t` | | CUDA UUID types. | -| define | `CUDA_IPC_HANDLE_SIZE` | | CUDA IPC Handle Size. | -| define | `cudaArrayCubemap` | | Must be set in cudaMalloc3DArray to create a cubemap CUDA array. | -| define | `cudaArrayDefault` | | Default CUDA array allocation flag. | -| define | `cudaArrayLayered` | | Must be set in cudaMalloc3DArray to create a layered CUDA array. | -| define | `cudaArraySurfaceLoadStore` | | Must be set in cudaMallocArray or cudaMalloc3DArray in order to bind surfaces to the CUDA array. | -| define | `cudaArrayTextureGather` | | Must be set in cudaMallocArray or cudaMalloc3DArray in order to perform texture gather operations on the CUDA array. | -| define | `cudaDeviceBlockingSync` | `hipDeviceScheduleBlockingSync` | Device flag - Use blocking synchronization. Deprecated as of CUDA 4.0 and replaced with cudaDeviceScheduleBlockingSync. | -| define | `cudaDeviceLmemResizeToMax` | | Device flag - Keep local memory allocation after launch. | -| define | `cudaDeviceMapHost` | | Device flag - Support mapped pinned allocations. | -| define | `cudaDeviceMask` | | Device flags mask. | -| define | `cudaDevicePropDontCare` | | Empty device properties. | -| define | `cudaDeviceScheduleAuto` | `hipDeviceScheduleAuto` | Device flag - Automatic scheduling. | -| define | `cudaDeviceScheduleBlockingSync` | `hipDeviceScheduleBlockingSync` | Device flag - Use blocking synchronization. | -| define | `cudaDeviceScheduleMask` | `hipDeviceScheduleMask` | Device schedule flags mask. | -| define | `cudaDeviceScheduleSpin` | `hipDeviceScheduleSpin` | Device flag - Spin default scheduling. | -| define | `cudaDeviceScheduleYield` | `hipDeviceScheduleYield` | Device flag - Yield default scheduling. 
| -| define | `cudaEventDefault` | `hipEventDefault` | Default event flag. | -| define | `cudaEventDisableTiming` | `hipEventDisableTiming` | Event will not record timing data. | -| define | `cudaEventInterprocess` | `hipEventInterprocess` | Event is suitable for interprocess use. cudaEventDisableTiming must be set. | -| define | `cudaHostAllocDefault` | `hipHostMallocDefault` | Default page-locked allocation flag. | -| define | `cudaHostAllocMapped` | `hipHostMallocMapped` | Map allocation into device space. | -| define | `cudaHostAllocPortable` | `hipHostMallocPortable` | Pinned memory accessible by all CUDA contexts. | -| define | `cudaHostAllocWriteCombined` | `hipHostMallocWriteCombined` | Write-combined memory. | -| define | `cudaHostRegisterDefault` | `hipHostRegisterDefault` | Default host memory registration flag. | -| define | `cudaHostRegisterIoMemory` | `hipHostRegisterIoMemory` | Memory-mapped I/O space. | -| define | `cudaHostRegisterMapped` | `hipHostRegisterMapped` | Map registered memory into device space. | -| define | `cudaHostRegisterPortable` | `hipHostRegisterPortable` | Pinned memory accessible by all CUDA contexts. | -| define | `cudaIpcMemLazyEnablePeerAccess` | `hipIpcMemLazyEnablePeerAccess` | Automatically enable peer access between remote devices as needed. | -| define | `cudaMemAttachGlobal` | | Memory can be accessed by any stream on any device. | -| define | `cudaMemAttachHost` | | Memory cannot be accessed by any stream on any device. | -| define | `cudaMemAttachSingle` | | Memory can only be accessed by a single stream on the associated device. | -| define | `cudaOccupancyDefault` | | Default behavior. | -| define | `cudaOccupancyDisableCachingOverride` | | Assume global caching is enabled and cannot be automatically turned off. | -| define | `cudaPeerAccessDefault` | | Default peer addressing enable flag. | -| define | `cudaStreamDefault` | `hipStreamDefault` | Default stream flag. 
| -| define | `cudaStreamLegacy` | | Default stream flag. | -| define | `cudaStreamNonBlocking` | `hipStreamNonBlocking` | Stream does not synchronize with stream 0 (the NULL stream). | -| define | `cudaStreamPerThread` | | Per-thread stream handle. | +| **type** | **CUDA** | **HIP** | +|-------------:|-----------------------------------------------|------------------------------------------------------| +| struct | `cudaChannelFormatDesc` | `hipChannelFormatDesc` | +| struct | `cudaDeviceProp` | `hipDeviceProp_t` | +| struct | `cudaExtent` | `hipExtent` | +| struct | `cudaFuncAttributes` | | +| struct | `cudaIpcEventHandle_t` | `hipIpcEventHandle_t` | +| struct | `cudaIpcMemHandle_t` | `hipIpcMemHandle_t` | +| struct | `cudaMemcpy3DParms` | `hipMemcpy3DParms` | +| struct | `cudaMemcpy3DPeerParms` | | +| struct | `cudaPitchedPtr` | `hipPitchedPtr` | +| struct | `cudaPointerAttributes` | `hipPointerAttribute_t` | +| struct | `cudaPos` | `hipPos` | +| struct | `cudaResourceDesc` | `hipResourceDesc` | +| struct | `cudaResourceViewDesc` | `hipResourceViewDesc` | +| struct | `cudaTextureDesc` | `hipTextureDesc` | +| struct | `surfaceReference` | | +| struct | `textureReference` | `textureReference` | +| enum |***`cudaChannelFormatKind`*** |***`hipChannelFormatKind`*** | +| 0 |*`cudaChannelFormatKindSigned`* |*`hipChannelFormatKindSigned`* | +| 1 |*`cudaChannelFormatKindUnsigned`* |*`hipChannelFormatKindUnsigned`* | +| 2 |*`cudaChannelFormatKindFloat`* |*`hipChannelFormatKindFloat`* | +| 3 |*`cudaChannelFormatKindNone`* |*`hipChannelFormatKindNone`* | +| enum |***`cudaComputeMode`*** | | +| 0 |*`cudaComputeModeDefault`* | | +| 1 |*`cudaComputeModeExclusive`* | | +| 2 |*`cudaComputeModeProhibited`* | | +| 3 |*`cudaComputeModeExclusiveProcess`* | | +| enum |***`cudaDeviceAttr`*** |***`hipDeviceAttribute_t`*** | +| 1 |*`cudaDevAttrMaxThreadsPerBlock`* |*`hipDeviceAttributeMaxThreadsPerBlock`* | +| 2 |*`cudaDevAttrMaxBlockDimX`* |*`hipDeviceAttributeMaxBlockDimX`* | +| 3 
|*`cudaDevAttrMaxBlockDimY`* |*`hipDeviceAttributeMaxBlockDimY`* | +| 4 |*`cudaDevAttrMaxBlockDimZ`* |*`hipDeviceAttributeMaxBlockDimZ`* | +| 5 |*`cudaDevAttrMaxGridDimX`* |*`hipDeviceAttributeMaxGridDimX`* | +| 6 |*`cudaDevAttrMaxGridDimY`* |*`hipDeviceAttributeMaxGridDimY`* | +| 7 |*`cudaDevAttrMaxGridDimZ`* |*`hipDeviceAttributeMaxGridDimZ`* | +| 8 |*`cudaDevAttrMaxSharedMemoryPerBlock`* |*`hipDeviceAttributeMaxSharedMemoryPerBlock`* | +| 9 |*`cudaDevAttrTotalConstantMemory`* |*`hipDeviceAttributeTotalConstantMemory`* | +| 10 |*`cudaDevAttrWarpSize`* |*`hipDeviceAttributeWarpSize`* | +| 11 |*`cudaDevAttrMaxPitch`* | | +| 12 |*`cudaDevAttrMaxRegistersPerBlock`* |*`hipDeviceAttributeMaxRegistersPerBlock`* | +| 13 |*`cudaDevAttrClockRate`* |*`hipDeviceAttributeClockRate`* | +| 14 |*`cudaDevAttrTextureAlignment`* | | +| 15 |*`cudaDevAttrGpuOverlap`* | | +| 16 |*`cudaDevAttrMultiProcessorCount`* |*`hipDeviceAttributeMultiprocessorCount`* | +| 17 |*`cudaDevAttrKernelExecTimeout`* | | +| 18 |*`cudaDevAttrIntegrated`* | | +| 19 |*`cudaDevAttrCanMapHostMemory`* | | +| 20 |*`cudaDevAttrComputeMode`* |*`hipDeviceAttributeComputeMode`* | +| 21 |*`cudaDevAttrMaxTexture1DWidth`* | | +| 22 |*`cudaDevAttrMaxTexture2DWidth`* | | +| 23 |*`cudaDevAttrMaxTexture2DHeight`* | | +| 24 |*`cudaDevAttrMaxTexture3DWidth`* | | +| 25 |*`cudaDevAttrMaxTexture3DHeight`* | | +| 26 |*`cudaDevAttrMaxTexture3DDepth`* | | +| 27 |*`cudaDevAttrMaxTexture2DLayeredWidth`* | | +| 28 |*`cudaDevAttrMaxTexture2DLayeredHeight`* | | +| 29 |*`cudaDevAttrMaxTexture2DLayeredLayers`* | | +| 30 |*`cudaDevAttrSurfaceAlignment`* | | +| 31 |*`cudaDevAttrConcurrentKernels`* |*`hipDeviceAttributeConcurrentKernels`* | +| 32 |*`cudaDevAttrEccEnabled`* | | +| 33 |*`cudaDevAttrPciBusId`* |*`hipDeviceAttributePciBusId`* | +| 34 |*`cudaDevAttrPciDeviceId`* |*`hipDeviceAttributePciDeviceId`* | +| 35 |*`cudaDevAttrTccDriver`* | | +| 36 |*`cudaDevAttrMemoryClockRate`* |*`hipDeviceAttributeMemoryClockRate`* | +| 37 
|*`cudaDevAttrGlobalMemoryBusWidth`* |*`hipDeviceAttributeMemoryBusWidth`* | +| 38 |*`cudaDevAttrL2CacheSize`* |*`hipDeviceAttributeL2CacheSize`* | +| 39 |*`cudaDevAttrMaxThreadsPerMultiProcessor`* |*`hipDeviceAttributeMaxThreadsPerMultiProcessor`* | +| 40 |*`cudaDevAttrAsyncEngineCount`* | | +| 41 |*`cudaDevAttrUnifiedAddressing`* | | +| 42 |*`cudaDevAttrMaxTexture1DLayeredWidth`* | | +| 43 |*`cudaDevAttrMaxTexture1DLayeredLayers`* | | +| 44 | | | +| 45 |*`cudaDevAttrMaxTexture2DGatherWidth`* | | +| 46 |*`cudaDevAttrMaxTexture2DGatherHeight`* | | +| 47 |*`cudaDevAttrMaxTexture3DWidthAlt`* | | +| 48 |*`cudaDevAttrMaxTexture3DHeightAlt`* | | +| 49 |*`cudaDevAttrMaxTexture3DDepthAlt`* | | +| 50 |*`cudaDevAttrPciDomainId`* | | +| 51 |*`cudaDevAttrTexturePitchAlignment`* | | +| 52 |*`cudaDevAttrMaxTextureCubemapWidth`* | | +| 53 |*`cudaDevAttrMaxTextureCubemapLayeredWidth`* | | +| 54 |*`cudaDevAttrMaxTextureCubemapLayeredLayers`* | | +| 55 |*`cudaDevAttrMaxSurface1DWidth`* | | +| 56 |*`cudaDevAttrMaxSurface2DWidth`* | | +| 57 |*`cudaDevAttrMaxSurface2DHeight`* | | +| 58 |*`cudaDevAttrMaxSurface3DWidth`* | | +| 59 |*`cudaDevAttrMaxSurface3DHeight`* | | +| 60 |*`cudaDevAttrMaxSurface3DDepth`* | | +| 61 |*`cudaDevAttrMaxSurface1DLayeredWidth`* | | +| 62 |*`cudaDevAttrMaxSurface1DLayeredLayers`* | | +| 63 |*`cudaDevAttrMaxSurface2DLayeredWidth`* | | +| 64 |*`cudaDevAttrMaxSurface2DLayeredHeight`* | | +| 65 |*`cudaDevAttrMaxSurface2DLayeredLayers`* | | +| 66 |*`cudaDevAttrMaxSurfaceCubemapWidth`* | | +| 67 |*`cudaDevAttrMaxSurfaceCubemapLayeredWidth`* | | +| 68 |*`cudaDevAttrMaxSurfaceCubemapLayeredLayers`* | | +| 69 |*`cudaDevAttrMaxTexture1DLinearWidth`* | | +| 70 |*`cudaDevAttrMaxTexture2DLinearWidth`* | | +| 71 |*`cudaDevAttrMaxTexture2DLinearHeight`* | | +| 72 |*`cudaDevAttrMaxTexture2DLinearPitch`* | | +| 73 |*`cudaDevAttrMaxTexture2DMipmappedWidth`* | | +| 74 |*`cudaDevAttrMaxTexture2DMipmappedHeight`* | | +| 75 |*`cudaDevAttrComputeCapabilityMajor`* 
|*`hipDeviceAttributeComputeCapabilityMajor`* | +| 76 |*`cudaDevAttrComputeCapabilityMinor`* |*`hipDeviceAttributeComputeCapabilityMinor`* | +| 77 |*`cudaDevAttrMaxTexture1DMipmappedWidth`* | | +| 78 |*`cudaDevAttrStreamPrioritiesSupported`* | | +| 79 |*`cudaDevAttrGlobalL1CacheSupported`* | | +| 80 |*`cudaDevAttrLocalL1CacheSupported`* | | +| 81 |*`cudaDevAttrMaxSharedMemoryPerMultiprocessor`*|*`hipDeviceAttributeMaxSharedMemoryPerMultiprocessor`*| +| 82 |*`cudaDevAttrMaxRegistersPerMultiprocessor`* | | +| 83 |*`cudaDevAttrManagedMemory`* | | +| 84 |*`cudaDevAttrIsMultiGpuBoard`* |*`hipDeviceAttributeIsMultiGpuBoard`* | +| 85 |*`cudaDevAttrMultiGpuBoardGroupID`* | | +| enum |***`cudaError`*** |***`hipError_t`*** | +| enum |***`cudaError_t`*** |***`hipError_t`*** | +| 0 |*`cudaSuccess`* |*`hipSuccess`* | +| 1 |*`cudaErrorMissingConfiguration`* | | +| 2 |*`cudaErrorMemoryAllocation`* |*`hipErrorMemoryAllocation`* | +| 3 |*`cudaErrorInitializationError`* |*`hipErrorInitializationError`* | +| 4 |*`cudaErrorLaunchFailure`* | | +| 5 |*`cudaErrorPriorLaunchFailure`* | | +| 6 |*`cudaErrorLaunchTimeout`* | | +| 7 |*`cudaErrorLaunchOutOfResources`* |*`hipErrorLaunchOutOfResources`* | +| 8 |*`cudaErrorInvalidDeviceFunction`* | | +| 9 |*`cudaErrorInvalidConfiguration`* | | +| 10 |*`cudaErrorInvalidDevice`* |*`hipErrorInvalidDevice`* | +| 11 |*`cudaErrorInvalidValue`* |*`hipErrorInvalidValue`* | +| 12 |*`cudaErrorInvalidPitchValue`* | | +| 13 |*`cudaErrorInvalidSymbol`* | | +| 14 |*`cudaErrorMapBufferObjectFailed`* | | +| 15 |*`cudaErrorUnmapBufferObjectFailed`* | | +| 16 |*`cudaErrorInvalidHostPointer`* | | +| 17 |*`cudaErrorInvalidDevicePointer`* |*`hipErrorInvalidDevicePointer`* | +| 18 |*`cudaErrorInvalidTexture`* | | +| 19 |*`cudaErrorInvalidTextureBinding`* | | +| 20 |*`cudaErrorInvalidChannelDescriptor`* | | +| 21 |*`cudaErrorInvalidMemcpyDirection`* | | +| 22 |*`cudaErrorAddressOfConstant`* | | +| 23 |*`cudaErrorTextureFetchFailed`* | | +| 24 
|*`cudaErrorTextureNotBound`* | | +| 25 |*`cudaErrorSynchronizationError`* | | +| 26 |*`cudaErrorInvalidFilterSetting`* | | +| 27 |*`cudaErrorInvalidNormSetting`* | | +| 28 |*`cudaErrorMixedDeviceExecution`* | | +| 29 |*`cudaErrorCudartUnloading`* | | +| 30 |*`cudaErrorUnknown`* |*`hipErrorUnknown`* | +| 31 |*`cudaErrorNotYetImplemented`* | | +| 32 |*`cudaErrorMemoryValueTooLarge`* | | +| 33 |*`cudaErrorInvalidResourceHandle`* |*`hipErrorInvalidResourceHandle`* | +| 34 |*`cudaErrorNotReady`* |*`hipErrorNotReady`* | +| 35 |*`cudaErrorInsufficientDriver`* | | +| 36 |*`cudaErrorSetOnActiveProcess`* | | +| 37 |*`cudaErrorInvalidSurface`* | | +| 38 |*`cudaErrorNoDevice`* |*`hipErrorNoDevice`* | +| 39 |*`cudaErrorECCUncorrectable`* | | +| 40 |*`cudaErrorSharedObjectSymbolNotFound`* | | +| 41 |*`cudaErrorSharedObjectInitFailed`* | | +| 42 |*`cudaErrorUnsupportedLimit`* |*`hipErrorUnsupportedLimit`* | +| 43 |*`cudaErrorDuplicateVariableName`* | | +| 44 |*`cudaErrorDuplicateTextureName`* | | +| 45 |*`cudaErrorDuplicateSurfaceName`* | | +| 46 |*`cudaErrorDevicesUnavailable`* | | +| 47 |*`cudaErrorInvalidKernelImage`* | | +| 48 |*`cudaErrorNoKernelImageForDevice`* | | +| 49 |*`cudaErrorIncompatibleDriverContext`* | | +| 50 |*`cudaErrorPeerAccessAlreadyEnabled`* |*`hipErrorPeerAccessAlreadyEnabled`* | +| 51 |*`cudaErrorPeerAccessNotEnabled`* |*`hipErrorPeerAccessNotEnabled`* | +| 52 | | | +| 53 | | | +| 54 |*`cudaErrorDeviceAlreadyInUse`* | | +| 55 |*`cudaErrorProfilerDisabled`* | | +| 56 |*`cudaErrorProfilerNotInitialized`* | | +| 57 |*`cudaErrorProfilerAlreadyStarted`* | | +| 58 |*`cudaErrorProfilerAlreadyStopped`* | | +| 59 |*`cudaErrorAssert`* | | +| 60 |*`cudaErrorTooManyPeers`* | | +| 61 |*`cudaErrorHostMemoryAlreadyRegistered`* | *`hipErrorHostMemoryAlreadyRegistered`* | +| 62 |*`cudaErrorHostMemoryNotRegistered`* | *`hipErrorHostMemoryNotRegistered`* | +| 63 |*`cudaErrorOperatingSystem`* | | +| 64 |*`cudaErrorPeerAccessUnsupported`* | | +| 65 
|*`cudaErrorLaunchMaxDepthExceeded`* | | +| 66 |*`cudaErrorLaunchFileScopedTex`* | | +| 67 |*`cudaErrorLaunchFileScopedSurf`* | | +| 68 |*`cudaErrorSyncDepthExceeded`* | | +| 69 |*`cudaErrorLaunchPendingCountExceeded`* | | +| 70 |*`cudaErrorNotPermitted`* | | +| 71 |*`cudaErrorNotSupported`* | | +| 72 |*`cudaErrorHardwareStackError`* | | +| 73 |*`cudaErrorIllegalInstruction`* | | +| 74 |*`cudaErrorMisalignedAddress`* | | +| 75 |*`cudaErrorInvalidAddressSpace`* | | +| 76 |*`cudaErrorInvalidPc`* | | +| 77 |*`cudaErrorIllegalAddress`* | | +| 78 |*`cudaErrorInvalidPtx`* | | +| 79 |*`cudaErrorInvalidGraphicsContext`* | | +| 0x7f |*`cudaErrorStartupFailure`* | | +| 1000 |*`cudaErrorApiFailureBase`* | | +| enum |***`cudaFuncCache`*** |***`hipFuncCache_t`*** | +| 0 |*`cudaFuncCachePreferNone`* |*`hipFuncCachePreferNone`* | +| 1 |*`cudaFuncCachePreferShared`* |*`hipFuncCachePreferShared`* | +| 2 |*`cudaFuncCachePreferL1`* |*`hipFuncCachePreferL1`* | +| 3 |*`cudaFuncCachePreferEqual`* |*`hipFuncCachePreferEqual`* | +| enum |***`cudaGraphicsCubeFace`*** | | +| 0x00 |*`cudaGraphicsCubeFacePositiveX`* | | +| 0x01 |*`cudaGraphicsCubeFaceNegativeX`* | | +| 0x02 |*`cudaGraphicsCubeFacePositiveY`* | | +| 0x03 |*`cudaGraphicsCubeFaceNegativeY`* | | +| 0x04 |*`cudaGraphicsCubeFacePositiveZ`* | | +| 0x05 |*`cudaGraphicsCubeFaceNegativeZ`* | | +| enum |***`cudaGraphicsMapFlags`*** | | +| 0 |*`cudaGraphicsMapFlagsNone`* | | +| 1 |*`cudaGraphicsMapFlagsReadOnly`* | | +| 2 |*`cudaGraphicsMapFlagsWriteDiscard`* | | +| enum |***`cudaGraphicsRegisterFlags`*** | | +| 0 |*`cudaGraphicsRegisterFlagsNone`* | | +| 1 |*`cudaGraphicsRegisterFlagsReadOnly`* | | +| 2 |*`cudaGraphicsRegisterFlagsWriteDiscard`* | | +| 4 |*`cudaGraphicsRegisterFlagsSurfaceLoadStore`* | | +| 8 |*`cudaGraphicsRegisterFlagsTextureGather`* | | +| enum |***`cudaLimit`*** |***`hipLimit_t`*** | +| 0x00 |*`cudaLimitStackSize`* | | +| 0x01 |*`cudaLimitPrintfFifoSize`* | | +| 0x02 |*`cudaLimitMallocHeapSize`* 
|*`hipLimitMallocHeapSize`* | +| 0x03 |*`cudaLimitDevRuntimeSyncDepth`* | | +| 0x04 |*`cudaLimitDevRuntimePendingLaunchCount`* | | +| enum |***`cudaMemcpyKind`*** |***`hipMemcpyKind`*** | +| 0 |*`cudaMemcpyHostToHost`* |*`hipMemcpyHostToHost`* | +| 1 |*`cudaMemcpyHostToDevice`* |*`hipMemcpyHostToDevice`* | +| 2 |*`cudaMemcpyDeviceToHost`* |*`hipMemcpyDeviceToHost`* | +| 3 |*`cudaMemcpyDeviceToDevice`* |*`hipMemcpyDeviceToDevice`* | +| 4 |*`cudaMemcpyDefault`* |*`hipMemcpyDefault`* | +| enum |***`cudaMemoryType`*** |***`hipMemoryType`*** | +| 1 |*`cudaMemoryTypeHost`* |*`hipMemoryTypeHost`* | +| 2 |*`cudaMemoryTypeDevice`* |*`hipMemoryTypeDevice`* | +| enum |***`cudaResourceType`*** |***`hipResourceType`*** | +| 0 |*`cudaResourceTypeArray`* |*`hipResourceTypeArray`* | +| 1 |*`cudaResourceTypeMipmappedArray`* |*`hipResourceTypeMipmappedArray`* | +| 2 |*`cudaResourceTypeLinear`* |*`hipResourceTypeLinear`* | +| 3 |*`cudaResourceTypePitch2D`* |*`hipResourceTypePitch2D`* | +| enum |***`cudaResourceViewFormat`*** |***`hipResourceViewFormat`*** | +| 0x00 |*`cudaResViewFormatNone`* |*`hipResViewFormatNone`* | +| 0x01 |*`cudaResViewFormatUnsignedChar1`* |*`hipResViewFormatUnsignedChar1`* | +| 0x02 |*`cudaResViewFormatUnsignedChar2`* |*`hipResViewFormatUnsignedChar2`* | +| 0x03 |*`cudaResViewFormatUnsignedChar4`* |*`hipResViewFormatUnsignedChar4`* | +| 0x04 |*`cudaResViewFormatSignedChar1`* |*`hipResViewFormatSignedChar1`* | +| 0x05 |*`cudaResViewFormatSignedChar2`* |*`hipResViewFormatSignedChar2`* | +| 0x06 |*`cudaResViewFormatSignedChar4`* |*`hipResViewFormatSignedChar4`* | +| 0x07 |*`cudaResViewFormatUnsignedShort1`* |*`hipResViewFormatUnsignedShort1`* | +| 0x08 |*`cudaResViewFormatUnsignedShort2`* |*`hipResViewFormatUnsignedShort2`* | +| 0x09 |*`cudaResViewFormatUnsignedShort4`* |*`hipResViewFormatUnsignedShort4`* | +| 0x0a |*`cudaResViewFormatSignedShort1`* |*`hipResViewFormatSignedShort1`* | +| 0x0b |*`cudaResViewFormatSignedShort2`* |*`hipResViewFormatSignedShort2`* | 
+| 0x0c |*`cudaResViewFormatSignedShort4`* |*`hipResViewFormatSignedShort4`* | +| 0x0d |*`cudaResViewFormatUnsignedInt1`* |*`hipResViewFormatUnsignedInt1`* | +| 0x0e |*`cudaResViewFormatUnsignedInt2`* |*`hipResViewFormatUnsignedInt2`* | +| 0x0f |*`cudaResViewFormatUnsignedInt4`* |*`hipResViewFormatUnsignedInt4`* | +| 0x10 |*`cudaResViewFormatSignedInt1`* |*`hipResViewFormatSignedInt1`* | +| 0x11 |*`cudaResViewFormatSignedInt2`* |*`hipResViewFormatSignedInt2`* | +| 0x12 |*`cudaResViewFormatSignedInt4`* |*`hipResViewFormatSignedInt4`* | +| 0x13 |*`cudaResViewFormatHalf1`* |*`hipResViewFormatHalf1`* | +| 0x14 |*`cudaResViewFormatHalf2`* |*`hipResViewFormatHalf2`* | +| 0x15 |*`cudaResViewFormatHalf4`* |*`hipResViewFormatHalf4`* | +| 0x16 |*`cudaResViewFormatFloat1`* |*`hipResViewFormatFloat1`* | +| 0x17 |*`cudaResViewFormatFloat2`* |*`hipResViewFormatFloat2`* | +| 0x18 |*`cudaResViewFormatFloat4`* |*`hipResViewFormatFloat4`* | +| 0x19 |*`cudaResViewFormatUnsignedBlockCompressed1`* |*`hipResViewFormatUnsignedBlockCompressed1`* | +| 0x1a |*`cudaResViewFormatUnsignedBlockCompressed2`* |*`hipResViewFormatUnsignedBlockCompressed2`* | +| 0x1b |*`cudaResViewFormatUnsignedBlockCompressed3`* |*`hipResViewFormatUnsignedBlockCompressed3`* | +| 0x1c |*`cudaResViewFormatUnsignedBlockCompressed4`* |*`hipResViewFormatUnsignedBlockCompressed4`* | +| 0x1d |*`cudaResViewFormatSignedBlockCompressed4`* |*`hipResViewFormatSignedBlockCompressed4`* | +| 0x1e |*`cudaResViewFormatUnsignedBlockCompressed5`* |*`hipResViewFormatUnsignedBlockCompressed5`* | +| 0x1f |*`cudaResViewFormatSignedBlockCompressed5`* |*`hipResViewFormatSignedBlockCompressed5`* | +| 0x20 |*`cudaResViewFormatUnsignedBlockCompressed6H`* |*`hipResViewFormatUnsignedBlockCompressed6H`* | +| 0x21 |*`cudaResViewFormatSignedBlockCompressed6H`* |*`hipResViewFormatSignedBlockCompressed6H`* | +| 0x22 |*`cudaResViewFormatUnsignedBlockCompressed7`* |*`hipResViewFormatUnsignedBlockCompressed7`* | +| enum |***`cudaSharedMemConfig`*** 
|***`hipSharedMemConfig`*** | +| 0 |*`cudaSharedMemBankSizeDefault`* |*`hipSharedMemBankSizeDefault`* | +| 1 |*`cudaSharedMemBankSizeFourByte`* |*`hipSharedMemBankSizeFourByte`* | +| 2 |*`cudaSharedMemBankSizeEightByte`* |*`hipSharedMemBankSizeEightByte`* | +| enum |***`cudaSurfaceBoundaryMode`*** | | +| 0 |*`cudaBoundaryModeZero`* | | +| 1 |*`cudaBoundaryModeClamp`* | | +| 2 |*`cudaBoundaryModeTrap`* | | +| enum |***`cudaSurfaceFormatMode`*** | | +| 0 |*`cudaFormatModeForced`* | | +| 1 |*`cudaFormatModeAuto`* | | +| enum |***`cudaTextureAddressMode`*** |***`hipTextureAddressMode`*** | +| 0 |*`cudaAddressModeWrap`* |*`hipAddressModeWrap`* | +| 1 |*`cudaAddressModeClamp`* |*`hipAddressModeClamp`* | +| 2 |*`cudaAddressModeMirror`* |*`hipAddressModeMirror`* | +| 3 |*`cudaAddressModeBorder`* |*`hipAddressModeBorder`* | +| enum |***`cudaTextureFilterMode`*** |***`hipTextureFilterMode`*** | +| 0 |*`cudaFilterModePoint`* |*`hipFilterModePoint`* | +| 1 |*`cudaFilterModeLinear`* |*`hipFilterModeLinear`* | +| enum |***`cudaTextureReadMode`*** |***`hipTextureReadMode`*** | +| 0 |*`cudaReadModeElementType`* |*`hipReadModeElementType`* | +| 1 |*`cudaReadModeNormalizedFloat`* |*`hipReadModeNormalizedFloat`* | +| enum |***`cudaGLDeviceList`*** | | +| 0x01 |*`cudaGLDeviceListAll`* | | +| 0x02 |*`cudaGLDeviceListCurrentFrame`* | | +| 0x03 |*`cudaGLDeviceListNextFrame`* | | +| enum |***`cudaGLMapFlags`*** | | +| 0x00 |*`cudaGLMapFlagsNone`* | | +| 0x01 |*`cudaGLMapFlagsReadOnly`* | | +| 0x02 |*`cudaGLMapFlagsWriteDiscard`* | | +| enum |***`cudaD3D9DeviceList`*** | | +| 1 |*`cudaD3D9DeviceListAll`* | | +| 2 |*`cudaD3D9DeviceListCurrentFrame`* | | +| 3 |*`cudaD3D9DeviceListNextFrame`* | | +| enum |***`cudaD3D9MapFlags`*** | | +| 0 |*`cudaD3D9MapFlagsNone`* | | +| 1 |*`cudaD3D9MapFlagsReadOnly`* | | +| 2 |*`cudaD3D9MapFlagsWriteDiscard`* | | +| enum |***`cudaD3D9RegisterFlags`*** | | +| 0 |*`cudaD3D9RegisterFlagsNone`* | | +| 1 |*`cudaD3D9RegisterFlagsArray`* | | +| enum 
|***`cudaD3D10DeviceList`*** | | +| 1 |*`cudaD3D10DeviceListAll`* | | +| 2 |*`cudaD3D10DeviceListCurrentFrame`* | | +| 3 |*`cudaD3D10DeviceListNextFrame`* | | +| enum |***`cudaD3D10MapFlags`*** | | +| 0 |*`cudaD3D10MapFlagsNone`* | | +| 1 |*`cudaD3D10MapFlagsReadOnly`* | | +| 2 |*`cudaD3D10MapFlagsWriteDiscard`* | | +| enum |***`cudaD3D10RegisterFlags`*** | | +| 0 |*`cudaD3D10RegisterFlagsNone`* | | +| 1 |*`cudaD3D10RegisterFlagsArray`* | | +| enum |***`cudaD3D11DeviceList`*** | | +| 1 |*`cudaD3D11DeviceListAll`* | | +| 2 |*`cudaD3D11DeviceListCurrentFrame`* | | +| 3 |*`cudaD3D11DeviceListNextFrame`* | | +| struct | `cudaArray` | `hipArray` | +| typedef | `cudaArray_t` | `hipArray_t` | +| typedef | `cudaArray_const_t` | `hipArray_const_t` | +| enum | `cudaError` | `hipError_t` | +| typedef | `cudaError_t` | `hipError_t` | +| typedef | `cudaEvent_t` | `hipEvent_t` | +| typedef | `cudaGraphicsResource_t` | | +| typedef | `cudaMipmappedArray_t` | `hipMipmappedArray_t` | +| typedef | `cudaMipmappedArray_const_t` | `hipMipmappedArray_const_t` | +| enum |***`cudaOutputMode`*** | | +| 0x00 |*`cudaKeyValuePair`* | | +| 0x01 |*`cudaCSV`* | | +| typedef | `cudaOutputMode_t` | | +| typedef | `cudaStream_t` | `hipStream_t` | +| typedef | `cudaStreamCallback_t` | `hipStreamCallback_t` | +| typedef | `cudaSurfaceObject_t` | | +| typedef | `cudaTextureObject_t` | | +| typedef | `CUuuid_stcudaUUID_t` | | +| define | `CUDA_IPC_HANDLE_SIZE` | | +| define | `cudaArrayCubemap` | | +| define | `cudaArrayDefault` | | +| define | `cudaArrayLayered` | | +| define | `cudaArraySurfaceLoadStore` | | +| define | `cudaArrayTextureGather` | | +| define | `cudaDeviceBlockingSync` | `hipDeviceScheduleBlockingSync` | +| define | `cudaDeviceLmemResizeToMax` | | +| define | `cudaDeviceMapHost` | | +| define | `cudaDeviceMask` | | +| define | `cudaDevicePropDontCare` | | +| define | `cudaDeviceScheduleAuto` | `hipDeviceScheduleAuto` | +| define | `cudaDeviceScheduleBlockingSync` | 
`hipDeviceScheduleBlockingSync` | +| define | `cudaDeviceScheduleMask` | `hipDeviceScheduleMask` | +| define | `cudaDeviceScheduleSpin` | `hipDeviceScheduleSpin` | +| define | `cudaDeviceScheduleYield` | `hipDeviceScheduleYield` | +| define | `cudaEventDefault` | `hipEventDefault` | +| define | `cudaEventDisableTiming` | `hipEventDisableTiming` | +| define | `cudaEventInterprocess` | `hipEventInterprocess` | +| define | `cudaHostAllocDefault` | `hipHostMallocDefault` | +| define | `cudaHostAllocMapped` | `hipHostMallocMapped` | +| define | `cudaHostAllocPortable` | `hipHostMallocPortable` | +| define | `cudaHostAllocWriteCombined` | `hipHostMallocWriteCombined` | +| define | `cudaHostRegisterDefault` | `hipHostRegisterDefault` | +| define | `cudaHostRegisterIoMemory` | `hipHostRegisterIoMemory` | +| define | `cudaHostRegisterMapped` | `hipHostRegisterMapped` | +| define | `cudaHostRegisterPortable` | `hipHostRegisterPortable` | +| define | `cudaIpcMemLazyEnablePeerAccess` | `hipIpcMemLazyEnablePeerAccess` | +| define | `cudaMemAttachGlobal` | | +| define | `cudaMemAttachHost` | | +| define | `cudaMemAttachSingle` | | +| define | `cudaOccupancyDefault` | | +| define | `cudaOccupancyDisableCachingOverride` | | +| define | `cudaPeerAccessDefault` | | +| define | `cudaStreamDefault` | `hipStreamDefault` | +| define | `cudaStreamLegacy` | | +| define | `cudaStreamNonBlocking` | `hipStreamNonBlocking` | +| define | `cudaStreamPerThread` | | diff --git a/docs/markdown/hip_faq.md b/docs/markdown/hip_faq.md index ddf70f2875..c311aae320 100644 --- a/docs/markdown/hip_faq.md +++ b/docs/markdown/hip_faq.md @@ -48,31 +48,24 @@ The HIP API documentation describes each API and its limitations, if any, compar ### What is not supported? 
#### Runtime/Driver API features -At a high-level, the following features are not supported: -- Textures +At a high-level, the following features are not supported: +- Textures (partial support available) - Dynamic parallelism (CUDA 5.0) - Managed memory (CUDA 6.5) - Graphics interoperability with OpenGL or Direct3D -- CUDA Driver API - CUDA IPC Functions (Under Development) - CUDA array, mipmappedArray and pitched memory -- MemcpyToSymbol functions - Queue priority controls See the [API Support Table](CUDA_Runtime_API_functions_supported_by_HIP.md) for more detailed information. #### Kernel language features -- Device-side dynamic memory allocations (malloc, free, new, delete) (CUDA 4.0) +- C++-style device-side dynamic memory allocations (free, new, delete) (CUDA 4.0) - Virtual functions, indirect functions and try/catch (CUDA 4.0) - `__prof_trigger` - PTX assembly (CUDA 4.0). HCC supports inline GCN assembly. - Several kernel features are under development. See the [HIP Kernel Language](hip_kernel_language.md) for more information. These include: - printf - - assert - - `__restrict__` - - `__threadfence*_`, `__syncthreads*` - - Unbounded loop unroll - ### Is HIP a drop-in replacement for CUDA? @@ -101,18 +94,20 @@ However, we can provide a rough summary of the features included in each CUDA SD - Per-thread-streams (under development) - C++11 (HCC supports all of C++11, all of C++14 and some C++17 features) - CUDA 7.5 - - float16 + - float16 (supported) - CUDA 8.0 - - TBD. + - Page Migration including cudaMemAdvise, cudaMemPrefetch, other cudaMem* APIs(not supported) + ### What libraries does HIP support? -HIP includes growing support for the 4 key math libraries using hcBlas, hcFft, hcrng and hcsparse. -These offer pointer-based memory interfaces (as opposed to opaque buffers) and can be easily interfaced with other HCC applications.
Developers should use conditional compilation if portability to nvcc systems is desired - using calls to cu* routines on one path and hc* routines on the other. +HIP includes growing support for the 4 key math libraries using hcBlas, hcFft, hcrng and hcsparse, as well as MIOpen for machine intelligence applications. +These offer pointer-based memory interfaces (as opposed to opaque buffers) and can be easily interfaced with other HIP applications. +The hip interfaces support both ROCm and CUDA paths, with familiar library interfaces. -- [hcblas](https://bitbucket.org/multicoreware/hcblas) -- [hcfft](https://bitbucket.org/multicoreware/hcfft) -- [hcsparse](https://bitbucket.org/multicoreware/hcsparse) -- [hcrng](https://bitbucket.org/multicoreware/hcrng) +- [hipBlas](https://github.com/ROCmSoftwarePlatform/hipBLAS), which utilizes [rocBlas](https://github.com/ROCmSoftwarePlatform/rocBLAS). +- [hipfft](https://github.com/ROCmSoftwarePlatform/hcFFT) +- [hipsparse](https://github.com/ROCmSoftwarePlatform/hcSPARSE) +- [hiprng](https://github.com/ROCmSoftwarePlatform/hcrng) Additionally, some of the cublas routines are automatically converted to hipblas equivalents by the hipify-clang tool. These APIs use cublas or hcblas depending on the platform, and replace the need to use conditional compilation. @@ -219,7 +214,7 @@ If platform portability is important, use #ifdef __HIP_PLATFORM_NVCC__ to guard ### On HCC, can I use HC functionality with HIP? Yes. The code can include hc.hpp and use HC functions inside the kernel. A typical use-case is to use AMD-specific hardware features such as the permute, swizzle, or DPP operations. -The "-stdlib=libc++" must be passed to hipcc in order to compile hc.hpp. See the 'bit_extract' sample for an example. +See the 'bit_extract' sample for an example. 
Also these functions can be used to extract HCC accelerator and accelerator_view structures from the HIP deviceId and hipStream_t: hipHccGetAccelerator(int deviceId, hc::accelerator *acc); diff --git a/docs/markdown/hip_kernel_language.md b/docs/markdown/hip_kernel_language.md index cfa5d0f871..094d7531e8 100644 --- a/docs/markdown/hip_kernel_language.md +++ b/docs/markdown/hip_kernel_language.md @@ -167,7 +167,7 @@ The `__shared__` keyword is supported. Managed memory, including the `__managed__` keyword, are not supported in HIP. ### `__restrict__` -The `__restrict__` keyword tells the compiler that the associated memory pointer will not alias with any other pointer in the kernel or function. This feature can help the compiler generate better code. In most cases, all pointer arguments must use this keyword to realize the benefit. hcc support for the `__restrict__` qualifier on kernel arguments is under development. +The `__restrict__` keyword tells the compiler that the associated memory pointer will not alias with any other pointer in the kernel or function. This feature can help the compiler generate better code. In most cases, all pointer arguments must use this keyword to realize the benefit. ## Built-In Variables @@ -603,6 +603,7 @@ The Cuda `__prof_trigger()` instruction is not supported. ## Assert The assert function is under development. +HIP does support an "abort" call which will terminate the process execution from inside the kernel. ## Printf @@ -690,7 +691,6 @@ for (int i=0; i<16; i++) ... ``` -Unbounded loop unroll is under development on HCC compiler. ``` #pragma unroll /* hint to compiler to completely unroll next loop. */ for (int i=0; i<16; i++) ... @@ -699,8 +699,18 @@ for (int i=0; i<16; i++) ... ## In-Line Assembly -In-line assembly, including in-line PTX, in-line HSAIL and in-line GCN ISA, is not supported. 
Users who need these features should employ conditional compilation to provide different functionally equivalent implementations on each target platform. +GCN ISA In-line assembly is supported. For example: +``` +asm volatile ("v_mac_f32_e32 %0, %2, %3" : "=v" (out[i]) : "0"(out[i]), "v" (a), "v" (in[i])); +``` + +We insert the GCN isa into the kernel using `asm()` Assembler statement. +`volatile` keyword is used so that the optimizers must not change the number of volatile operations or change their order of execution relative to other volatile operations. +`v_mac_f32_e32` is the GCN instruction, for more information please refer - [AMD GCN3 ISA architecture manual](http://gpuopen.com/compute-product/amd-gcn3-isa-architecture-manual/) +Index for the respective operand in the ordered fashion is provided by `%` followed by position in the list of operands +`"v"` is the constraint code (for target-specific AMDGPU) for 32-bit VGPR register, for more info please refer - [Supported Constraint Code List for AMDGPU](https://llvm.org/docs/LangRef.html#supported-constraint-code-list) +Output Constraints are specified by an `"="` prefix as shown above ("=v"). This indicates that the assembly will write to this operand, and the operand will then be made available as a return value of the asm expression. Input constraints do not have a prefix - just the constraint code. The constraint string of `"0"` says to use the assigned register for output as an input as well (it being the 0'th constraint). ## C++ Support The following C++ features are not supported: diff --git a/docs/markdown/hip_performance.md b/docs/markdown/hip_performance.md deleted file mode 100644 index 67a2f88b58..0000000000 --- a/docs/markdown/hip_performance.md +++ /dev/null @@ -1,39 +0,0 @@ -# HIP Performance Optimizations - -Please note that this document lists possible ways for experimenting with HIP stack to gain performance. Performance may vary from platform to platform.
- -### Unpinned Memory Transfer Optimizations - -#### On Small BAR Setup - -There are two possible ways to transfer data from host-to-device (H2D) and device-to-host(D2H) - * Using Staging Buffers - * Using PinInPlace - -#### On Large BAR Setup - -There are three possible ways to transfer data from host-to-device (H2D) - * Using Staging Buffers - * Using PinInPlace - * Direct Memcpy - - And there are two possible ways to transfer data from device-to-host (D2H) - * Using Staging Buffers - * Using PinInPlace - -Some GPUs may not be able to directly access host memory, and in these cases we need to -stage the copy through an optimized pinned staging buffer, to implement H2D and D2H copies.The copy is broken into buffer-sized chunks to limit the size of the buffer and also to provide better performance by overlapping the CPU copies with the DMA copies. - -PinInPlace is another algorithm which pins the host memory "in-place", and copies it with the DMA engine. - -By default staging buffers are used for unpinned memory transfers. Environment variables allow control over the unpinned copy algorithm and parameters: - -- HIP_PININPLACE - This environment variable forces the use of PinInPlace logic for all unpinned memory copies - -- HIP_OPTIMAL_MEM_TRANSFER- This environment variable enables a hybrid memory copy logic based on thresholds. These thresholds can be managed with following environment variables: - - HIP_H2D_MEM_TRANSFER_THRESHOLD_STAGING_OR_PININPLACE - Threshold in bytes for H2D copy. For sizes smaller than threshold staging buffers logic would be used else PinInPlace logic. - - HIP_H2D_MEM_TRANSFER_THRESHOLD_DIRECT_OR_STAGING - Threshold in bytes for H2D copy. For sizes smaller than threshold direct copy logic would be used else staging buffers logic. - - HIP_D2H_MEM_TRANSFER_THRESHOLD - Threshold in bytes for D2H copy. For sizes smaller than threshold staging buffer logic would be used else PinInPlace logic. 
- - - diff --git a/docs/markdown/hip_porting_guide.md b/docs/markdown/hip_porting_guide.md index 84887fd512..12ec931f2a 100644 --- a/docs/markdown/hip_porting_guide.md +++ b/docs/markdown/hip_porting_guide.md @@ -405,7 +405,7 @@ Code should not assume a warp size of 32 or 64. See [Warp Cross-Lane Functions] ## memcpyToSymbol -HIP support for hipMemCpyToSymbol is complete. This feature allows a kernel +HIP support for hipMemcpyToSymbol is complete. This feature allows a kernel to define a device-side data symbol which can be accessed on the host side. The symbol can be in __constant or device space. diff --git a/docs/markdown/hip_programming_guide.md b/docs/markdown/hip_programming_guide.md new file mode 100644 index 0000000000..5d0c1f2497 --- /dev/null +++ b/docs/markdown/hip_programming_guide.md @@ -0,0 +1,89 @@ +# HIP Programming Guide + +## Host Memory + +### Introduction +hipHostMalloc allocates pinned host memory which is mapped into the address space of all GPUs in the system. +There are two use cases for this host memory: +- Faster HostToDevice and DeviceToHost Data Transfers: +The runtime tracks the hipHostMalloc allocations and can avoid some of the setup required for regular unpinned memory. For exact measurements on a specific system, experiment with --unpinned and --pinned switches for the hipBusBandwidth tool. +- Zero-Copy GPU Access: +GPU can directly access the host memory over the CPU/GPU interconnect, without need to copy the data. This avoids the need for the copy, but during the kernel access each memory access must traverse the interconnect, which can be tens of times slower than accessing the GPU's local device memory. Zero-copy memory can be a good choice when the memory accesses are infrequent (perhaps only once). Zero-copy memory is typically "Coherent" and thus not cached by the GPU but this can be overridden if desired and is explained in more detail below.
+ +### Memory allocation flags +hipHostMalloc always sets the hipHostMallocPortable and hipHostMallocMapped flags. Both usage models described above use the same allocation flags, and the difference is in how the surrounding code uses the host memory. +See the hipHostMalloc API for more information. + + +### Coherency Controls +ROCm defines two coherency options for host memory: +- Coherent memory : Supports fine-grain synchronization while the kernel is running.  For example, a kernel can perform atomic operations that are visible to the host CPU or to other (peer) GPUs.  Synchronization instructions include threadfence_system and C++11-style atomic operations.   However, coherent memory cannot be cached by the GPU and thus may have lower performance. +- Non-coherent memory : Can be cached by GPU, but cannot support synchronization while the kernel is running.  Non-coherent memory can be optionally synchronized only at command (end-of-kernel or copy command) boundaries.  This memory is appropriate for high-performance access when fine-grain synchronization is not required. + +HIP provides the developer with controls to select which type of memory is used via allocation flags passed to hipHostMalloc and the HIP_HOST_COHERENT environment variable: +- hipHostMallocCoherent=0, hipHostMallocNonCoherent=0: Use HIP_HOST_COHERENT environment variable: + - If HIP_HOST_COHERENT is 1 or undefined, the host memory allocation is coherent. + - If HIP_HOST_COHERENT is defined and 0: the host memory allocation is non-coherent. +- hipHostMallocCoherent=1, hipHostMallocNonCoherent=0: The host memory allocation will be coherent.  HIP_HOST_COHERENT env variable is ignored. +- hipHostMallocCoherent=0, hipHostMallocNonCoherent=1: The host memory allocation will be non-coherent.  HIP_HOST_COHERENT env variable is ignored. +- hipHostMallocCoherent=1, hipHostMallocNonCoherent=1: Illegal.
+ + +### Visibility of Zero-Copy Host Memory +Coherent host memory is automatically visible at synchronization points. +Non-coherent + +| HIP API | Synchronization Effect | Fence | Coherent Host Memory Visibility | Non-Coherent Host Memory Visibility| +| --- | --- | --- | --- | --- | +| hipStreamSynchronize | host waits for all commands in the specified stream to complete | system-scope release | yes | yes | +| hipDeviceSynchronize | host waits for all commands in all streams on the specified device to complete | system-scope release | yes | yes | +| hipEventSynchronize | host waits for the specified event to complete | device-scope release | yes | depends - see below| +| hipStreamWaitEvent | stream waits for the specified event to complete | none | yes | no | + + +### hipEventSynchronize +Developers can control the release scope for hipEvents: +- By default, the GPU performs a device-scope acquire and release operation with each recorded event.  This will make host and device memory visible to other commands executing on the same device.  + +A stronger system-level fence can be specified when the event is created with hipEventCreateWithFlags: +- hipEventReleaseToSystem : Perform a system-scope release operation when the event is recorded.  This will make both Coherent and Non-Coherent host memory visible to other agents in the system, but may involve heavyweight operations such as cache flushing.  Coherent memory will typically use lighter-weight in-kernel synchronization mechanisms such as an atomic operation and thus does not need to use hipEventReleaseToSystem. + +### Summary and Recommendations: + +- Coherent host memory is the default and is the easiest to use since the memory is visible to the CPU at typical synchronization points. This memory allows in-kernel synchronization commands such as threadfence_system to work transparently. +- HIP/ROCm also supports the ability to cache host memory in the GPU using the "Non-Coherent" host memory allocations.
This can provide performance benefit, but care must be taken to use the correct synchronization. + + +## Unpinned Memory Transfer Optimizations +Please note that this document lists possible ways for experimenting with HIP stack to gain performance. Performance may vary from platform to platform. + +### On Small BAR Setup + +There are two possible ways to transfer data from host-to-device (H2D) and device-to-host (D2H) + * Using Staging Buffers + * Using PinInPlace + +### On Large BAR Setup + +There are three possible ways to transfer data from host-to-device (H2D) + * Using Staging Buffers + * Using PinInPlace + * Direct Memcpy + + And there are two possible ways to transfer data from device-to-host (D2H) + * Using Staging Buffers + * Using PinInPlace + +Some GPUs may not be able to directly access host memory, and in these cases we need to +stage the copy through an optimized pinned staging buffer, to implement H2D and D2H copies. The copy is broken into buffer-sized chunks to limit the size of the buffer and also to provide better performance by overlapping the CPU copies with the DMA copies. + +PinInPlace is another algorithm which pins the host memory "in-place", and copies it with the DMA engine. + +By default staging buffers are used for unpinned memory transfers. Environment variables allow control over the unpinned copy algorithm and parameters: + +- HIP_PININPLACE - This environment variable forces the use of PinInPlace logic for all unpinned memory copies + +- HIP_OPTIMAL_MEM_TRANSFER - This environment variable enables a hybrid memory copy logic based on thresholds. These thresholds can be managed with the following environment variables: + - HIP_H2D_MEM_TRANSFER_THRESHOLD_STAGING_OR_PININPLACE - Threshold in bytes for H2D copy. For sizes smaller than threshold staging buffers logic would be used else PinInPlace logic. + - HIP_H2D_MEM_TRANSFER_THRESHOLD_DIRECT_OR_STAGING - Threshold in bytes for H2D copy.
For sizes smaller than threshold direct copy logic would be used else staging buffers logic. + - HIP_D2H_MEM_TRANSFER_THRESHOLD - Threshold in bytes for D2H copy. For sizes smaller than threshold staging buffer logic would be used else PinInPlace logic. diff --git a/hipify-clang/src/Cuda2Hip.cpp b/hipify-clang/src/Cuda2Hip.cpp index 9b58173899..553ea5d8af 100644 --- a/hipify-clang/src/Cuda2Hip.cpp +++ b/hipify-clang/src/Cuda2Hip.cpp @@ -61,8 +61,9 @@ using namespace llvm; #define HIP_UNSUPPORTED -1 enum ConvTypes { - CONV_DRIVER = 0, - CONV_DEV, + CONV_VERSION = 0, + CONV_INIT, + CONV_DEVICE, CONV_MEM, CONV_KERN, CONV_COORD_FUNC, @@ -72,16 +73,23 @@ enum ConvTypes { CONV_EVENT, CONV_OCCUPANCY, CONV_CONTEXT, + CONV_PEER, CONV_MODULE, CONV_CACHE, CONV_EXEC, - CONV_ERR, + CONV_ERROR, CONV_DEF, CONV_TEX, CONV_GL, CONV_GRAPHICS, CONV_SURFACE, CONV_JIT, + CONV_D3D9, + CONV_D3D10, + CONV_D3D11, + CONV_VDPAU, + CONV_EGL, + CONV_THREAD, CONV_OTHER, CONV_INCLUDE, CONV_INCLUDE_CUDA_MAIN_H, @@ -92,11 +100,12 @@ enum ConvTypes { }; const char *counterNames[CONV_LAST] = { - "driver", "dev", "mem", "kern", "coord_func", "math_func", - "special_func", "stream", "event", "occupancy", "ctx", "module", - "cache", "exec", "err", "def", "tex", "gl", - "graphics", "surface", "jit", "other", "include", "include_cuda_main_header", - "type", "literal", "numeric_literal"}; + "version", "init", "device", "mem", "kern", "coord_func", "math_func", + "special_func", "stream", "event", "occupancy", "ctx", "peer", "module", + "cache", "exec", "err", "def", "tex", "gl", "graphics", + "surface", "jit", "d3d9", "d3d10", "d3d11", "vdpau", "egl", + "thread", "other", "include", "include_cuda_main_header", "type", "literal", + "numeric_literal"}; enum ApiTypes { API_DRIVER = 0, @@ -196,199 +205,199 @@ struct cuda2hipMap { cuda2hipRename["cudaError"] = {"hipError_t", CONV_TYPE, API_RUNTIME}; // CUDA Driver API error codes only - cuda2hipRename["CUDA_ERROR_INVALID_CONTEXT"] = {"hipErrorInvalidContext", 
CONV_ERR, API_DRIVER}; // 201 - cuda2hipRename["CUDA_ERROR_CONTEXT_ALREADY_CURRENT"] = {"hipErrorContextAlreadyCurrent", CONV_ERR, API_DRIVER}; // 202 - cuda2hipRename["CUDA_ERROR_ARRAY_IS_MAPPED"] = {"hipErrorArrayIsMapped", CONV_ERR, API_DRIVER}; // 207 - cuda2hipRename["CUDA_ERROR_ALREADY_MAPPED"] = {"hipErrorAlreadyMapped", CONV_ERR, API_DRIVER}; // 208 - cuda2hipRename["CUDA_ERROR_ALREADY_ACQUIRED"] = {"hipErrorAlreadyAcquired", CONV_ERR, API_DRIVER}; // 210 - cuda2hipRename["CUDA_ERROR_NOT_MAPPED"] = {"hipErrorNotMapped", CONV_ERR, API_DRIVER}; // 211 - cuda2hipRename["CUDA_ERROR_NOT_MAPPED_AS_ARRAY"] = {"hipErrorNotMappedAsArray", CONV_ERR, API_DRIVER}; // 212 - cuda2hipRename["CUDA_ERROR_NOT_MAPPED_AS_POINTER"] = {"hipErrorNotMappedAsPointer", CONV_ERR, API_DRIVER}; // 213 - cuda2hipRename["CUDA_ERROR_CONTEXT_ALREADY_IN_USE"] = {"hipErrorContextAlreadyInUse", CONV_ERR, API_DRIVER}; // 216 - cuda2hipRename["CUDA_ERROR_INVALID_SOURCE"] = {"hipErrorInvalidSource", CONV_ERR, API_DRIVER}; // 300 - cuda2hipRename["CUDA_ERROR_FILE_NOT_FOUND"] = {"hipErrorFileNotFound", CONV_ERR, API_DRIVER}; // 301 - cuda2hipRename["CUDA_ERROR_NOT_FOUND"] = {"hipErrorNotFound", CONV_ERR, API_DRIVER}; // 500 - cuda2hipRename["CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING"] = {"hipErrorLaunchIncompatibleTexturing", CONV_ERR, API_DRIVER, HIP_UNSUPPORTED}; // 703 - cuda2hipRename["CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE"] = {"hipErrorPrimaryContextActive", CONV_ERR, API_DRIVER, HIP_UNSUPPORTED}; // 708 - cuda2hipRename["CUDA_ERROR_CONTEXT_IS_DESTROYED"] = {"hipErrorContextIsDestroyed", CONV_ERR, API_DRIVER, HIP_UNSUPPORTED}; // 709 - cuda2hipRename["CUDA_ERROR_NOT_PERMITTED"] = {"hipErrorNotPermitted", CONV_ERR, API_DRIVER, HIP_UNSUPPORTED}; // 800 - cuda2hipRename["CUDA_ERROR_NOT_SUPPORTED"] = {"hipErrorNotSupported", CONV_ERR, API_DRIVER, HIP_UNSUPPORTED}; // 801 + cuda2hipRename["CUDA_ERROR_INVALID_CONTEXT"] = {"hipErrorInvalidContext", CONV_TYPE, API_DRIVER}; // 201 + 
cuda2hipRename["CUDA_ERROR_CONTEXT_ALREADY_CURRENT"] = {"hipErrorContextAlreadyCurrent", CONV_TYPE, API_DRIVER}; // 202 + cuda2hipRename["CUDA_ERROR_ARRAY_IS_MAPPED"] = {"hipErrorArrayIsMapped", CONV_TYPE, API_DRIVER}; // 207 + cuda2hipRename["CUDA_ERROR_ALREADY_MAPPED"] = {"hipErrorAlreadyMapped", CONV_TYPE, API_DRIVER}; // 208 + cuda2hipRename["CUDA_ERROR_ALREADY_ACQUIRED"] = {"hipErrorAlreadyAcquired", CONV_TYPE, API_DRIVER}; // 210 + cuda2hipRename["CUDA_ERROR_NOT_MAPPED"] = {"hipErrorNotMapped", CONV_TYPE, API_DRIVER}; // 211 + cuda2hipRename["CUDA_ERROR_NOT_MAPPED_AS_ARRAY"] = {"hipErrorNotMappedAsArray", CONV_TYPE, API_DRIVER}; // 212 + cuda2hipRename["CUDA_ERROR_NOT_MAPPED_AS_POINTER"] = {"hipErrorNotMappedAsPointer", CONV_TYPE, API_DRIVER}; // 213 + cuda2hipRename["CUDA_ERROR_CONTEXT_ALREADY_IN_USE"] = {"hipErrorContextAlreadyInUse", CONV_TYPE, API_DRIVER}; // 216 + cuda2hipRename["CUDA_ERROR_INVALID_SOURCE"] = {"hipErrorInvalidSource", CONV_TYPE, API_DRIVER}; // 300 + cuda2hipRename["CUDA_ERROR_FILE_NOT_FOUND"] = {"hipErrorFileNotFound", CONV_TYPE, API_DRIVER}; // 301 + cuda2hipRename["CUDA_ERROR_NOT_FOUND"] = {"hipErrorNotFound", CONV_TYPE, API_DRIVER}; // 500 + cuda2hipRename["CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING"] = {"hipErrorLaunchIncompatibleTexturing", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 703 + cuda2hipRename["CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE"] = {"hipErrorPrimaryContextActive", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 708 + cuda2hipRename["CUDA_ERROR_CONTEXT_IS_DESTROYED"] = {"hipErrorContextIsDestroyed", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 709 + cuda2hipRename["CUDA_ERROR_NOT_PERMITTED"] = {"hipErrorNotPermitted", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 800 + cuda2hipRename["CUDA_ERROR_NOT_SUPPORTED"] = {"hipErrorNotSupported", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 801 // CUDA RT API error code only - cuda2hipRename["cudaErrorMissingConfiguration"] = {"hipErrorMissingConfiguration", CONV_ERR, API_RUNTIME, 
HIP_UNSUPPORTED}; // 1 - cuda2hipRename["cudaErrorPriorLaunchFailure"] = {"hipErrorPriorLaunchFailure", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 5 - cuda2hipRename["cudaErrorInvalidDeviceFunction"] = {"hipErrorInvalidDeviceFunction", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 8 - cuda2hipRename["cudaErrorInvalidConfiguration"] = {"hipErrorInvalidConfiguration", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 9 - cuda2hipRename["cudaErrorInvalidPitchValue"] = {"hipErrorInvalidPitchValue", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 12 - cuda2hipRename["cudaErrorInvalidSymbol"] = {"hipErrorInvalidSymbol", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 13 - cuda2hipRename["cudaErrorInvalidHostPointer"] = {"hipErrorInvalidHostPointer", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 16 - cuda2hipRename["cudaErrorInvalidDevicePointer"] = {"hipErrorInvalidDevicePointer", CONV_ERR, API_RUNTIME}; // 17 - cuda2hipRename["cudaErrorInvalidTexture"] = {"hipErrorInvalidTexture", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 18 - cuda2hipRename["cudaErrorInvalidTextureBinding"] = {"hipErrorInvalidTextureBinding", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 19 - cuda2hipRename["cudaErrorInvalidChannelDescriptor"] = {"hipErrorInvalidChannelDescriptor", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 20 - cuda2hipRename["cudaErrorInvalidMemcpyDirection"] = {"hipErrorInvalidMemcpyDirection", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 21 - cuda2hipRename["cudaErrorAddressOfConstant"] = {"hipErrorAddressOfConstant", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 22 - cuda2hipRename["cudaErrorTextureFetchFailed"] = {"hipErrorTextureFetchFailed", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 23 - cuda2hipRename["cudaErrorTextureNotBound"] = {"hipErrorTextureNotBound", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 24 - cuda2hipRename["cudaErrorSynchronizationError"] = {"hipErrorSynchronizationError", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 25 - cuda2hipRename["cudaErrorInvalidFilterSetting"] = 
{"hipErrorInvalidFilterSetting", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 26 - cuda2hipRename["cudaErrorInvalidNormSetting"] = {"hipErrorInvalidNormSetting", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 27 - cuda2hipRename["cudaErrorMixedDeviceExecution"] = {"hipErrorMixedDeviceExecution", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 28 + cuda2hipRename["cudaErrorMissingConfiguration"] = {"hipErrorMissingConfiguration", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 1 + cuda2hipRename["cudaErrorPriorLaunchFailure"] = {"hipErrorPriorLaunchFailure", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 5 + cuda2hipRename["cudaErrorInvalidDeviceFunction"] = {"hipErrorInvalidDeviceFunction", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 8 + cuda2hipRename["cudaErrorInvalidConfiguration"] = {"hipErrorInvalidConfiguration", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 9 + cuda2hipRename["cudaErrorInvalidPitchValue"] = {"hipErrorInvalidPitchValue", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 12 + cuda2hipRename["cudaErrorInvalidSymbol"] = {"hipErrorInvalidSymbol", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 13 + cuda2hipRename["cudaErrorInvalidHostPointer"] = {"hipErrorInvalidHostPointer", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 16 + cuda2hipRename["cudaErrorInvalidDevicePointer"] = {"hipErrorInvalidDevicePointer", CONV_TYPE, API_RUNTIME}; // 17 + cuda2hipRename["cudaErrorInvalidTexture"] = {"hipErrorInvalidTexture", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 18 + cuda2hipRename["cudaErrorInvalidTextureBinding"] = {"hipErrorInvalidTextureBinding", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 19 + cuda2hipRename["cudaErrorInvalidChannelDescriptor"] = {"hipErrorInvalidChannelDescriptor", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 20 + cuda2hipRename["cudaErrorInvalidMemcpyDirection"] = {"hipErrorInvalidMemcpyDirection", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 21 + cuda2hipRename["cudaErrorAddressOfConstant"] = {"hipErrorAddressOfConstant", CONV_TYPE, API_RUNTIME, 
HIP_UNSUPPORTED}; // 22 + cuda2hipRename["cudaErrorTextureFetchFailed"] = {"hipErrorTextureFetchFailed", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 23 + cuda2hipRename["cudaErrorTextureNotBound"] = {"hipErrorTextureNotBound", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 24 + cuda2hipRename["cudaErrorSynchronizationError"] = {"hipErrorSynchronizationError", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 25 + cuda2hipRename["cudaErrorInvalidFilterSetting"] = {"hipErrorInvalidFilterSetting", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 26 + cuda2hipRename["cudaErrorInvalidNormSetting"] = {"hipErrorInvalidNormSetting", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 27 + cuda2hipRename["cudaErrorMixedDeviceExecution"] = {"hipErrorMixedDeviceExecution", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 28 // Deprecated as of CUDA 4.1 - cuda2hipRename["cudaErrorNotYetImplemented"] = {"hipErrorNotYetImplemented", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 31 + cuda2hipRename["cudaErrorNotYetImplemented"] = {"hipErrorNotYetImplemented", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 31 // Deprecated as of CUDA 3.1 - cuda2hipRename["cudaErrorMemoryValueTooLarge"] = {"hipErrorMemoryValueTooLarge", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 32 - cuda2hipRename["cudaErrorInsufficientDriver"] = {"hipErrorInsufficientDriver", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 35 - cuda2hipRename["cudaErrorSetOnActiveProcess"] = {"hipErrorSetOnActiveProcess", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 36 - cuda2hipRename["cudaErrorInvalidSurface"] = {"hipErrorInvalidSurface", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 37 - cuda2hipRename["cudaErrorDuplicateVariableName"] = {"hipErrorDuplicateVariableName", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 43 - cuda2hipRename["cudaErrorDuplicateTextureName"] = {"hipErrorDuplicateTextureName", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 44 - cuda2hipRename["cudaErrorDuplicateSurfaceName"] = {"hipErrorDuplicateSurfaceName", CONV_ERR, API_RUNTIME, 
HIP_UNSUPPORTED}; // 45 - cuda2hipRename["cudaErrorDevicesUnavailable"] = {"hipErrorDevicesUnavailable", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 46 - cuda2hipRename["cudaErrorIncompatibleDriverContext"] = {"hipErrorIncompatibleDriverContext", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 49 - cuda2hipRename["cudaErrorDeviceAlreadyInUse"] = {"hipErrorDeviceAlreadyInUse", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 54 - cuda2hipRename["cudaErrorLaunchMaxDepthExceeded"] = {"hipErrorLaunchMaxDepthExceeded", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 65 - cuda2hipRename["cudaErrorLaunchFileScopedTex"] = {"hipErrorLaunchFileScopedTex", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 66 - cuda2hipRename["cudaErrorLaunchFileScopedSurf"] = {"hipErrorLaunchFileScopedSurf", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 67 - cuda2hipRename["cudaErrorSyncDepthExceeded"] = {"hipErrorSyncDepthExceeded", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 68 - cuda2hipRename["cudaErrorLaunchPendingCountExceeded"] = {"hipErrorLaunchPendingCountExceeded", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 69 - cuda2hipRename["cudaErrorNotPermitted"] = {"hipErrorNotPermitted", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 70 - cuda2hipRename["cudaErrorNotSupported"] = {"hipErrorNotSupported", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 71 - cuda2hipRename["cudaErrorStartupFailure"] = {"hipErrorStartupFailure", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 0x7f + cuda2hipRename["cudaErrorMemoryValueTooLarge"] = {"hipErrorMemoryValueTooLarge", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 32 + cuda2hipRename["cudaErrorInsufficientDriver"] = {"hipErrorInsufficientDriver", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 35 + cuda2hipRename["cudaErrorSetOnActiveProcess"] = {"hipErrorSetOnActiveProcess", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 36 + cuda2hipRename["cudaErrorInvalidSurface"] = {"hipErrorInvalidSurface", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 37 + 
cuda2hipRename["cudaErrorDuplicateVariableName"] = {"hipErrorDuplicateVariableName", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 43 + cuda2hipRename["cudaErrorDuplicateTextureName"] = {"hipErrorDuplicateTextureName", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 44 + cuda2hipRename["cudaErrorDuplicateSurfaceName"] = {"hipErrorDuplicateSurfaceName", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 45 + cuda2hipRename["cudaErrorDevicesUnavailable"] = {"hipErrorDevicesUnavailable", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 46 + cuda2hipRename["cudaErrorIncompatibleDriverContext"] = {"hipErrorIncompatibleDriverContext", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 49 + cuda2hipRename["cudaErrorDeviceAlreadyInUse"] = {"hipErrorDeviceAlreadyInUse", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 54 + cuda2hipRename["cudaErrorLaunchMaxDepthExceeded"] = {"hipErrorLaunchMaxDepthExceeded", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 65 + cuda2hipRename["cudaErrorLaunchFileScopedTex"] = {"hipErrorLaunchFileScopedTex", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 66 + cuda2hipRename["cudaErrorLaunchFileScopedSurf"] = {"hipErrorLaunchFileScopedSurf", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 67 + cuda2hipRename["cudaErrorSyncDepthExceeded"] = {"hipErrorSyncDepthExceeded", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 68 + cuda2hipRename["cudaErrorLaunchPendingCountExceeded"] = {"hipErrorLaunchPendingCountExceeded", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 69 + cuda2hipRename["cudaErrorNotPermitted"] = {"hipErrorNotPermitted", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 70 + cuda2hipRename["cudaErrorNotSupported"] = {"hipErrorNotSupported", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 71 + cuda2hipRename["cudaErrorStartupFailure"] = {"hipErrorStartupFailure", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 0x7f // Deprecated as of CUDA 4.1 - cuda2hipRename["cudaErrorApiFailureBase"] = {"hipErrorApiFailureBase", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 10000 + 
cuda2hipRename["cudaErrorApiFailureBase"] = {"hipErrorApiFailureBase", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 10000 - cuda2hipRename["CUDA_SUCCESS"] = {"hipSuccess", CONV_ERR, API_DRIVER}; // 0 - cuda2hipRename["cudaSuccess"] = {"hipSuccess", CONV_ERR, API_RUNTIME}; // 0 + cuda2hipRename["CUDA_SUCCESS"] = {"hipSuccess", CONV_TYPE, API_DRIVER}; // 0 + cuda2hipRename["cudaSuccess"] = {"hipSuccess", CONV_TYPE, API_RUNTIME}; // 0 - cuda2hipRename["CUDA_ERROR_INVALID_VALUE"] = {"hipErrorInvalidValue", CONV_ERR, API_DRIVER}; // 1 - cuda2hipRename["cudaErrorInvalidValue"] = {"hipErrorInvalidValue", CONV_ERR, API_RUNTIME}; // 11 + cuda2hipRename["CUDA_ERROR_INVALID_VALUE"] = {"hipErrorInvalidValue", CONV_TYPE, API_DRIVER}; // 1 + cuda2hipRename["cudaErrorInvalidValue"] = {"hipErrorInvalidValue", CONV_TYPE, API_RUNTIME}; // 11 - cuda2hipRename["CUDA_ERROR_OUT_OF_MEMORY"] = {"hipErrorMemoryAllocation", CONV_ERR, API_DRIVER}; // 2 - cuda2hipRename["cudaErrorMemoryAllocation"] = {"hipErrorMemoryAllocation", CONV_ERR, API_RUNTIME}; // 2 + cuda2hipRename["CUDA_ERROR_OUT_OF_MEMORY"] = {"hipErrorMemoryAllocation", CONV_TYPE, API_DRIVER}; // 2 + cuda2hipRename["cudaErrorMemoryAllocation"] = {"hipErrorMemoryAllocation", CONV_TYPE, API_RUNTIME}; // 2 - cuda2hipRename["CUDA_ERROR_NOT_INITIALIZED"] = {"hipErrorNotInitialized", CONV_ERR, API_DRIVER}; // 3 - cuda2hipRename["cudaErrorInitializationError"] = {"hipErrorInitializationError", CONV_ERR, API_RUNTIME}; // 3 + cuda2hipRename["CUDA_ERROR_NOT_INITIALIZED"] = {"hipErrorNotInitialized", CONV_TYPE, API_DRIVER}; // 3 + cuda2hipRename["cudaErrorInitializationError"] = {"hipErrorInitializationError", CONV_TYPE, API_RUNTIME}; // 3 - cuda2hipRename["CUDA_ERROR_DEINITIALIZED"] = {"hipErrorDeinitialized", CONV_ERR, API_DRIVER}; // 4 + cuda2hipRename["CUDA_ERROR_DEINITIALIZED"] = {"hipErrorDeinitialized", CONV_TYPE, API_DRIVER}; // 4 // TODO: double check, that these errors match - cuda2hipRename["cudaErrorCudartUnloading"] = 
{"hipErrorDeinitialized", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 29 + cuda2hipRename["cudaErrorCudartUnloading"] = {"hipErrorDeinitialized", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 29 - cuda2hipRename["CUDA_ERROR_PROFILER_DISABLED"] = {"hipErrorProfilerDisabled", CONV_ERR, API_DRIVER}; // 5 - cuda2hipRename["cudaErrorProfilerDisabled"] = {"hipErrorProfilerDisabled", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 55 + cuda2hipRename["CUDA_ERROR_PROFILER_DISABLED"] = {"hipErrorProfilerDisabled", CONV_TYPE, API_DRIVER}; // 5 + cuda2hipRename["cudaErrorProfilerDisabled"] = {"hipErrorProfilerDisabled", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 55 - cuda2hipRename["CUDA_ERROR_PROFILER_NOT_INITIALIZED"] = {"hipErrorProfilerNotInitialized", CONV_ERR, API_DRIVER}; // 6 + cuda2hipRename["CUDA_ERROR_PROFILER_NOT_INITIALIZED"] = {"hipErrorProfilerNotInitialized", CONV_TYPE, API_DRIVER}; // 6 // Deprecated as of CUDA 5.0 - cuda2hipRename["cudaErrorProfilerNotInitialized"] = {"hipErrorProfilerNotInitialized", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 56 + cuda2hipRename["cudaErrorProfilerNotInitialized"] = {"hipErrorProfilerNotInitialized", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 56 - cuda2hipRename["CUDA_ERROR_PROFILER_ALREADY_STARTED"] = {"hipErrorProfilerAlreadyStarted", CONV_ERR, API_DRIVER}; // 7 + cuda2hipRename["CUDA_ERROR_PROFILER_ALREADY_STARTED"] = {"hipErrorProfilerAlreadyStarted", CONV_TYPE, API_DRIVER}; // 7 // Deprecated as of CUDA 5.0 - cuda2hipRename["cudaErrorProfilerAlreadyStarted"] = {"hipErrorProfilerAlreadyStarted", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 57 + cuda2hipRename["cudaErrorProfilerAlreadyStarted"] = {"hipErrorProfilerAlreadyStarted", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 57 - cuda2hipRename["CUDA_ERROR_PROFILER_ALREADY_STOPPED"] = {"hipErrorProfilerAlreadyStopped", CONV_ERR, API_DRIVER}; // 8 + cuda2hipRename["CUDA_ERROR_PROFILER_ALREADY_STOPPED"] = {"hipErrorProfilerAlreadyStopped", CONV_TYPE, API_DRIVER}; // 8 // 
Deprecated as of CUDA 5.0 - cuda2hipRename["cudaErrorProfilerAlreadyStopped"] = {"hipErrorProfilerAlreadyStopped", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 58 + cuda2hipRename["cudaErrorProfilerAlreadyStopped"] = {"hipErrorProfilerAlreadyStopped", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 58 - cuda2hipRename["CUDA_ERROR_NO_DEVICE"] = {"hipErrorNoDevice", CONV_ERR, API_DRIVER}; // 100 - cuda2hipRename["cudaErrorNoDevice"] = {"hipErrorNoDevice", CONV_ERR, API_RUNTIME}; // 38 + cuda2hipRename["CUDA_ERROR_NO_DEVICE"] = {"hipErrorNoDevice", CONV_TYPE, API_DRIVER}; // 100 + cuda2hipRename["cudaErrorNoDevice"] = {"hipErrorNoDevice", CONV_TYPE, API_RUNTIME}; // 38 - cuda2hipRename["CUDA_ERROR_INVALID_DEVICE"] = {"hipErrorInvalidDevice", CONV_ERR, API_DRIVER}; // 101 - cuda2hipRename["cudaErrorInvalidDevice"] = {"hipErrorInvalidDevice", CONV_ERR, API_RUNTIME}; // 10 + cuda2hipRename["CUDA_ERROR_INVALID_DEVICE"] = {"hipErrorInvalidDevice", CONV_TYPE, API_DRIVER}; // 101 + cuda2hipRename["cudaErrorInvalidDevice"] = {"hipErrorInvalidDevice", CONV_TYPE, API_RUNTIME}; // 10 - cuda2hipRename["CUDA_ERROR_INVALID_IMAGE"] = {"hipErrorInvalidImage", CONV_ERR, API_DRIVER}; // 200 - cuda2hipRename["cudaErrorInvalidKernelImage"] = {"hipErrorInvalidImage", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 47 + cuda2hipRename["CUDA_ERROR_INVALID_IMAGE"] = {"hipErrorInvalidImage", CONV_TYPE, API_DRIVER}; // 200 + cuda2hipRename["cudaErrorInvalidKernelImage"] = {"hipErrorInvalidImage", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 47 - cuda2hipRename["CUDA_ERROR_MAP_FAILED"] = {"hipErrorMapFailed", CONV_ERR, API_DRIVER}; // 205 + cuda2hipRename["CUDA_ERROR_MAP_FAILED"] = {"hipErrorMapFailed", CONV_TYPE, API_DRIVER}; // 205 // TODO: double check, that these errors match - cuda2hipRename["cudaErrorMapBufferObjectFailed"] = {"hipErrorMapFailed", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 14 + cuda2hipRename["cudaErrorMapBufferObjectFailed"] = {"hipErrorMapFailed", CONV_TYPE, API_RUNTIME, 
HIP_UNSUPPORTED}; // 14 - cuda2hipRename["CUDA_ERROR_UNMAP_FAILED"] = {"hipErrorUnmapFailed", CONV_ERR, API_DRIVER}; // 206 + cuda2hipRename["CUDA_ERROR_UNMAP_FAILED"] = {"hipErrorUnmapFailed", CONV_TYPE, API_DRIVER}; // 206 // TODO: double check, that these errors match - cuda2hipRename["cudaErrorUnmapBufferObjectFailed"] = {"hipErrorUnmapFailed", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 15 + cuda2hipRename["cudaErrorUnmapBufferObjectFailed"] = {"hipErrorUnmapFailed", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 15 - cuda2hipRename["CUDA_ERROR_NO_BINARY_FOR_GPU"] = {"hipErrorNoBinaryForGpu", CONV_ERR, API_DRIVER}; // 209 - cuda2hipRename["cudaErrorNoKernelImageForDevice"] = {"hipErrorNoBinaryForGpu", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 48 + cuda2hipRename["CUDA_ERROR_NO_BINARY_FOR_GPU"] = {"hipErrorNoBinaryForGpu", CONV_TYPE, API_DRIVER}; // 209 + cuda2hipRename["cudaErrorNoKernelImageForDevice"] = {"hipErrorNoBinaryForGpu", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 48 - cuda2hipRename["CUDA_ERROR_ECC_UNCORRECTABLE"] = {"hipErrorECCNotCorrectable", CONV_ERR, API_DRIVER}; // 214 - cuda2hipRename["cudaErrorECCUncorrectable"] = {"hipErrorECCNotCorrectable", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 39 + cuda2hipRename["CUDA_ERROR_ECC_UNCORRECTABLE"] = {"hipErrorECCNotCorrectable", CONV_TYPE, API_DRIVER}; // 214 + cuda2hipRename["cudaErrorECCUncorrectable"] = {"hipErrorECCNotCorrectable", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 39 - cuda2hipRename["CUDA_ERROR_UNSUPPORTED_LIMIT"] = {"hipErrorUnsupportedLimit", CONV_ERR, API_DRIVER}; // 215 - cuda2hipRename["cudaErrorUnsupportedLimit"] = {"hipErrorUnsupportedLimit", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 42 + cuda2hipRename["CUDA_ERROR_UNSUPPORTED_LIMIT"] = {"hipErrorUnsupportedLimit", CONV_TYPE, API_DRIVER}; // 215 + cuda2hipRename["cudaErrorUnsupportedLimit"] = {"hipErrorUnsupportedLimit", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 42 - cuda2hipRename["CUDA_ERROR_PEER_ACCESS_UNSUPPORTED"] = 
{"hipErrorPeerAccessUnsupported", CONV_ERR, API_DRIVER}; // 217 - cuda2hipRename["cudaErrorPeerAccessUnsupported"] = {"hipErrorPeerAccessUnsupported", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 64 + cuda2hipRename["CUDA_ERROR_PEER_ACCESS_UNSUPPORTED"] = {"hipErrorPeerAccessUnsupported", CONV_TYPE, API_DRIVER}; // 217 + cuda2hipRename["cudaErrorPeerAccessUnsupported"] = {"hipErrorPeerAccessUnsupported", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 64 - cuda2hipRename["CUDA_ERROR_INVALID_PTX"] = {"hipErrorInvalidKernelFile", CONV_ERR, API_DRIVER}; // 218 - cuda2hipRename["cudaErrorInvalidPtx"] = {"hipErrorInvalidKernelFile", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 78 + cuda2hipRename["CUDA_ERROR_INVALID_PTX"] = {"hipErrorInvalidKernelFile", CONV_TYPE, API_DRIVER}; // 218 + cuda2hipRename["cudaErrorInvalidPtx"] = {"hipErrorInvalidKernelFile", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 78 - cuda2hipRename["CUDA_ERROR_INVALID_GRAPHICS_CONTEXT"] = {"hipErrorInvalidGraphicsContext", CONV_ERR, API_DRIVER}; // 219 - cuda2hipRename["cudaErrorInvalidGraphicsContext"] = {"hipErrorInvalidGraphicsContext", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 79 + cuda2hipRename["CUDA_ERROR_INVALID_GRAPHICS_CONTEXT"] = {"hipErrorInvalidGraphicsContext", CONV_TYPE, API_DRIVER}; // 219 + cuda2hipRename["cudaErrorInvalidGraphicsContext"] = {"hipErrorInvalidGraphicsContext", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 79 - cuda2hipRename["CUDA_ERROR_NVLINK_UNCORRECTABLE"] = {"hipErrorNvlinkUncorrectable", CONV_ERR, API_DRIVER, HIP_UNSUPPORTED}; // 220 [CUDA 8.0.44] - cuda2hipRename["cudaErrorNvlinkUncorrectable"] = {"hipErrorNvlinkUncorrectable", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 80 [CUDA 8.0.44] + cuda2hipRename["CUDA_ERROR_NVLINK_UNCORRECTABLE"] = {"hipErrorNvlinkUncorrectable", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 220 [CUDA 8.0.44] + cuda2hipRename["cudaErrorNvlinkUncorrectable"] = {"hipErrorNvlinkUncorrectable", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 80 
[CUDA 8.0.44] - cuda2hipRename["CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND"] = {"hipErrorSharedObjectSymbolNotFound", CONV_ERR, API_DRIVER}; // 302 - cuda2hipRename["cudaErrorSharedObjectSymbolNotFound"] = {"hipErrorSharedObjectSymbolNotFound", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 40 + cuda2hipRename["CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND"] = {"hipErrorSharedObjectSymbolNotFound", CONV_TYPE, API_DRIVER}; // 302 + cuda2hipRename["cudaErrorSharedObjectSymbolNotFound"] = {"hipErrorSharedObjectSymbolNotFound", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 40 - cuda2hipRename["CUDA_ERROR_SHARED_OBJECT_INIT_FAILED"] = {"hipErrorSharedObjectInitFailed", CONV_ERR, API_DRIVER}; // 303 - cuda2hipRename["cudaErrorSharedObjectInitFailed"] = {"hipErrorSharedObjectInitFailed", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 41 + cuda2hipRename["CUDA_ERROR_SHARED_OBJECT_INIT_FAILED"] = {"hipErrorSharedObjectInitFailed", CONV_TYPE, API_DRIVER}; // 303 + cuda2hipRename["cudaErrorSharedObjectInitFailed"] = {"hipErrorSharedObjectInitFailed", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 41 - cuda2hipRename["CUDA_ERROR_OPERATING_SYSTEM"] = {"hipErrorOperatingSystem", CONV_ERR, API_DRIVER}; // 304 - cuda2hipRename["cudaErrorOperatingSystem"] = {"hipErrorOperatingSystem", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 63 + cuda2hipRename["CUDA_ERROR_OPERATING_SYSTEM"] = {"hipErrorOperatingSystem", CONV_TYPE, API_DRIVER}; // 304 + cuda2hipRename["cudaErrorOperatingSystem"] = {"hipErrorOperatingSystem", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 63 - cuda2hipRename["CUDA_ERROR_INVALID_HANDLE"] = {"hipErrorInvalidResourceHandle", CONV_ERR, API_DRIVER}; // 400 - cuda2hipRename["cudaErrorInvalidResourceHandle"] = {"hipErrorInvalidResourceHandle", CONV_ERR, API_RUNTIME}; // 33 + cuda2hipRename["CUDA_ERROR_INVALID_HANDLE"] = {"hipErrorInvalidResourceHandle", CONV_TYPE, API_DRIVER}; // 400 + cuda2hipRename["cudaErrorInvalidResourceHandle"] = {"hipErrorInvalidResourceHandle", CONV_TYPE, 
API_RUNTIME}; // 33 - cuda2hipRename["CUDA_ERROR_NOT_READY"] = {"hipErrorNotReady", CONV_ERR, API_DRIVER}; // 600 - cuda2hipRename["cudaErrorNotReady"] = {"hipErrorNotReady", CONV_ERR, API_RUNTIME}; // 34 + cuda2hipRename["CUDA_ERROR_NOT_READY"] = {"hipErrorNotReady", CONV_TYPE, API_DRIVER}; // 600 + cuda2hipRename["cudaErrorNotReady"] = {"hipErrorNotReady", CONV_TYPE, API_RUNTIME}; // 34 - cuda2hipRename["CUDA_ERROR_ILLEGAL_ADDRESS"] = {"hipErrorIllegalAddress", CONV_ERR, API_DRIVER}; // 700 - cuda2hipRename["cudaErrorIllegalAddress"] = {"hipErrorIllegalAddress", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 77 + cuda2hipRename["CUDA_ERROR_ILLEGAL_ADDRESS"] = {"hipErrorIllegalAddress", CONV_TYPE, API_DRIVER}; // 700 + cuda2hipRename["cudaErrorIllegalAddress"] = {"hipErrorIllegalAddress", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 77 - cuda2hipRename["CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES"] = {"hipErrorLaunchOutOfResources", CONV_ERR, API_DRIVER}; // 701 - cuda2hipRename["cudaErrorLaunchOutOfResources"] = {"hipErrorLaunchOutOfResources", CONV_ERR, API_RUNTIME}; // 7 + cuda2hipRename["CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES"] = {"hipErrorLaunchOutOfResources", CONV_TYPE, API_DRIVER}; // 701 + cuda2hipRename["cudaErrorLaunchOutOfResources"] = {"hipErrorLaunchOutOfResources", CONV_TYPE, API_RUNTIME}; // 7 - cuda2hipRename["CUDA_ERROR_LAUNCH_TIMEOUT"] = {"hipErrorLaunchTimeOut", CONV_ERR, API_DRIVER}; // 702 - cuda2hipRename["cudaErrorLaunchTimeout"] = {"hipErrorLaunchTimeOut", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 6 + cuda2hipRename["CUDA_ERROR_LAUNCH_TIMEOUT"] = {"hipErrorLaunchTimeOut", CONV_TYPE, API_DRIVER}; // 702 + cuda2hipRename["cudaErrorLaunchTimeout"] = {"hipErrorLaunchTimeOut", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 6 - cuda2hipRename["CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED"] = {"hipErrorPeerAccessAlreadyEnabled", CONV_ERR, API_DRIVER}; // 704 - cuda2hipRename["cudaErrorPeerAccessAlreadyEnabled"] = {"hipErrorPeerAccessAlreadyEnabled", CONV_ERR, 
API_RUNTIME}; // 50 + cuda2hipRename["CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED"] = {"hipErrorPeerAccessAlreadyEnabled", CONV_TYPE, API_DRIVER}; // 704 + cuda2hipRename["cudaErrorPeerAccessAlreadyEnabled"] = {"hipErrorPeerAccessAlreadyEnabled", CONV_TYPE, API_RUNTIME}; // 50 - cuda2hipRename["CUDA_ERROR_PEER_ACCESS_NOT_ENABLED"] = {"hipErrorPeerAccessNotEnabled", CONV_ERR, API_DRIVER}; // 705 - cuda2hipRename["cudaErrorPeerAccessNotEnabled"] = {"hipErrorPeerAccessNotEnabled", CONV_ERR, API_RUNTIME}; // 51 + cuda2hipRename["CUDA_ERROR_PEER_ACCESS_NOT_ENABLED"] = {"hipErrorPeerAccessNotEnabled", CONV_TYPE, API_DRIVER}; // 705 + cuda2hipRename["cudaErrorPeerAccessNotEnabled"] = {"hipErrorPeerAccessNotEnabled", CONV_TYPE, API_RUNTIME}; // 51 - cuda2hipRename["CUDA_ERROR_ASSERT"] = {"hipErrorAssert", CONV_ERR, API_DRIVER, HIP_UNSUPPORTED}; // 710 - cuda2hipRename["cudaErrorAssert"] = {"hipErrorAssert", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 59 + cuda2hipRename["CUDA_ERROR_ASSERT"] = {"hipErrorAssert", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 710 + cuda2hipRename["cudaErrorAssert"] = {"hipErrorAssert", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 59 - cuda2hipRename["CUDA_ERROR_TOO_MANY_PEERS"] = {"hipErrorTooManyPeers", CONV_ERR, API_DRIVER, HIP_UNSUPPORTED}; // 711 - cuda2hipRename["cudaErrorTooManyPeers"] = {"hipErrorTooManyPeers", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 60 + cuda2hipRename["CUDA_ERROR_TOO_MANY_PEERS"] = {"hipErrorTooManyPeers", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 711 + cuda2hipRename["cudaErrorTooManyPeers"] = {"hipErrorTooManyPeers", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 60 - cuda2hipRename["CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED"] = {"hipErrorHostMemoryAlreadyRegistered", CONV_ERR, API_DRIVER}; // 712 - cuda2hipRename["cudaErrorHostMemoryAlreadyRegistered"] = {"hipErrorHostMemoryAlreadyRegistered", CONV_ERR, API_RUNTIME}; // 61 + cuda2hipRename["CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED"] = 
{"hipErrorHostMemoryAlreadyRegistered", CONV_TYPE, API_DRIVER}; // 712 + cuda2hipRename["cudaErrorHostMemoryAlreadyRegistered"] = {"hipErrorHostMemoryAlreadyRegistered", CONV_TYPE, API_RUNTIME}; // 61 - cuda2hipRename["CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED"] = {"hipErrorHostMemoryNotRegistered", CONV_ERR, API_DRIVER}; // 713 - cuda2hipRename["cudaErrorHostMemoryNotRegistered"] = {"hipErrorHostMemoryNotRegistered", CONV_ERR, API_RUNTIME}; // 62 + cuda2hipRename["CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED"] = {"hipErrorHostMemoryNotRegistered", CONV_TYPE, API_DRIVER}; // 713 + cuda2hipRename["cudaErrorHostMemoryNotRegistered"] = {"hipErrorHostMemoryNotRegistered", CONV_TYPE, API_RUNTIME}; // 62 - cuda2hipRename["CUDA_ERROR_HARDWARE_STACK_ERROR"] = {"hipErrorHardwareStackError", CONV_ERR, API_DRIVER, HIP_UNSUPPORTED}; // 714 - cuda2hipRename["cudaErrorHardwareStackError"] = {"hipErrorHardwareStackError", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 72 + cuda2hipRename["CUDA_ERROR_HARDWARE_STACK_ERROR"] = {"hipErrorHardwareStackError", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 714 + cuda2hipRename["cudaErrorHardwareStackError"] = {"hipErrorHardwareStackError", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 72 - cuda2hipRename["CUDA_ERROR_ILLEGAL_INSTRUCTION"] = {"hipErrorIllegalInstruction", CONV_ERR, API_DRIVER, HIP_UNSUPPORTED}; // 715 - cuda2hipRename["cudaErrorIllegalInstruction"] = {"hipErrorIllegalInstruction", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 73 + cuda2hipRename["CUDA_ERROR_ILLEGAL_INSTRUCTION"] = {"hipErrorIllegalInstruction", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 715 + cuda2hipRename["cudaErrorIllegalInstruction"] = {"hipErrorIllegalInstruction", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 73 - cuda2hipRename["CUDA_ERROR_MISALIGNED_ADDRESS"] = {"hipErrorMisalignedAddress", CONV_ERR, API_DRIVER, HIP_UNSUPPORTED}; // 716 - cuda2hipRename["cudaErrorMisalignedAddress"] = {"hipErrorMisalignedAddress", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 74 + 
cuda2hipRename["CUDA_ERROR_MISALIGNED_ADDRESS"] = {"hipErrorMisalignedAddress", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 716 + cuda2hipRename["cudaErrorMisalignedAddress"] = {"hipErrorMisalignedAddress", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 74 - cuda2hipRename["CUDA_ERROR_INVALID_ADDRESS_SPACE"] = {"hipErrorInvalidAddressSpace", CONV_ERR, API_DRIVER, HIP_UNSUPPORTED}; // 717 - cuda2hipRename["cudaErrorInvalidAddressSpace"] = {"hipErrorInvalidAddressSpace", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 75 + cuda2hipRename["CUDA_ERROR_INVALID_ADDRESS_SPACE"] = {"hipErrorInvalidAddressSpace", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 717 + cuda2hipRename["cudaErrorInvalidAddressSpace"] = {"hipErrorInvalidAddressSpace", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 75 - cuda2hipRename["CUDA_ERROR_INVALID_PC"] = {"hipErrorInvalidPc", CONV_ERR, API_DRIVER, HIP_UNSUPPORTED}; // 718 - cuda2hipRename["cudaErrorInvalidPc"] = {"hipErrorInvalidPc", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 76 + cuda2hipRename["CUDA_ERROR_INVALID_PC"] = {"hipErrorInvalidPc", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 718 + cuda2hipRename["cudaErrorInvalidPc"] = {"hipErrorInvalidPc", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 76 - cuda2hipRename["CUDA_ERROR_LAUNCH_FAILED"] = {"hipErrorLaunchFailure", CONV_ERR, API_DRIVER, HIP_UNSUPPORTED}; // 719 - cuda2hipRename["cudaErrorLaunchFailure"] = {"hipErrorLaunchFailure", CONV_ERR, API_RUNTIME, HIP_UNSUPPORTED}; // 4 + cuda2hipRename["CUDA_ERROR_LAUNCH_FAILED"] = {"hipErrorLaunchFailure", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 719 + cuda2hipRename["cudaErrorLaunchFailure"] = {"hipErrorLaunchFailure", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 4 - cuda2hipRename["CUDA_ERROR_UNKNOWN"] = {"hipErrorUnknown", CONV_ERR, API_DRIVER, HIP_UNSUPPORTED}; // 999 - cuda2hipRename["cudaErrorUnknown"] = {"hipErrorUnknown", CONV_ERR, API_RUNTIME}; // 30 + cuda2hipRename["CUDA_ERROR_UNKNOWN"] = {"hipErrorUnknown", CONV_TYPE, API_DRIVER, 
HIP_UNSUPPORTED}; // 999 + cuda2hipRename["cudaErrorUnknown"] = {"hipErrorUnknown", CONV_TYPE, API_RUNTIME}; // 30 ///////////////////////////// CUDA DRIVER API ///////////////////////////// // structs @@ -428,11 +437,11 @@ struct cuda2hipMap { cuda2hipRename["CU_AD_FORMAT_HALF"] = {"HIP_AD_FORMAT_HALF", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x10 cuda2hipRename["CU_AD_FORMAT_FLOAT"] = {"HIP_AD_FORMAT_FLOAT", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x20 // Compute mode - cuda2hipRename["CUcomputemode"] = {"hipComputemode", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // API_RUNTIME ANALOGUE (cudaComputeMode) - cuda2hipRename["CU_COMPUTEMODE_DEFAULT"] = {"hipComputeModeDefault", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0 // API_RUNTIME ANALOGUE (cudaComputeModeDefault = 0) - cuda2hipRename["CU_COMPUTEMODE_EXCLUSIVE"] = {"hipComputeModeExclusive", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 1 // API_RUNTIME ANALOGUE (cudaComputeModeExclusive = 1) - cuda2hipRename["CU_COMPUTEMODE_PROHIBITED"] = {"hipComputeModeProhibited", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 2 // API_RUNTIME ANALOGUE (cudaComputeModeProhibited = 2) - cuda2hipRename["CU_COMPUTEMODE_EXCLUSIVE_PROCESS"] = {"hipComputeModeExclusiveProcess", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 3 // API_RUNTIME ANALOGUE (cudaComputeModeExclusiveProcess = 3) + cuda2hipRename["CUcomputemode"] = {"hipComputemode", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // API_RUNTIME ANALOGUE (cudaComputeMode) + cuda2hipRename["CU_COMPUTEMODE_DEFAULT"] = {"hipComputeModeDefault", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0 // API_RUNTIME ANALOGUE (cudaComputeModeDefault = 0) + cuda2hipRename["CU_COMPUTEMODE_EXCLUSIVE"] = {"hipComputeModeExclusive", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 1 // API_RUNTIME ANALOGUE (cudaComputeModeExclusive = 1) + cuda2hipRename["CU_COMPUTEMODE_PROHIBITED"] = {"hipComputeModeProhibited", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 2 // API_RUNTIME ANALOGUE (cudaComputeModeProhibited = 
2) + cuda2hipRename["CU_COMPUTEMODE_EXCLUSIVE_PROCESS"] = {"hipComputeModeExclusiveProcess", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 3 // API_RUNTIME ANALOGUE (cudaComputeModeExclusiveProcess = 3) // unsupported yet by HIP [CUDA 8.0.44] // Memory advise values @@ -465,31 +474,31 @@ struct cuda2hipMap { cuda2hipRename["CU_CTX_FLAGS_MASK"] = {"HIP_CTX_FLAGS_MASK", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x1f // Defines - cuda2hipRename["CU_LAUNCH_PARAM_BUFFER_POINTER"] = {"HIP_LAUNCH_PARAM_BUFFER_POINTER", CONV_DEV, API_DRIVER}; // ((void*)0x01) - cuda2hipRename["CU_LAUNCH_PARAM_BUFFER_SIZE"] = {"HIP_LAUNCH_PARAM_BUFFER_SIZE", CONV_DEV, API_DRIVER}; // ((void*)0x02) - cuda2hipRename["CU_LAUNCH_PARAM_END"] = {"HIP_LAUNCH_PARAM_END", CONV_DEV, API_DRIVER}; // ((void*)0x00) - cuda2hipRename["CU_IPC_HANDLE_SIZE"] = {"HIP_LAUNCH_PARAM_END", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 64 - cuda2hipRename["CU_MEMHOSTALLOC_DEVICEMAP"] = {"HIP_MEMHOSTALLOC_DEVICEMAP", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x02 - cuda2hipRename["CU_MEMHOSTALLOC_PORTABLE"] = {"HIP_MEMHOSTALLOC_PORTABLE", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 - cuda2hipRename["CU_MEMHOSTALLOC_WRITECOMBINED"] = {"HIP_MEMHOSTALLOC_WRITECOMBINED", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x04 - cuda2hipRename["CU_MEMHOSTREGISTER_DEVICEMAP"] = {"HIP_MEMHOSTREGISTER_DEVICEMAP", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x02 - cuda2hipRename["CU_MEMHOSTREGISTER_IOMEMORY"] = {"HIP_MEMHOSTREGISTER_IOMEMORY", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x04 - cuda2hipRename["CU_MEMHOSTREGISTER_PORTABLE"] = {"HIP_MEMHOSTREGISTER_PORTABLE", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 - cuda2hipRename["CU_PARAM_TR_DEFAULT"] = {"HIP_PARAM_TR_DEFAULT", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // -1 - cuda2hipRename["CU_STREAM_LEGACY"] = {"HIP_STREAM_LEGACY", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // ((CUstream)0x1) - cuda2hipRename["CU_STREAM_PER_THREAD"] = {"HIP_STREAM_PER_THREAD", CONV_DEV, 
API_DRIVER, HIP_UNSUPPORTED}; // ((CUstream)0x2) - cuda2hipRename["CU_TRSA_OVERRIDE_FORMAT"] = {"HIP_TRSA_OVERRIDE_FORMAT", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 - cuda2hipRename["CU_TRSF_NORMALIZED_COORDINATES"] = {"HIP_TRSF_NORMALIZED_COORDINATES", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED};// 0x02 - cuda2hipRename["CU_TRSF_READ_AS_INTEGER"] = {"HIP_TRSF_READ_AS_INTEGER", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 - cuda2hipRename["CU_TRSF_SRGB"] = {"HIP_TRSF_SRGB", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x10 + cuda2hipRename["CU_LAUNCH_PARAM_BUFFER_POINTER"] = {"HIP_LAUNCH_PARAM_BUFFER_POINTER", CONV_TYPE, API_DRIVER}; // ((void*)0x01) + cuda2hipRename["CU_LAUNCH_PARAM_BUFFER_SIZE"] = {"HIP_LAUNCH_PARAM_BUFFER_SIZE", CONV_TYPE, API_DRIVER}; // ((void*)0x02) + cuda2hipRename["CU_LAUNCH_PARAM_END"] = {"HIP_LAUNCH_PARAM_END", CONV_TYPE, API_DRIVER}; // ((void*)0x00) + cuda2hipRename["CU_IPC_HANDLE_SIZE"] = {"HIP_LAUNCH_PARAM_END", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 64 + cuda2hipRename["CU_MEMHOSTALLOC_DEVICEMAP"] = {"HIP_MEMHOSTALLOC_DEVICEMAP", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x02 + cuda2hipRename["CU_MEMHOSTALLOC_PORTABLE"] = {"HIP_MEMHOSTALLOC_PORTABLE", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 + cuda2hipRename["CU_MEMHOSTALLOC_WRITECOMBINED"] = {"HIP_MEMHOSTALLOC_WRITECOMBINED", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x04 + cuda2hipRename["CU_MEMHOSTREGISTER_DEVICEMAP"] = {"HIP_MEMHOSTREGISTER_DEVICEMAP", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x02 + cuda2hipRename["CU_MEMHOSTREGISTER_IOMEMORY"] = {"HIP_MEMHOSTREGISTER_IOMEMORY", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x04 + cuda2hipRename["CU_MEMHOSTREGISTER_PORTABLE"] = {"HIP_MEMHOSTREGISTER_PORTABLE", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 + cuda2hipRename["CU_PARAM_TR_DEFAULT"] = {"HIP_PARAM_TR_DEFAULT", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // -1 + cuda2hipRename["CU_STREAM_LEGACY"] = {"HIP_STREAM_LEGACY", CONV_TYPE, API_DRIVER, 
HIP_UNSUPPORTED}; // ((CUstream)0x1) + cuda2hipRename["CU_STREAM_PER_THREAD"] = {"HIP_STREAM_PER_THREAD", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // ((CUstream)0x2) + cuda2hipRename["CU_TRSA_OVERRIDE_FORMAT"] = {"HIP_TRSA_OVERRIDE_FORMAT", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 + cuda2hipRename["CU_TRSF_NORMALIZED_COORDINATES"] = {"HIP_TRSF_NORMALIZED_COORDINATES", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED};// 0x02 + cuda2hipRename["CU_TRSF_READ_AS_INTEGER"] = {"HIP_TRSF_READ_AS_INTEGER", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 + cuda2hipRename["CU_TRSF_SRGB"] = {"HIP_TRSF_SRGB", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x10 // Deprecated, use CUDA_ARRAY3D_LAYERED - cuda2hipRename["CUDA_ARRAY3D_2DARRAY"] = {"HIP_ARRAY3D_LAYERED", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 - cuda2hipRename["CUDA_ARRAY3D_CUBEMAP"] = {"HIP_ARRAY3D_CUBEMAP", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x04 - cuda2hipRename["CUDA_ARRAY3D_DEPTH_TEXTURE"] = {"HIP_ARRAY3D_DEPTH_TEXTURE", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x10 - cuda2hipRename["CUDA_ARRAY3D_LAYERED"] = {"HIP_ARRAY3D_LAYERED", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 - cuda2hipRename["CUDA_ARRAY3D_SURFACE_LDST"] = {"HIP_ARRAY3D_SURFACE_LDST", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x02 - cuda2hipRename["CUDA_ARRAY3D_TEXTURE_GATHER"] = {"HIP_ARRAY3D_TEXTURE_GATHER", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x08 - cuda2hipRename["CUDA_VERSION"] = {"HIP_VERSION", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 7050 + cuda2hipRename["CUDA_ARRAY3D_2DARRAY"] = {"HIP_ARRAY3D_LAYERED", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 + cuda2hipRename["CUDA_ARRAY3D_CUBEMAP"] = {"HIP_ARRAY3D_CUBEMAP", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x04 + cuda2hipRename["CUDA_ARRAY3D_DEPTH_TEXTURE"] = {"HIP_ARRAY3D_DEPTH_TEXTURE", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x10 + cuda2hipRename["CUDA_ARRAY3D_LAYERED"] = {"HIP_ARRAY3D_LAYERED", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 + 
cuda2hipRename["CUDA_ARRAY3D_SURFACE_LDST"] = {"HIP_ARRAY3D_SURFACE_LDST", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x02 + cuda2hipRename["CUDA_ARRAY3D_TEXTURE_GATHER"] = {"HIP_ARRAY3D_TEXTURE_GATHER", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x08 + cuda2hipRename["CUDA_VERSION"] = {"HIP_VERSION", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 7050 // Types // NOTE: CUdevice might be changed to typedef int in the future. @@ -500,115 +509,115 @@ struct cuda2hipMap { // CUDA: "The types::CUarray and struct ::cudaArray * represent the same data type and may be used interchangeably by casting the two types between each other." // typedef struct cudaArray *cudaArray_t; // typedef struct CUarray_st *CUarray; - cuda2hipRename["CUarray_st"] = {"hipArray", CONV_MEM, API_RUNTIME}; // API_Runtime ANALOGUE (cudaArray) + cuda2hipRename["CUarray_st"] = {"hipArray", CONV_TYPE, API_DRIVER}; // API_Runtime ANALOGUE (cudaArray) cuda2hipRename["CUarray"] = {"hipArray *", CONV_TYPE, API_DRIVER}; // API_Runtime ANALOGUE (cudaArray_t) // unsupported yet by HIP - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK"] = {"hipDeviceAttributeMaxThreadsPerBlock", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 1 // API_Runtime ANALOGUE (cudaDevAttrMaxThreadsPerBlock = 1) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X"] = {"hipDeviceAttributeMaxBlockDimX", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 2 // API_Runtime ANALOGUE (cudaDevAttrMaxBlockDimX = 2) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y"] = {"hipDeviceAttributeMaxBlockDimY", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 3 // API_Runtime ANALOGUE (cudaDevAttrMaxBlockDimY = 3) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z"] = {"hipDeviceAttributeMaxBlockDimZ", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 4 // API_Runtime ANALOGUE (cudaDevAttrMaxBlockDimZ = 4) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X"] = {"hipDeviceAttributeMaxGridDimX", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 5 // API_Runtime 
ANALOGUE (cudaDevAttrMaxGridDimX =5) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y"] = {"hipDeviceAttributeMaxGridDimY", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 6 // API_Runtime ANALOGUE (cudaDevAttrMaxGridDimY = 6) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z"] = {"hipDeviceAttributeMaxGridDimZ", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 7 // API_Runtime ANALOGUE (cudaDevAttrMaxGridDimZ - 7) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK"] = {"hipDeviceAttributeMaxSharedMemoryPerBlock", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 8 // API_Runtime ANALOGUE (cudaDevAttrMaxSharedMemoryPerBlock = 8) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK"] = {"hipDeviceAttributeMaxThreadsPerBlock", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 1 // API_Runtime ANALOGUE (cudaDevAttrMaxThreadsPerBlock = 1) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X"] = {"hipDeviceAttributeMaxBlockDimX", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 2 // API_Runtime ANALOGUE (cudaDevAttrMaxBlockDimX = 2) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y"] = {"hipDeviceAttributeMaxBlockDimY", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 3 // API_Runtime ANALOGUE (cudaDevAttrMaxBlockDimY = 3) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z"] = {"hipDeviceAttributeMaxBlockDimZ", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 4 // API_Runtime ANALOGUE (cudaDevAttrMaxBlockDimZ = 4) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X"] = {"hipDeviceAttributeMaxGridDimX", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 5 // API_Runtime ANALOGUE (cudaDevAttrMaxGridDimX =5) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y"] = {"hipDeviceAttributeMaxGridDimY", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 6 // API_Runtime ANALOGUE (cudaDevAttrMaxGridDimY = 6) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z"] = {"hipDeviceAttributeMaxGridDimZ", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 7 // API_Runtime ANALOGUE (cudaDevAttrMaxGridDimZ - 7) 
+ cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK"] = {"hipDeviceAttributeMaxSharedMemoryPerBlock", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 8 // API_Runtime ANALOGUE (cudaDevAttrMaxSharedMemoryPerBlock = 8) // Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK - cuda2hipRename["CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK"] = {"hipDeviceAttributeMaxSharedMemoryPerBlock", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 8 - cuda2hipRename["CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY"] = {"hipDeviceAttributeTotalConstantMemory", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 9 // API_Runtime ANALOGUE (cudaDevAttrTotalConstantMemory = 9) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_WARP_SIZE"] = {"hipDeviceAttributeWarpSize", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 10 // API_Runtime ANALOGUE (cudaDevAttrWarpSize = 10) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_PITCH"] = {"hipDeviceAttributeMaxPitch", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 11 // API_Runtime ANALOGUE (cudaDevAttrMaxPitch = 11) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK"] = {"hipDeviceAttributeMaxRegistersPerBlock", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 12 // API_Runtime ANALOGUE (cudaDevAttrMaxRegistersPerBlock = 12) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK"] = {"hipDeviceAttributeMaxRegistersPerBlock", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 12 - cuda2hipRename["CU_DEVICE_ATTRIBUTE_CLOCK_RATE"] = {"hipDeviceAttributeClockRate", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 13 // API_Runtime ANALOGUE (cudaDevAttrMaxRegistersPerBlock = 13) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT"] = {"hipDeviceAttributeTextureAlignment", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 14 // API_Runtime ANALOGUE (cudaDevAttrTextureAlignment = 14) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK"] = {"hipDeviceAttributeMaxSharedMemoryPerBlock", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 8 + 
cuda2hipRename["CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY"] = {"hipDeviceAttributeTotalConstantMemory", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 9 // API_Runtime ANALOGUE (cudaDevAttrTotalConstantMemory = 9) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_WARP_SIZE"] = {"hipDeviceAttributeWarpSize", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 10 // API_Runtime ANALOGUE (cudaDevAttrWarpSize = 10) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_PITCH"] = {"hipDeviceAttributeMaxPitch", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 11 // API_Runtime ANALOGUE (cudaDevAttrMaxPitch = 11) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK"] = {"hipDeviceAttributeMaxRegistersPerBlock", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 12 // API_Runtime ANALOGUE (cudaDevAttrMaxRegistersPerBlock = 12) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK"] = {"hipDeviceAttributeMaxRegistersPerBlock", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 12 + cuda2hipRename["CU_DEVICE_ATTRIBUTE_CLOCK_RATE"] = {"hipDeviceAttributeClockRate", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 13 // API_Runtime ANALOGUE (cudaDevAttrMaxRegistersPerBlock = 13) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT"] = {"hipDeviceAttributeTextureAlignment", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 14 // API_Runtime ANALOGUE (cudaDevAttrTextureAlignment = 14) // Deprecated. 
Use instead CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT - cuda2hipRename["CU_DEVICE_ATTRIBUTE_GPU_OVERLAP"] = {"hipDeviceAttributeAsyncEngineCount", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 15 // API_Runtime ANALOGUE (cudaDevAttrGpuOverlap = 15) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT"] = {"hipDeviceAttributeMultiprocessorCount", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 16 // API_Runtime ANALOGUE (cudaDevAttrMultiProcessorCount = 16) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT"] = {"hipDeviceAttributeKernelExecTimeout", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 17 // API_Runtime ANALOGUE (cudaDevAttrKernelExecTimeout = 17) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_INTEGRATED"] = {"hipDeviceAttributeIntegrated", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 18 // API_Runtime ANALOGUE (cudaDevAttrIntegrated = 18) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY"] = {"hipDeviceAttributeCanMapHostMemory", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 19 // API_Runtime ANALOGUE (cudaDevAttrCanMapHostMemory = 19) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_COMPUTE_MODE"] = {"hipDeviceAttributeComputeMode", CONV_DEV, API_DRIVER}; // 20 // API_Runtime ANALOGUE (cudaDevAttrComputeMode = 20) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH"] = {"hipDeviceAttributeMaxTexture1DWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 21 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture1DWidth = 21) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH"] = {"hipDeviceAttributeMaxTexture2DWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 22 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DWidth = 22) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT"] = {"hipDeviceAttributeMaxTexture2DHeight", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 23 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DHeight = 23) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH"] = {"hipDeviceAttributeMaxTexture3DWidth", CONV_DEV, API_DRIVER, 
HIP_UNSUPPORTED}; // 24 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture3DWidth = 24) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT"] = {"hipDeviceAttributeMaxTexture3DHeight", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 25 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture3DHeight = 25) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH"] = {"hipDeviceAttributeMaxTexture3DDepth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 26 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture3DDepth = 26) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH"] = {"hipDeviceAttributeMaxTexture2DLayeredWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 27 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLayeredWidth = 27) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT"] = {"hipDeviceAttributeMaxTexture2DLayeredHeight", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 28 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLayeredHeight = 28) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS"] = {"hipDeviceAttributeMaxTexture2DLayeredLayers", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 29 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLayeredLayers = 29) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_GPU_OVERLAP"] = {"hipDeviceAttributeAsyncEngineCount", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 15 // API_Runtime ANALOGUE (cudaDevAttrGpuOverlap = 15) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT"] = {"hipDeviceAttributeMultiprocessorCount", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 16 // API_Runtime ANALOGUE (cudaDevAttrMultiProcessorCount = 16) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT"] = {"hipDeviceAttributeKernelExecTimeout", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 17 // API_Runtime ANALOGUE (cudaDevAttrKernelExecTimeout = 17) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_INTEGRATED"] = {"hipDeviceAttributeIntegrated", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 18 // API_Runtime ANALOGUE 
(cudaDevAttrIntegrated = 18) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY"] = {"hipDeviceAttributeCanMapHostMemory", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 19 // API_Runtime ANALOGUE (cudaDevAttrCanMapHostMemory = 19) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_COMPUTE_MODE"] = {"hipDeviceAttributeComputeMode", CONV_TYPE, API_DRIVER}; // 20 // API_Runtime ANALOGUE (cudaDevAttrComputeMode = 20) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH"] = {"hipDeviceAttributeMaxTexture1DWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 21 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture1DWidth = 21) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH"] = {"hipDeviceAttributeMaxTexture2DWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 22 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DWidth = 22) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT"] = {"hipDeviceAttributeMaxTexture2DHeight", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 23 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DHeight = 23) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH"] = {"hipDeviceAttributeMaxTexture3DWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 24 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture3DWidth = 24) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT"] = {"hipDeviceAttributeMaxTexture3DHeight", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 25 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture3DHeight = 25) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH"] = {"hipDeviceAttributeMaxTexture3DDepth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 26 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture3DDepth = 26) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH"] = {"hipDeviceAttributeMaxTexture2DLayeredWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 27 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLayeredWidth = 27) + 
cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT"] = {"hipDeviceAttributeMaxTexture2DLayeredHeight", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 28 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLayeredHeight = 28) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS"] = {"hipDeviceAttributeMaxTexture2DLayeredLayers", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 29 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLayeredLayers = 29) // Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH"] = {"hipDeviceAttributeMaxTexture2DLayeredWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 27 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLayeredWidth = 27) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH"] = {"hipDeviceAttributeMaxTexture2DLayeredWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 27 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLayeredWidth = 27) // Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT"] = {"hipDeviceAttributeMaxTexture2DLayeredHeight", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 28 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLayeredHeight = 28) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT"] = {"hipDeviceAttributeMaxTexture2DLayeredHeight", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 28 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLayeredHeight = 28) // Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES"] = {"hipDeviceAttributeMaxTexture2DLayeredLayers", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 29 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLayeredLayers = 29) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT"] = {"hipDeviceAttributeSurfaceAlignment", CONV_DEV, API_DRIVER, 
HIP_UNSUPPORTED}; // 30 // API_Runtime ANALOGUE (cudaDevAttrSurfaceAlignment = 30) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS"] = {"hipDeviceAttributeConcurrentKernels", CONV_DEV, API_DRIVER}; // 31 // API_Runtime ANALOGUE (cudaDevAttrConcurrentKernels = 31) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_ECC_ENABLED"] = {"hipDeviceAttributeEccEnabled", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 32 // API_Runtime ANALOGUE (cudaDevAttrEccEnabled = 32) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_PCI_BUS_ID"] = {"hipDeviceAttributePciBusId", CONV_DEV, API_DRIVER}; // 33 // API_Runtime ANALOGUE (cudaDevAttrPciBusId = 33) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID"] = {"hipDeviceAttributePciDeviceId", CONV_DEV, API_DRIVER}; // 34 // API_Runtime ANALOGUE (cudaDevAttrPciDeviceId = 34) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_TCC_DRIVER"] = {"hipDeviceAttributeTccDriver", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 35 // API_Runtime ANALOGUE (cudaDevAttrTccDriver = 35) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE"] = {"hipDeviceAttributeMemoryClockRate", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 36 // API_Runtime ANALOGUE (cudaDevAttrMemoryClockRate = 36) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH"] = {"hipDeviceAttributeMemoryBusWidth", CONV_DEV, API_DRIVER}; // 37 // API_Runtime ANALOGUE (cudaDevAttrGlobalMemoryBusWidth = 37) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE"] = {"hipDeviceAttributeL2CacheSize", CONV_DEV, API_DRIVER}; // 38 // API_Runtime ANALOGUE (cudaDevAttrL2CacheSize = 38) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR"] = {"hipDeviceAttributeMaxThreadsPerMultiProcessor", CONV_DEV, API_DRIVER}; // 39 // API_Runtime ANALOGUE (cudaDevAttrMaxThreadsPerMultiProcessor = 39) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT"] = {"hipDeviceAttributeAsyncEngineCount", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 40 // API_Runtime ANALOGUE (cudaDevAttrAsyncEngineCount = 40) - 
cuda2hipRename["CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING"] = {"hipDeviceAttributeUnifiedAddressing", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 41 // API_Runtime ANALOGUE (cudaDevAttrUnifiedAddressing = 41) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH"] = {"hipDeviceAttributeMaxTexture1DLayeredWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 42 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture1DLayeredWidth = 42) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS"] = {"hipDeviceAttributeMaxTexture1DLayeredLayers", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 43 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture1DLayeredLayers = 43) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES"] = {"hipDeviceAttributeMaxTexture2DLayeredLayers", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 29 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLayeredLayers = 29) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT"] = {"hipDeviceAttributeSurfaceAlignment", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 30 // API_Runtime ANALOGUE (cudaDevAttrSurfaceAlignment = 30) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS"] = {"hipDeviceAttributeConcurrentKernels", CONV_TYPE, API_DRIVER}; // 31 // API_Runtime ANALOGUE (cudaDevAttrConcurrentKernels = 31) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_ECC_ENABLED"] = {"hipDeviceAttributeEccEnabled", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 32 // API_Runtime ANALOGUE (cudaDevAttrEccEnabled = 32) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_PCI_BUS_ID"] = {"hipDeviceAttributePciBusId", CONV_TYPE, API_DRIVER}; // 33 // API_Runtime ANALOGUE (cudaDevAttrPciBusId = 33) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID"] = {"hipDeviceAttributePciDeviceId", CONV_TYPE, API_DRIVER}; // 34 // API_Runtime ANALOGUE (cudaDevAttrPciDeviceId = 34) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_TCC_DRIVER"] = {"hipDeviceAttributeTccDriver", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 35 // API_Runtime ANALOGUE 
(cudaDevAttrTccDriver = 35) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE"] = {"hipDeviceAttributeMemoryClockRate", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 36 // API_Runtime ANALOGUE (cudaDevAttrMemoryClockRate = 36) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH"] = {"hipDeviceAttributeMemoryBusWidth", CONV_TYPE, API_DRIVER}; // 37 // API_Runtime ANALOGUE (cudaDevAttrGlobalMemoryBusWidth = 37) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE"] = {"hipDeviceAttributeL2CacheSize", CONV_TYPE, API_DRIVER}; // 38 // API_Runtime ANALOGUE (cudaDevAttrL2CacheSize = 38) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR"] = {"hipDeviceAttributeMaxThreadsPerMultiProcessor", CONV_TYPE, API_DRIVER}; // 39 // API_Runtime ANALOGUE (cudaDevAttrMaxThreadsPerMultiProcessor = 39) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT"] = {"hipDeviceAttributeAsyncEngineCount", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 40 // API_Runtime ANALOGUE (cudaDevAttrAsyncEngineCount = 40) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING"] = {"hipDeviceAttributeUnifiedAddressing", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 41 // API_Runtime ANALOGUE (cudaDevAttrUnifiedAddressing = 41) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH"] = {"hipDeviceAttributeMaxTexture1DLayeredWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 42 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture1DLayeredWidth = 42) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS"] = {"hipDeviceAttributeMaxTexture1DLayeredLayers", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 43 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture1DLayeredLayers = 43) // deprecated, do not use - cuda2hipRename["CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER"] = {"hipDeviceAttributeCanTex2DGather", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 44 // API_Runtime ANALOGUE (no) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH"] = 
{"hipDeviceAttributeMaxTexture2DGatherWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 45 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DGatherWidth = 45) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT"] = {"hipDeviceAttributeMaxTexture2DGatherHeight", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 46 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DGatherHeight = 46) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE"] = {"hipDeviceAttributeMaxTexture3DWidthAlternate", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 47 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture3DWidthAlt = 47) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE"] = {"hipDeviceAttributeMaxTexture3DHeightAlternate", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 48 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture3DHeightAlt = 48) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE"] = {"hipDeviceAttributeMaxTexture3DDepthAlternate", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 49 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture3DDepthAlt = 49) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID"] = {"hipDeviceAttributePciDomainId", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 50 // API_Runtime ANALOGUE (cudaDevAttrPciDomainId = 50) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT"] = {"hipDeviceAttributeTexturePitchAlignment", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 51 // API_Runtime ANALOGUE (cudaDevAttrTexturePitchAlignment = 51) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH"] = {"hipDeviceAttributeMaxTextureCubemapWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 52 // API_Runtime ANALOGUE (cudaDevAttrMaxTextureCubemapWidth = 52) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH"] = {"hipDeviceAttributeMaxTextureCubemapLayeredWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 53 // API_Runtime ANALOGUE (cudaDevAttrMaxTextureCubemapLayeredWidth = 53) - 
cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS"] = {"hipDeviceAttributeMaxTextureCubemapLayeredLayers", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 54 // API_Runtime ANALOGUE (cudaDevAttrMaxTextureCubemapLayeredLayers = 54) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH"] = {"hipDeviceAttributeMaxSurface1DWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 55 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface1DWidth = 55) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH"] = {"hipDeviceAttributeMaxSurface2DWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 56 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface2DWidth = 56) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT"] = {"hipDeviceAttributeMaxSurface2DHeight", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 57 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface2DHeight = 57) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH"] = {"hipDeviceAttributeMaxSurface3DWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 58 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface3DWidth = 58) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT"] = {"hipDeviceAttributeMaxSurface3DHeight", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 59 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface3DHeight = 59) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH"] = {"hipDeviceAttributeMaxSurface3DDepth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 60 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface3DDepth = 60) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH"] = {"hipDeviceAttributeMaxSurface1DLayeredWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 61 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface1DLayeredWidth = 61) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS"] = {"hipDeviceAttributeMaxSurface1DLayeredLayers", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 62 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface1DLayeredLayers = 62) 
- cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH"] = {"hipDeviceAttributeMaxSurface2DLayeredWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 63 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface2DLayeredWidth = 63) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT"] = {"hipDeviceAttributeMaxSurface2DLayeredHeight", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 64 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface2DLayeredHeight = 64) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS"] = {"hipDeviceAttributeMaxSurface2DLayeredLayers", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 65 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface2DLayeredLayers = 65) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH"] = {"hipDeviceAttributeMaxSurfaceCubemapWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 66 // API_Runtime ANALOGUE (cudaDevAttrMaxSurfaceCubemapWidth = 66) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH"] = {"hipDeviceAttributeMaxSurfaceCubemapLayeredWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 67 // API_Runtime ANALOGUE (cudaDevAttrMaxSurfaceCubemapLayeredWidth = 67) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS"] = {"hipDeviceAttributeMaxSurfaceCubemapLayeredLayers", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 68 // API_Runtime ANALOGUE (cudaDevAttrMaxSurfaceCubemapLayeredLayers = 68) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH"] = {"hipDeviceAttributeMaxTexture1DLinearWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 69 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture1DLinearWidth = 69) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH"] = {"hipDeviceAttributeMaxTexture2DLinearWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 70 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLinearWidth = 70) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT"] = 
{"hipDeviceAttributeMaxTexture2DLinearHeight", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 71 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLinearHeight = 71) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH"] = {"hipDeviceAttributeMaxTexture2DLinearPitch", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 72 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLinearPitch = 72) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH"] = {"hipDeviceAttributeMaxTexture2DMipmappedWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 73 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DMipmappedWidth = 73) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT"] = {"hipDeviceAttributeMaxTexture2DMipmappedHeight", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 74 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DMipmappedHeight = 74) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR"] = {"hipDeviceAttributeComputeCapabilityMajor", CONV_DEV, API_DRIVER}; // 75 // API_Runtime ANALOGUE (cudaDevAttrComputeCapabilityMajor = 75) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR"] = {"hipDeviceAttributeComputeCapabilityMinor", CONV_DEV, API_DRIVER}; // 76 // API_Runtime ANALOGUE (cudaDevAttrComputeCapabilityMinor = 76) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH"] = {"hipDeviceAttributeMaxTexture1DMipmappedWidth", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 77 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture1DMipmappedWidth = 77) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED"] = {"hipDeviceAttributeStreamPrioritiesSupported", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 78 // API_Runtime ANALOGUE (cudaDevAttrStreamPrioritiesSupported = 78) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED"] = {"hipDeviceAttributeGlobalL1CacheSupported", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 79 // API_Runtime ANALOGUE (cudaDevAttrGlobalL1CacheSupported = 79) - 
cuda2hipRename["CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED"] = {"hipDeviceAttributeLocalL1CacheSupported", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 80 // API_Runtime ANALOGUE (cudaDevAttrLocalL1CacheSupported = 80) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR"] = {"hipDeviceAttributeMaxSharedMemoryPerMultiprocessor", CONV_DEV, API_DRIVER}; // 81 // API_Runtime ANALOGUE (cudaDevAttrMaxSharedMemoryPerMultiprocessor = 81) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR"] = {"hipDeviceAttributeMaxRegistersPerMultiprocessor", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 82 // API_Runtime ANALOGUE (cudaDevAttrMaxRegistersPerMultiprocessor = 82) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY"] = {"hipDeviceAttributeManagedMemory", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 83 // API_Runtime ANALOGUE (cudaDevAttrManagedMemory = 83) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD"] = {"hipDeviceAttributeIsMultiGpuBoard", CONV_DEV, API_DRIVER}; // 84 // API_Runtime ANALOGUE (cudaDevAttrIsMultiGpuBoard = 84) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID"] = {"hipDeviceAttributeMultiGpuBoardGroupId", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 85 // API_Runtime ANALOGUE (cudaDevAttrMultiGpuBoardGroupID = 85) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER"] = {"hipDeviceAttributeCanTex2DGather", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 44 // API_Runtime ANALOGUE (no) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH"] = {"hipDeviceAttributeMaxTexture2DGatherWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 45 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DGatherWidth = 45) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT"] = {"hipDeviceAttributeMaxTexture2DGatherHeight", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 46 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DGatherHeight = 46) + 
cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE"] = {"hipDeviceAttributeMaxTexture3DWidthAlternate", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 47 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture3DWidthAlt = 47) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE"] = {"hipDeviceAttributeMaxTexture3DHeightAlternate", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 48 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture3DHeightAlt = 48) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE"] = {"hipDeviceAttributeMaxTexture3DDepthAlternate", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 49 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture3DDepthAlt = 49) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID"] = {"hipDeviceAttributePciDomainId", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 50 // API_Runtime ANALOGUE (cudaDevAttrPciDomainId = 50) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT"] = {"hipDeviceAttributeTexturePitchAlignment", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 51 // API_Runtime ANALOGUE (cudaDevAttrTexturePitchAlignment = 51) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH"] = {"hipDeviceAttributeMaxTextureCubemapWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 52 // API_Runtime ANALOGUE (cudaDevAttrMaxTextureCubemapWidth = 52) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH"] = {"hipDeviceAttributeMaxTextureCubemapLayeredWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 53 // API_Runtime ANALOGUE (cudaDevAttrMaxTextureCubemapLayeredWidth = 53) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS"] = {"hipDeviceAttributeMaxTextureCubemapLayeredLayers", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 54 // API_Runtime ANALOGUE (cudaDevAttrMaxTextureCubemapLayeredLayers = 54) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH"] = {"hipDeviceAttributeMaxSurface1DWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 
55 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface1DWidth = 55) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH"] = {"hipDeviceAttributeMaxSurface2DWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 56 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface2DWidth = 56) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT"] = {"hipDeviceAttributeMaxSurface2DHeight", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 57 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface2DHeight = 57) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH"] = {"hipDeviceAttributeMaxSurface3DWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 58 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface3DWidth = 58) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT"] = {"hipDeviceAttributeMaxSurface3DHeight", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 59 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface3DHeight = 59) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH"] = {"hipDeviceAttributeMaxSurface3DDepth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 60 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface3DDepth = 60) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH"] = {"hipDeviceAttributeMaxSurface1DLayeredWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 61 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface1DLayeredWidth = 61) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS"] = {"hipDeviceAttributeMaxSurface1DLayeredLayers", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 62 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface1DLayeredLayers = 62) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH"] = {"hipDeviceAttributeMaxSurface2DLayeredWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 63 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface2DLayeredWidth = 63) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT"] = {"hipDeviceAttributeMaxSurface2DLayeredHeight", CONV_TYPE, API_DRIVER, 
HIP_UNSUPPORTED}; // 64 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface2DLayeredHeight = 64) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS"] = {"hipDeviceAttributeMaxSurface2DLayeredLayers", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 65 // API_Runtime ANALOGUE (cudaDevAttrMaxSurface2DLayeredLayers = 65) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH"] = {"hipDeviceAttributeMaxSurfaceCubemapWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 66 // API_Runtime ANALOGUE (cudaDevAttrMaxSurfaceCubemapWidth = 66) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH"] = {"hipDeviceAttributeMaxSurfaceCubemapLayeredWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 67 // API_Runtime ANALOGUE (cudaDevAttrMaxSurfaceCubemapLayeredWidth = 67) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS"] = {"hipDeviceAttributeMaxSurfaceCubemapLayeredLayers", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 68 // API_Runtime ANALOGUE (cudaDevAttrMaxSurfaceCubemapLayeredLayers = 68) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH"] = {"hipDeviceAttributeMaxTexture1DLinearWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 69 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture1DLinearWidth = 69) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH"] = {"hipDeviceAttributeMaxTexture2DLinearWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 70 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLinearWidth = 70) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT"] = {"hipDeviceAttributeMaxTexture2DLinearHeight", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 71 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLinearHeight = 71) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH"] = {"hipDeviceAttributeMaxTexture2DLinearPitch", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 72 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DLinearPitch = 72) + 
cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH"] = {"hipDeviceAttributeMaxTexture2DMipmappedWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 73 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DMipmappedWidth = 73) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT"] = {"hipDeviceAttributeMaxTexture2DMipmappedHeight", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 74 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture2DMipmappedHeight = 74) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR"] = {"hipDeviceAttributeComputeCapabilityMajor", CONV_TYPE, API_DRIVER}; // 75 // API_Runtime ANALOGUE (cudaDevAttrComputeCapabilityMajor = 75) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR"] = {"hipDeviceAttributeComputeCapabilityMinor", CONV_TYPE, API_DRIVER}; // 76 // API_Runtime ANALOGUE (cudaDevAttrComputeCapabilityMinor = 76) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH"] = {"hipDeviceAttributeMaxTexture1DMipmappedWidth", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 77 // API_Runtime ANALOGUE (cudaDevAttrMaxTexture1DMipmappedWidth = 77) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED"] = {"hipDeviceAttributeStreamPrioritiesSupported", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 78 // API_Runtime ANALOGUE (cudaDevAttrStreamPrioritiesSupported = 78) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED"] = {"hipDeviceAttributeGlobalL1CacheSupported", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 79 // API_Runtime ANALOGUE (cudaDevAttrGlobalL1CacheSupported = 79) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED"] = {"hipDeviceAttributeLocalL1CacheSupported", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 80 // API_Runtime ANALOGUE (cudaDevAttrLocalL1CacheSupported = 80) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR"] = {"hipDeviceAttributeMaxSharedMemoryPerMultiprocessor", CONV_TYPE, API_DRIVER}; // 81 // API_Runtime 
ANALOGUE (cudaDevAttrMaxSharedMemoryPerMultiprocessor = 81) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR"] = {"hipDeviceAttributeMaxRegistersPerMultiprocessor", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 82 // API_Runtime ANALOGUE (cudaDevAttrMaxRegistersPerMultiprocessor = 82) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY"] = {"hipDeviceAttributeManagedMemory", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 83 // API_Runtime ANALOGUE (cudaDevAttrManagedMemory = 83) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD"] = {"hipDeviceAttributeIsMultiGpuBoard", CONV_TYPE, API_DRIVER}; // 84 // API_Runtime ANALOGUE (cudaDevAttrIsMultiGpuBoard = 84) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID"] = {"hipDeviceAttributeMultiGpuBoardGroupId", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 85 // API_Runtime ANALOGUE (cudaDevAttrMultiGpuBoardGroupID = 85) // unsupported yet by HIP [CUDA 8.0.44] - cuda2hipRename["CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED"] = {"hipDeviceAttributeHostNativeAtomicSupported", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 86 // API_Runtime ANALOGUE (cudaDevAttrHostNativeAtomicSupported = 86) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO"] = {"hipDeviceAttributeSingleToDoublePrecisionPerfRatio", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 87 // API_Runtime ANALOGUE (cudaDevAttrSingleToDoublePrecisionPerfRatio = 87) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS"] = {"hipDeviceAttributePageableMemoryAccess", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 88 // API_Runtime ANALOGUE (cudaDevAttrPageableMemoryAccess = 88) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS"] = {"hipDeviceAttributeConcurrentManagedAccess", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 89 // API_Runtime ANALOGUE (cudaDevAttrConcurrentManagedAccess = 89) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED"] = {"hipDeviceAttributeComputePreemptionSupported", 
CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 90 // API_Runtime ANALOGUE (cudaDevAttrComputePreemptionSupported = 90) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM"] = {"hipDeviceAttributeCanUseHostPointerForRegisteredMem", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 91 // API_Runtime ANALOGUE (cudaDevAttrCanUseHostPointerForRegisteredMem = 91) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED"] = {"hipDeviceAttributeHostNativeAtomicSupported", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 86 // API_Runtime ANALOGUE (cudaDevAttrHostNativeAtomicSupported = 86) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO"] = {"hipDeviceAttributeSingleToDoublePrecisionPerfRatio", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 87 // API_Runtime ANALOGUE (cudaDevAttrSingleToDoublePrecisionPerfRatio = 87) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS"] = {"hipDeviceAttributePageableMemoryAccess", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 88 // API_Runtime ANALOGUE (cudaDevAttrPageableMemoryAccess = 88) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS"] = {"hipDeviceAttributeConcurrentManagedAccess", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 89 // API_Runtime ANALOGUE (cudaDevAttrConcurrentManagedAccess = 89) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED"] = {"hipDeviceAttributeComputePreemptionSupported", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 90 // API_Runtime ANALOGUE (cudaDevAttrComputePreemptionSupported = 90) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM"] = {"hipDeviceAttributeCanUseHostPointerForRegisteredMem", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 91 // API_Runtime ANALOGUE (cudaDevAttrCanUseHostPointerForRegisteredMem = 91) - cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX"] = {"hipDeviceAttributeMax", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 92 // API_Runtime ANALOGUE (no) + cuda2hipRename["CU_DEVICE_ATTRIBUTE_MAX"] 
= {"hipDeviceAttributeMax", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 92 // API_Runtime ANALOGUE (no) cuda2hipRename["CUdevprop_st"] = {"hipDeviceProp_t", CONV_TYPE, API_DRIVER}; cuda2hipRename["CUdevprop"] = {"hipDeviceProp_t", CONV_TYPE, API_DRIVER}; @@ -617,14 +626,14 @@ struct cuda2hipMap { // TODO: Do for Pointer Attributes the same as for Device Attributes. // cuda2hipRename["CUpointer_attribute_enum"] = {"hipPointerAttribute_t", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (no) // cuda2hipRename["CUpointer_attribute"] = {"hipPointerAttribute_t", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (no) - cuda2hipRename["CU_POINTER_ATTRIBUTE_CONTEXT"] = {"hipPointerAttributeContext", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 1 // API_Runtime ANALOGUE (no) - cuda2hipRename["CU_POINTER_ATTRIBUTE_MEMORY_TYPE"] = {"hipPointerAttributeMemoryType", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 2 // API_Runtime ANALOGUE (no) - cuda2hipRename["CU_POINTER_ATTRIBUTE_DEVICE_POINTER"] = {"hipPointerAttributeDevicePointer", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 3 // API_Runtime ANALOGUE (no) - cuda2hipRename["CU_POINTER_ATTRIBUTE_HOST_POINTER"] = {"hipPointerAttributeHostPointer", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 4 // API_Runtime ANALOGUE (no) - cuda2hipRename["CU_POINTER_ATTRIBUTE_P2P_TOKENS"] = {"hipPointerAttributeP2pTokens", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 5 // API_Runtime ANALOGUE (no) - cuda2hipRename["CU_POINTER_ATTRIBUTE_SYNC_MEMOPS"] = {"hipPointerAttributeSyncMemops", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 6 // API_Runtime ANALOGUE (no) - cuda2hipRename["CU_POINTER_ATTRIBUTE_BUFFER_ID"] = {"hipPointerAttributeBufferId", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 7 // API_Runtime ANALOGUE (no) - cuda2hipRename["CU_POINTER_ATTRIBUTE_IS_MANAGED"] = {"hipPointerAttributeIsManaged", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 8 // API_Runtime ANALOGUE (no) + cuda2hipRename["CU_POINTER_ATTRIBUTE_CONTEXT"] = 
{"hipPointerAttributeContext", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 1 // API_Runtime ANALOGUE (no) + cuda2hipRename["CU_POINTER_ATTRIBUTE_MEMORY_TYPE"] = {"hipPointerAttributeMemoryType", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 2 // API_Runtime ANALOGUE (no) + cuda2hipRename["CU_POINTER_ATTRIBUTE_DEVICE_POINTER"] = {"hipPointerAttributeDevicePointer", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 3 // API_Runtime ANALOGUE (no) + cuda2hipRename["CU_POINTER_ATTRIBUTE_HOST_POINTER"] = {"hipPointerAttributeHostPointer", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 4 // API_Runtime ANALOGUE (no) + cuda2hipRename["CU_POINTER_ATTRIBUTE_P2P_TOKENS"] = {"hipPointerAttributeP2pTokens", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 5 // API_Runtime ANALOGUE (no) + cuda2hipRename["CU_POINTER_ATTRIBUTE_SYNC_MEMOPS"] = {"hipPointerAttributeSyncMemops", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 6 // API_Runtime ANALOGUE (no) + cuda2hipRename["CU_POINTER_ATTRIBUTE_BUFFER_ID"] = {"hipPointerAttributeBufferId", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 7 // API_Runtime ANALOGUE (no) + cuda2hipRename["CU_POINTER_ATTRIBUTE_IS_MANAGED"] = {"hipPointerAttributeIsManaged", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 8 // API_Runtime ANALOGUE (no) // pointer to CUfunc_st cuda2hipRename["CUfunction"] = {"hipFunction_t", CONV_TYPE, API_DRIVER}; @@ -826,9 +835,9 @@ struct cuda2hipMap { cuda2hipRename["CUsharedconfig"] = {"hipSharedMemConfig", CONV_TYPE, API_DRIVER}; cuda2hipRename["CUsharedconfig_enum"] = {"hipSharedMemConfig", CONV_TYPE, API_DRIVER}; - cuda2hipRename["CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE"] = {"hipSharedMemBankSizeDefault", CONV_DEV, API_DRIVER}; - cuda2hipRename["CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE"] = {"hipSharedMemBankSizeFourByte", CONV_DEV, API_DRIVER}; - cuda2hipRename["CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE"] = {"hipSharedMemBankSizeEightByte", CONV_DEV, API_DRIVER}; + cuda2hipRename["CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE"] = 
{"hipSharedMemBankSizeDefault", CONV_TYPE, API_DRIVER}; + cuda2hipRename["CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE"] = {"hipSharedMemBankSizeFourByte", CONV_TYPE, API_DRIVER}; + cuda2hipRename["CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE"] = {"hipSharedMemBankSizeEightByte", CONV_TYPE, API_DRIVER}; cuda2hipRename["CUcontext"] = {"hipCtx_t", CONV_TYPE, API_DRIVER}; // TODO: move "typedef struct ihipCtx_t *hipCtx_t;" from hcc_details to HIP @@ -857,40 +866,40 @@ struct cuda2hipMap { // cuda2hipRename["CUtexref_st"] = {"ihipTextureReference_t", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // Stream Flags enum - cuda2hipRename["CUstream_flags"] = {"hipStreamFlags", CONV_STREAM, API_DRIVER}; - // cuda2hipRename["CUstream_flags_enum"] = {"hipStreamFlags", CONV_STREAM, API_DRIVER}; - cuda2hipRename["CU_STREAM_DEFAULT"] = {"hipStreamDefault", CONV_STREAM, API_DRIVER}; - cuda2hipRename["CU_STREAM_NON_BLOCKING"] = {"hipStreamNonBlocking", CONV_STREAM, API_DRIVER}; + cuda2hipRename["CUstream_flags"] = {"hipStreamFlags", CONV_TYPE, API_DRIVER}; + // cuda2hipRename["CUstream_flags_enum"] = {"hipStreamFlags", CONV_TYPE, API_DRIVER}; + cuda2hipRename["CU_STREAM_DEFAULT"] = {"hipStreamDefault", CONV_TYPE, API_DRIVER}; + cuda2hipRename["CU_STREAM_NON_BLOCKING"] = {"hipStreamNonBlocking", CONV_TYPE, API_DRIVER}; // unsupported yet by HIP [CUDA 8.0.44] // Flags for ::cuStreamWaitValue32 - cuda2hipRename["CUstreamWaitValue_flags"] = {"hipStreamWaitValueFlags", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; - // cuda2hipRename["CUstreamWaitValue_flags_enum"] = {"hipStreamWaitValueFlags", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_STREAM_WAIT_VALUE_GEQ"] = {"hipStreamWaitValueGeq", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; // 0x0 - cuda2hipRename["CU_STREAM_WAIT_VALUE_EQ"] = {"hipStreamWaitValueEq", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; // 0x1 - cuda2hipRename["CU_STREAM_WAIT_VALUE_AND"] = {"hipStreamWaitValueAnd", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; // 0x2 - 
cuda2hipRename["CU_STREAM_WAIT_VALUE_FLUSH"] = {"hipStreamWaitValueFlush", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; // 1<<30 + cuda2hipRename["CUstreamWaitValue_flags"] = {"hipStreamWaitValueFlags", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; + // cuda2hipRename["CUstreamWaitValue_flags_enum"] = {"hipStreamWaitValueFlags", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["CU_STREAM_WAIT_VALUE_GEQ"] = {"hipStreamWaitValueGeq", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x0 + cuda2hipRename["CU_STREAM_WAIT_VALUE_EQ"] = {"hipStreamWaitValueEq", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x1 + cuda2hipRename["CU_STREAM_WAIT_VALUE_AND"] = {"hipStreamWaitValueAnd", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x2 + cuda2hipRename["CU_STREAM_WAIT_VALUE_FLUSH"] = {"hipStreamWaitValueFlush", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 1<<30 // Flags for ::cuStreamWriteValue32 - cuda2hipRename["CUstreamWriteValue_flags"] = {"hipStreamWriteValueFlags", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; - // cuda2hipRename["CUstreamWriteValue_flags"] = {"hipStreamWriteValueFlags", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_STREAM_WRITE_VALUE_DEFAULT"] = {"hipStreamWriteValueDefault", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; // 0x0 - cuda2hipRename["CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER"] = {"hipStreamWriteValueNoMemoryBarrier", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; // 0x1 + cuda2hipRename["CUstreamWriteValue_flags"] = {"hipStreamWriteValueFlags", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; + // cuda2hipRename["CUstreamWriteValue_flags"] = {"hipStreamWriteValueFlags", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["CU_STREAM_WRITE_VALUE_DEFAULT"] = {"hipStreamWriteValueDefault", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x0 + cuda2hipRename["CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER"] = {"hipStreamWriteValueNoMemoryBarrier", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x1 // Flags for ::cuStreamBatchMemOp - 
cuda2hipRename["CUstreamBatchMemOpType"] = {"hipStreamBatchMemOpType", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; - // cuda2hipRename["CUstreamBatchMemOpType_enum"] = {"hipStreamBatchMemOpType", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_STREAM_MEM_OP_WAIT_VALUE_32"] = {"hipStreamBatchMemOpWaitValue32", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; // 1 - cuda2hipRename["CU_STREAM_MEM_OP_WRITE_VALUE_32"] = {"hipStreamBatchMemOpWriteValue32", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; // 2 - cuda2hipRename["CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES"] = {"hipStreamBatchMemOpFlushRemoteWrites", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; // 3 + cuda2hipRename["CUstreamBatchMemOpType"] = {"hipStreamBatchMemOpType", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; + // cuda2hipRename["CUstreamBatchMemOpType_enum"] = {"hipStreamBatchMemOpType", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["CU_STREAM_MEM_OP_WAIT_VALUE_32"] = {"hipStreamBatchMemOpWaitValue32", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 1 + cuda2hipRename["CU_STREAM_MEM_OP_WRITE_VALUE_32"] = {"hipStreamBatchMemOpWriteValue32", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 2 + cuda2hipRename["CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES"] = {"hipStreamBatchMemOpFlushRemoteWrites", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 3 // Error Handling - cuda2hipRename["cuGetErrorName"] = {"hipGetErrorName___", CONV_ERR, API_DRIVER, HIP_UNSUPPORTED}; // cudaGetErrorName (hipGetErrorName) has different signature - cuda2hipRename["cuGetErrorString"] = {"hipGetErrorString___", CONV_ERR, API_DRIVER, HIP_UNSUPPORTED}; // cudaGetErrorString (hipGetErrorString) has different signature + cuda2hipRename["cuGetErrorName"] = {"hipGetErrorName___", CONV_ERROR, API_DRIVER, HIP_UNSUPPORTED}; // cudaGetErrorName (hipGetErrorName) has different signature + cuda2hipRename["cuGetErrorString"] = {"hipGetErrorString___", CONV_ERROR, API_DRIVER, HIP_UNSUPPORTED}; // cudaGetErrorString (hipGetErrorString) has different signature 
// Init - cuda2hipRename["cuInit"] = {"hipInit", CONV_DRIVER, API_DRIVER}; + cuda2hipRename["cuInit"] = {"hipInit", CONV_INIT, API_DRIVER}; // Driver - cuda2hipRename["cuDriverGetVersion"] = {"hipDriverGetVersion", CONV_DRIVER, API_DRIVER}; + cuda2hipRename["cuDriverGetVersion"] = {"hipDriverGetVersion", CONV_VERSION, API_DRIVER}; // Context Management cuda2hipRename["cuCtxCreate_v2"] = {"hipCtxCreate", CONV_CONTEXT, API_DRIVER}; @@ -915,10 +924,10 @@ struct cuda2hipMap { cuda2hipRename["cuCtxDetach"] = {"hipCtxDetach", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}; // Peer Context Memory Access - cuda2hipRename["cuCtxEnablePeerAccess"] = {"hipCtxEnablePeerAccess", CONV_CONTEXT, API_DRIVER}; - cuda2hipRename["cuCtxDisablePeerAccess"] = {"hipCtxDisablePeerAccess", CONV_CONTEXT, API_DRIVER}; - cuda2hipRename["cuDeviceCanAccessPeer"] = {"hipDeviceCanAccessPeer", CONV_DEV, API_DRIVER}; - + cuda2hipRename["cuCtxEnablePeerAccess"] = {"hipCtxEnablePeerAccess", CONV_PEER, API_DRIVER}; + cuda2hipRename["cuCtxDisablePeerAccess"] = {"hipCtxDisablePeerAccess", CONV_PEER, API_DRIVER}; + cuda2hipRename["cuDeviceCanAccessPeer"] = {"hipDeviceCanAccessPeer", CONV_PEER, API_DRIVER}; + cuda2hipRename["cuDeviceGetP2PAttribute"] = {"hipDeviceGetP2PAttribute", CONV_PEER, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaDeviceGetP2PAttribute) // Primary Context Management cuda2hipRename["cuDevicePrimaryCtxGetState"] = {"hipDevicePrimaryCtxGetState", CONV_CONTEXT, API_DRIVER}; @@ -928,28 +937,28 @@ struct cuda2hipMap { cuda2hipRename["cuDevicePrimaryCtxSetFlags"] = {"hipDevicePrimaryCtxSetFlags", CONV_CONTEXT, API_DRIVER}; // Device Management - cuda2hipRename["cuDeviceGet"] = {"hipGetDevice", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuDeviceGetName"] = {"hipDeviceGetName", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuDeviceGetCount"] = {"hipGetDeviceCount", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuDeviceGetAttribute"] = {"hipDeviceGetAttribute", CONV_DEV, API_DRIVER}; - 
cuda2hipRename["cuDeviceGetPCIBusId"] = {"hipDeviceGetPCIBusId", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuDeviceGetByPCIBusId"] = {"hipDeviceGetByPCIBusId", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuDeviceTotalMem_v2"] = {"hipDeviceTotalMem", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuDeviceGet"] = {"hipGetDevice", CONV_DEVICE, API_DRIVER}; + cuda2hipRename["cuDeviceGetName"] = {"hipDeviceGetName", CONV_DEVICE, API_DRIVER}; + cuda2hipRename["cuDeviceGetCount"] = {"hipGetDeviceCount", CONV_DEVICE, API_DRIVER}; + cuda2hipRename["cuDeviceGetAttribute"] = {"hipDeviceGetAttribute", CONV_DEVICE, API_DRIVER}; + cuda2hipRename["cuDeviceGetPCIBusId"] = {"hipDeviceGetPCIBusId", CONV_DEVICE, API_DRIVER}; + cuda2hipRename["cuDeviceGetByPCIBusId"] = {"hipDeviceGetByPCIBusId", CONV_DEVICE, API_DRIVER}; + cuda2hipRename["cuDeviceTotalMem_v2"] = {"hipDeviceTotalMem", CONV_DEVICE, API_DRIVER}; // Device Management [DEPRECATED] - cuda2hipRename["cuDeviceComputeCapability"] = {"hipDeviceComputeCapability", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuDeviceGetProperties"] = {"hipGetDeviceProperties", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuDeviceComputeCapability"] = {"hipDeviceComputeCapability", CONV_DEVICE, API_DRIVER}; + cuda2hipRename["cuDeviceGetProperties"] = {"hipGetDeviceProperties", CONV_DEVICE, API_DRIVER}; // Module Management - cuda2hipRename["cuLinkAddData"] = {"hipLinkAddData", CONV_EVENT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cuLinkAddFile"] = {"hipLinkAddFile", CONV_EVENT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cuLinkComplete"] = {"hipLinkComplete", CONV_EVENT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cuLinkCreate"] = {"hipLinkCreate", CONV_EVENT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cuLinkDestroy"] = {"hipLinkDestroy", CONV_EVENT, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuLinkAddData"] = {"hipLinkAddData", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuLinkAddFile"] = {"hipLinkAddFile", CONV_MODULE, 
API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuLinkComplete"] = {"hipLinkComplete", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuLinkCreate"] = {"hipLinkCreate", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuLinkDestroy"] = {"hipLinkDestroy", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; cuda2hipRename["cuModuleGetFunction"] = {"hipModuleGetFunction", CONV_MODULE, API_DRIVER}; cuda2hipRename["cuModuleGetGlobal_v2"] = {"hipModuleGetGlobal", CONV_MODULE, API_DRIVER}; - cuda2hipRename["cuModuleGetSurfRef"] = {"hipModuleGetSurfRef", CONV_EVENT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cuModuleGetTexRef"] = {"hipModuleGetTexRef", CONV_EVENT, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuModuleGetSurfRef"] = {"hipModuleGetSurfRef", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuModuleGetTexRef"] = {"hipModuleGetTexRef", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; cuda2hipRename["cuModuleLoad"] = {"hipModuleLoad", CONV_MODULE, API_DRIVER}; cuda2hipRename["cuModuleLoadData"] = {"hipModuleLoadData", CONV_MODULE, API_DRIVER}; cuda2hipRename["cuModuleLoadDataEx"] = {"hipModuleLoadDataEx", CONV_MODULE, API_DRIVER}; @@ -958,13 +967,11 @@ struct cuda2hipMap { // unsupported yet by HIP [CUDA 8.0.44] // P2P Attributes - cuda2hipRename["CUdevice_P2PAttribute"] = {"hipDeviceP2PAttribute", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaDeviceP2PAttr) - // cuda2hipRename["CUdevice_P2PAttribute_enum"] = {"hipDeviceP2PAttribute", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK"] = {"hipDeviceP2PAttributePerformanceRank", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 // API_Runtime ANALOGUE (cudaDevP2PAttrPerformanceRank = 0x01) - cuda2hipRename["CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED"] = {"hipDeviceP2PAttributeAccessSupported", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x02 // API_Runtime ANALOGUE (cudaDevP2PAttrAccessSupported = 0x02) - 
cuda2hipRename["CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED"] = {"hipDeviceP2PAttributeNativeAtomicSupported", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // 0x03 // API_Runtime ANALOGUE (cudaDevP2PAttrNativeAtomicSupported = 0x03) - - cuda2hipRename["cuDeviceGetP2PAttribute"] = {"hipDeviceGetP2PAttribute", CONV_DEV, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaDeviceGetP2PAttribute) + cuda2hipRename["CUdevice_P2PAttribute"] = {"hipDeviceP2PAttribute", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaDeviceP2PAttr) + // cuda2hipRename["CUdevice_P2PAttribute_enum"] = {"hipDeviceP2PAttribute", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK"] = {"hipDeviceP2PAttributePerformanceRank", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 // API_Runtime ANALOGUE (cudaDevP2PAttrPerformanceRank = 0x01) + cuda2hipRename["CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED"] = {"hipDeviceP2PAttributeAccessSupported", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x02 // API_Runtime ANALOGUE (cudaDevP2PAttrAccessSupported = 0x02) + cuda2hipRename["CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED"] = {"hipDeviceP2PAttributeNativeAtomicSupported", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}; // 0x03 // API_Runtime ANALOGUE (cudaDevP2PAttrNativeAtomicSupported = 0x03) // Events // pointer to CUevent_st @@ -993,8 +1000,26 @@ struct cuda2hipMap { cuda2hipRename["cuFuncSetSharedMemConfig"] = {"hipFuncSetSharedMemConfig", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; cuda2hipRename["cuLaunchKernel"] = {"hipModuleLaunchKernel", CONV_MODULE, API_DRIVER}; + // Execution Control [DEPRECATED] + cuda2hipRename["cuFuncSetBlockShape"] = {"hipFuncSetBlockShape", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuFuncSetSharedSize"] = {"hipFuncSetSharedSize", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuLaunch"] = {"hipLaunch", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE 
(cudaLaunch) + cuda2hipRename["cuLaunchGrid"] = {"hipLaunchGrid", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuLaunchGridAsync"] = {"hipLaunchGridAsync", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuParamSetf"] = {"hipParamSetf", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuParamSeti"] = {"hipParamSeti", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuParamSetSize"] = {"hipParamSetSize", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuParamSetTexRef"] = {"hipParamSetTexRef", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuParamSetv"] = {"hipParamSetv", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; + + // Occupancy + cuda2hipRename["cuOccupancyMaxActiveBlocksPerMultiprocessor"] = {"hipOccupancyMaxActiveBlocksPerMultiprocessor", CONV_OCCUPANCY, API_DRIVER}; // API_Runtime ANALOGUE (cudaOccupancyMaxActiveBlocksPerMultiprocessor) + cuda2hipRename["cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"] = {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags) + cuda2hipRename["cuOccupancyMaxPotentialBlockSize"] = {"hipOccupancyMaxPotentialBlockSize", CONV_OCCUPANCY, API_DRIVER}; // API_Runtime ANALOGUE (cudaOccupancyMaxPotentialBlockSize) + cuda2hipRename["cuOccupancyMaxPotentialBlockSizeWithFlags"] = {"hipOccupancyMaxPotentialBlockSizeWithFlags", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaOccupancyMaxPotentialBlockSizeWithFlags) + // Streams - cuda2hipRename["cuStreamAddCallback"] = {"hipStreamAddCallback", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuStreamAddCallback"] = {"hipStreamAddCallback", CONV_STREAM, API_DRIVER}; cuda2hipRename["cuStreamAttachMemAsync"] = {"hipStreamAttachMemAsync", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; cuda2hipRename["cuStreamCreate"] = {"hipStreamCreate__", CONV_STREAM,
API_DRIVER, HIP_UNSUPPORTED}; // Not equal to cudaStreamCreate due to different signatures cuda2hipRename["cuStreamCreateWithPriority"] = {"hipStreamCreateWithPriority", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; @@ -1028,10 +1053,10 @@ struct cuda2hipMap { cuda2hipRename["cuMemcpy2DAsync"] = {"hipMemcpy2DAsync__", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; // Not equal to cudaMemcpy2DAsync due to different signatures cuda2hipRename["cuMemcpy2DUnaligned"] = {"hipMemcpy2DUnaligned", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; cuda2hipRename["cuMemcpy3D"] = {"hipMemcpy3D__", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; // Not equal to cudaMemcpy3D due to different signatures - cuda2hipRename["cuMemcpy3DAsync"] = {"hipMemcpy3DAsync__", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; // Not equal to cudaMemcpy3DAsync due to different signatures - cuda2hipRename["cuMemcpy3DPeer"] = {"hipMemcpy3DPeer__", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; // Not equal to cudaMemcpy3DPeer due to different signatures - cuda2hipRename["cuMemcpy3DPeerAsync"] = {"hipMemcpy3DPeerAsync__", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; // Not equal to cudaMemcpy3DPeerAsync due to different signatures - cuda2hipRename["cuMemcpyAsync"] = {"hipMemcpyAsync__", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; // Not equal to cudaMemcpyAsync due to different signatures + cuda2hipRename["cuMemcpy3DAsync"] = {"hipMemcpy3DAsync__", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; // Not equal to cudaMemcpy3DAsync due to different signatures + cuda2hipRename["cuMemcpy3DPeer"] = {"hipMemcpy3DPeer__", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; // Not equal to cudaMemcpy3DPeer due to different signatures + cuda2hipRename["cuMemcpy3DPeerAsync"] = {"hipMemcpy3DPeerAsync__", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; // Not equal to cudaMemcpy3DPeerAsync due to different signatures + cuda2hipRename["cuMemcpyAsync"] = {"hipMemcpyAsync__", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; // Not equal to cudaMemcpyAsync due to different signatures 
cuda2hipRename["cuMemcpyAtoA"] = {"hipMemcpyAtoA", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; cuda2hipRename["cuMemcpyAtoD"] = {"hipMemcpyAtoD", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; cuda2hipRename["cuMemcpyAtoH"] = {"hipMemcpyAtoH", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; @@ -1056,30 +1081,30 @@ struct cuda2hipMap { cuda2hipRename["cuMemHostGetFlags"] = {"hipMemHostGetFlags", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; cuda2hipRename["cuMemHostRegister_v2"] = {"hipHostRegister", CONV_MEM, API_DRIVER}; // API_Runtime ANALOGUE (cudaHostAlloc) cuda2hipRename["cuMemHostUnregister"] = {"hipHostUnregister", CONV_MEM, API_DRIVER}; // API_Runtime ANALOGUE (cudaHostUnregister) - cuda2hipRename["cuMemsetD16_v2"] = {"hipMemsetD16", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cuMemsetD16Async"] = {"hipMemsetD16Async", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cuMemsetD2D16_v2"] = {"hipMemsetD2D16", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cuMemsetD2D16Async"] = {"hipMemsetD2D16Async", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cuMemsetD2D32_v2"] = {"hipMemsetD2D32", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cuMemsetD2D32Async"] = {"hipMemsetD2D32Async", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cuMemsetD2D8_v2"] = {"hipMemsetD2D8", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cuMemsetD2D8Async"] = {"hipMemsetD2D8Async", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuMemsetD16_v2"] = {"hipMemsetD16", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuMemsetD16Async"] = {"hipMemsetD16Async", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuMemsetD2D16_v2"] = {"hipMemsetD2D16", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuMemsetD2D16Async"] = {"hipMemsetD2D16Async", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuMemsetD2D32_v2"] = {"hipMemsetD2D32", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; + 
cuda2hipRename["cuMemsetD2D32Async"] = {"hipMemsetD2D32Async", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuMemsetD2D8_v2"] = {"hipMemsetD2D8", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuMemsetD2D8Async"] = {"hipMemsetD2D8Async", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; cuda2hipRename["cuMemsetD32_v2"] = {"hipMemset", CONV_MEM, API_DRIVER}; // API_Runtime ANALOGUE (cudaMemset) cuda2hipRename["cuMemsetD32Async"] = {"hipMemsetAsync", CONV_MEM, API_DRIVER}; // API_Runtime ANALOGUE (cudaMemsetAsync) - cuda2hipRename["cuMemsetD8_v2"] = {"hipMemsetD8", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cuMemsetD8Async"] = {"hipMemsetD8Async", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cuMipmappedArrayCreate"] = {"hipMipmappedArrayCreate", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cuMipmappedArrayDestroy"] = {"hipMipmappedArrayDestroy", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cuMipmappedArrayGetLevel"] = {"hipMipmappedArrayGetLevel", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuMemsetD8_v2"] = {"hipMemsetD8", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuMemsetD8Async"] = {"hipMemsetD8Async", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuMipmappedArrayCreate"] = {"hipMipmappedArrayCreate", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuMipmappedArrayDestroy"] = {"hipMipmappedArrayDestroy", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuMipmappedArrayGetLevel"] = {"hipMipmappedArrayGetLevel", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; // Unified Addressing - cuda2hipRename["cuMemPrefetchAsync"] = {"hipMemPrefetchAsync__", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; // [CUDA 8.0.44] // no API_Runtime ANALOGUE (cudaMemPrefetchAsync has different signature) - cuda2hipRename["cuMemAdvise"] = {"hipMemAdvise", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; // [CUDA 8.0.44] // API_Runtime ANALOGUE (cudaMemAdvise) - 
cuda2hipRename["cuMemRangeGetAttribute"] = {"hipMemRangeGetAttribute", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; // [CUDA 8.0.44] // API_Runtime ANALOGUE (cudaMemRangeGetAttribute) - cuda2hipRename["cuMemRangeGetAttributes"] = {"hipMemRangeGetAttributes", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; // [CUDA 8.0.44] // API_Runtime ANALOGUE (cudaMemRangeGetAttributes) - cuda2hipRename["cuPointerGetAttribute"] = {"hipPointerGetAttribute", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cuPointerGetAttributes"] = {"hipPointerGetAttributes", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cuPointerSetAttribute"] = {"hipPointerSetAttribute", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cuMemPrefetchAsync"] = {"hipMemPrefetchAsync__", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; // [CUDA 8.0.44] // no API_Runtime ANALOGUE (cudaMemPrefetchAsync has different signature) + cuda2hipRename["cuMemAdvise"] = {"hipMemAdvise", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; // [CUDA 8.0.44] // API_Runtime ANALOGUE (cudaMemAdvise) + cuda2hipRename["cuMemRangeGetAttribute"] = {"hipMemRangeGetAttribute", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; // [CUDA 8.0.44] // API_Runtime ANALOGUE (cudaMemRangeGetAttribute) + cuda2hipRename["cuMemRangeGetAttributes"] = {"hipMemRangeGetAttributes", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; // [CUDA 8.0.44] // API_Runtime ANALOGUE (cudaMemRangeGetAttributes) + cuda2hipRename["cuPointerGetAttribute"] = {"hipPointerGetAttribute", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuPointerGetAttributes"] = {"hipPointerGetAttributes", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuPointerSetAttribute"] = {"hipPointerSetAttribute", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}; // Texture Reference Mngmnt // Texture reference filtering modes @@ -1089,17 +1114,217 @@ struct cuda2hipMap { cuda2hipRename["CU_TR_FILTER_MODE_POINT"] = {"hipFilterModePoint", CONV_TEX, API_DRIVER}; // 0 // API_Runtime ANALOGUE (cudaFilterModePoint 
= 0) cuda2hipRename["CU_TR_FILTER_MODE_LINEAR"] = {"hipFilterModeLinear", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; // 1 // API_Runtime ANALOGUE (cudaFilterModeLinear = 1) - cuda2hipRename["cuTexRefSetBorderColor"] = {"hipTexRefSetBorderColor", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // [CUDA 8.0.44] // no API_Runtime ANALOGUE - cuda2hipRename["cuTexRefGetBorderColor"] = {"hipTexRefGetBorderColor", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // [CUDA 8.0.44] // no API_Runtime ANALOGUE + cuda2hipRename["cuTexRefGetAddress"] = {"hipTexRefGetAddress", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefGetAddressMode"] = {"hipTexRefGetAddressMode", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefGetArray"] = {"hipTexRefGetArray", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefGetBorderColor"] = {"hipTexRefGetBorderColor", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; // [CUDA 8.0.44] // no API_Runtime ANALOGUE + cuda2hipRename["cuTexRefGetFilterMode"] = {"hipTexRefGetFilterMode", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefGetFlags"] = {"hipTexRefGetFlags", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefGetFormat"] = {"hipTexRefGetFormat", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefGetMaxAnisotropy"] = {"hipTexRefGetMaxAnisotropy", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefGetMipmapFilterMode"] = {"hipTexRefGetMipmapFilterMode", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefGetMipmapLevelBias"] = {"hipTexRefGetMipmapLevelBias", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefGetMipmapLevelClamp"] = {"hipTexRefGetMipmapLevelClamp", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefGetMipmappedArray"] = {"hipTexRefGetMipmappedArray", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefSetAddress"] = {"hipTexRefSetAddress", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + 
cuda2hipRename["cuTexRefSetAddress2D"] = {"hipTexRefSetAddress2D", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefSetAddressMode"] = {"hipTexRefSetAddressMode", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefSetArray"] = {"hipTexRefSetArray", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefSetBorderColor"] = {"hipTexRefSetBorderColor", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; // [CUDA 8.0.44] // no API_Runtime ANALOGUE + cuda2hipRename["cuTexRefSetFilterMode"] = {"hipTexRefSetFilterMode", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefSetFlags"] = {"hipTexRefSetFlags", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefSetFormat"] = {"hipTexRefSetFormat", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefSetMaxAnisotropy"] = {"hipTexRefSetMaxAnisotropy", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefSetMipmapFilterMode"] = {"hipTexRefSetMipmapFilterMode", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefSetMipmapLevelBias"] = {"hipTexRefSetMipmapLevelBias", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefSetMipmapLevelClamp"] = {"hipTexRefSetMipmapLevelClamp", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefSetMipmappedArray"] = {"hipTexRefSetMipmappedArray", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + + // Texture Reference Mngmnt [DEPRECATED] + cuda2hipRename["cuTexRefCreate"] = {"hipTexRefCreate", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexRefDestroy"] = {"hipTexRefDestroy", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + + // Surface Reference Mngmnt + cuda2hipRename["cuSurfRefGetArray"] = {"hipSurfRefGetArray", CONV_SURFACE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuSurfRefSetArray"] = {"hipSurfRefSetArray", CONV_SURFACE, API_DRIVER, HIP_UNSUPPORTED}; + + // Texture Object Mngmnt + cuda2hipRename["cuTexObjectCreate"] = {"hipTexObjectCreate", CONV_TEX, API_DRIVER, 
HIP_UNSUPPORTED}; + cuda2hipRename["cuTexObjectDestroy"] = {"hipTexObjectDestroy", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexObjectGetResourceDesc"] = {"hipTexObjectGetResourceDesc", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexObjectGetResourceViewDesc"] = {"hipTexObjectGetResourceViewDesc", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuTexObjectGetTextureDesc"] = {"hipTexObjectGetTextureDesc", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + + // Surface Object Mngmnt + cuda2hipRename["cuSurfObjectCreate"] = {"hipSurfObjectCreate", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuSurfObjectDestroy"] = {"hipSurfObjectDestroy", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuSurfObjectGetResourceDesc"] = {"hipSurfObjectGetResourceDesc", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; + + // Graphics Interoperability + cuda2hipRename["cuGraphicsMapResources"] = {"hipGraphicsMapResources", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGraphicsMapResources) + cuda2hipRename["cuGraphicsResourceGetMappedMipmappedArray"] = {"hipGraphicsResourceGetMappedMipmappedArray", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGraphicsResourceGetMappedMipmappedArray) + cuda2hipRename["cuGraphicsResourceGetMappedPointer"] = {"hipGraphicsResourceGetMappedPointer", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGraphicsResourceGetMappedPointer) + cuda2hipRename["cuGraphicsResourceSetMapFlags"] = {"hipGraphicsResourceSetMapFlags", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGraphicsResourceSetMapFlags) + cuda2hipRename["cuGraphicsSubResourceGetMappedArray"] = {"hipGraphicsSubResourceGetMappedArray", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGraphicsSubResourceGetMappedArray) + cuda2hipRename["cuGraphicsUnmapResources"] = {"hipGraphicsUnmapResources", CONV_GRAPHICS, API_DRIVER, 
HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGraphicsUnmapResources) + cuda2hipRename["cuGraphicsUnregisterResource"] = {"hipGraphicsUnregisterResource", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGraphicsUnregisterResource) // Profiler - // unsupported yet by HIP - cuda2hipRename["cuProfilerInitialize"] = {"hipProfilerInitialize", CONV_OTHER, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuProfilerInitialize"] = {"hipProfilerInitialize", CONV_OTHER, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaProfilerInitialize) + cuda2hipRename["cuProfilerStart"] = {"hipProfilerStart", CONV_OTHER, API_DRIVER}; // API_Runtime ANALOGUE (cudaProfilerStart) + cuda2hipRename["cuProfilerStop"] = {"hipProfilerStop", CONV_OTHER, API_DRIVER}; // API_Runtime ANALOGUE (cudaProfilerStop) - cuda2hipRename["cuProfilerStart"] = {"hipProfilerStart", CONV_OTHER, API_DRIVER}; - cuda2hipRename["cuProfilerStop"] = {"hipProfilerStop", CONV_OTHER, API_DRIVER}; + // OpenGL Interoperability + // enum CUGLDeviceList/CUGLDeviceList_enum + cuda2hipRename["CUGLDeviceList"] = {"hipGLDeviceList", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGLDeviceList) + // cuda2hipRename["CUGLDeviceList_enum"] = {"hipGLDeviceList", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["CU_GL_DEVICE_LIST_ALL"] = {"HIP_GL_DEVICE_LIST_ALL", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 // API_Runtime ANALOGUE (cudaGLDeviceListAll) + cuda2hipRename["CU_GL_DEVICE_LIST_CURRENT_FRAME"] = {"HIP_GL_DEVICE_LIST_CURRENT_FRAME", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // 0x02 // API_Runtime ANALOGUE (cudaGLDeviceListCurrentFrame) + cuda2hipRename["CU_GL_DEVICE_LIST_NEXT_FRAME"] = {"HIP_GL_DEVICE_LIST_NEXT_FRAME", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // 0x03 // API_Runtime ANALOGUE (cudaGLDeviceListNextFrame) - /////////////////////////////// CUDA RT API /////////////////////////////// + cuda2hipRename["cuGLGetDevices"] = {"hipGLGetDevices", CONV_GL, 
API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGLGetDevices) + cuda2hipRename["cuGraphicsGLRegisterBuffer"] = {"hipGraphicsGLRegisterBuffer", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGraphicsGLRegisterBuffer) + cuda2hipRename["cuGraphicsGLRegisterImage"] = {"hipGraphicsGLRegisterImage", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGraphicsGLRegisterImage) + cuda2hipRename["cuWGLGetDevice"] = {"hipWGLGetDevice", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaWGLGetDevice) + + // OpenGL Interoperability [DEPRECATED] + // enum CUGLmap_flags/CUGLmap_flags_enum + cuda2hipRename["CUGLmap_flags"] = {"hipGLMapFlags", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGLMapFlags) + // cuda2hipRename["CUGLmap_flags_enum"] = {"hipGLMapFlags", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["CU_GL_MAP_RESOURCE_FLAGS_NONE"] = {"HIP_GL_MAP_RESOURCE_FLAGS_NONE", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // 0x00 // API_Runtime ANALOGUE (cudaGLMapFlagsNone) + cuda2hipRename["CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY"] = {"HIP_GL_MAP_RESOURCE_FLAGS_READ_ONLY", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 // API_Runtime ANALOGUE (cudaGLMapFlagsReadOnly) + cuda2hipRename["CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD"] = {"HIP_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // 0x02 // API_Runtime ANALOGUE (cudaGLMapFlagsWriteDiscard) + + cuda2hipRename["cuGLCtxCreate"] = {"hipGLCtxCreate", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // no API_Runtime ANALOGUE + cuda2hipRename["cuGLInit"] = {"hipGLInit", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // no API_Runtime ANALOGUE + cuda2hipRename["cuGLMapBufferObject"] = {"hipGLMapBufferObject", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // Not equal to cudaGLMapBufferObject due to different signatures + cuda2hipRename["cuGLMapBufferObjectAsync"] = {"hipGLMapBufferObjectAsync", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // Not equal to 
cudaGLMapBufferObjectAsync due to different signatures + cuda2hipRename["cuGLRegisterBufferObject"] = {"hipGLRegisterBufferObject", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGLRegisterBufferObject) + cuda2hipRename["cuGLSetBufferObjectMapFlags"] = {"hipGLSetBufferObjectMapFlags", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGLSetBufferObjectMapFlags) + cuda2hipRename["cuGLUnmapBufferObject"] = {"hipGLUnmapBufferObject", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGLUnmapBufferObject) + cuda2hipRename["cuGLUnmapBufferObjectAsync"] = {"hipGLUnmapBufferObjectAsync", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGLUnmapBufferObjectAsync) + cuda2hipRename["cuGLUnregisterBufferObject"] = {"hipGLUnregisterBufferObject", CONV_GL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGLUnregisterBufferObject) + + // Direct3D 9 Interoperability + // enum CUd3d9DeviceList/CUd3d9DeviceList_enum + cuda2hipRename["CUd3d9DeviceList"] = {"hipD3D9DeviceList", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D9DeviceList) + // cuda2hipRename["CUd3d9DeviceList_enum"] = {"hipD3D9DeviceList", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["CU_D3D9_DEVICE_LIST_ALL"] = {"HIP_D3D9_DEVICE_LIST_ALL", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 // API_Runtime ANALOGUE (cudaD3D9DeviceListAll) + cuda2hipRename["CU_D3D9_DEVICE_LIST_CURRENT_FRAME"] = {"HIP_D3D9_DEVICE_LIST_CURRENT_FRAME", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // 0x02 // API_Runtime ANALOGUE (cudaD3D9DeviceListCurrentFrame) + cuda2hipRename["CU_D3D9_DEVICE_LIST_NEXT_FRAME"] = {"HIP_D3D9_DEVICE_LIST_NEXT_FRAME", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // 0x03 // API_Runtime ANALOGUE (cudaD3D9DeviceListNextFrame) + + cuda2hipRename["cuD3D9CtxCreate"] = {"hipD3D9CtxCreate", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // no API_Runtime ANALOGUE + cuda2hipRename["cuD3D9CtxCreateOnDevice"] = 
{"hipD3D9CtxCreateOnDevice", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // no API_Runtime ANALOGUE + cuda2hipRename["cuD3D9GetDevice"] = {"hipD3D9GetDevice", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D9GetDevice) + cuda2hipRename["cuD3D9GetDevices"] = {"hipD3D9GetDevices", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D9GetDevices) + cuda2hipRename["cuD3D9GetDirect3DDevice"] = {"hipD3D9GetDirect3DDevice", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D9GetDirect3DDevice) + cuda2hipRename["cuGraphicsD3D9RegisterResource"] = {"hipGraphicsD3D9RegisterResource", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGraphicsD3D9RegisterResource) + + // Direct3D 9 Interoperability [DEPRECATED] + // enum CUd3d9map_flags/CUd3d9map_flags_enum + cuda2hipRename["CUd3d9map_flags"] = {"hipD3D9MapFlags", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D9MapFlags) + // cuda2hipRename["CUd3d9map_flags_enum"] = {"hipD3D9MapFlags", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["CU_D3D9_MAPRESOURCE_FLAGS_NONE"] = {"HIP_D3D9_MAPRESOURCE_FLAGS_NONE", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // 0x00 // API_Runtime ANALOGUE (cudaD3D9MapFlagsNone) + cuda2hipRename["CU_D3D9_MAPRESOURCE_FLAGS_READONLY"] = {"HIP_D3D9_MAPRESOURCE_FLAGS_READONLY", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 // API_Runtime ANALOGUE (cudaD3D9MapFlagsReadOnly) + cuda2hipRename["CU_D3D9_MAPRESOURCE_FLAGS_WRITEDISCARD"] = {"HIP_D3D9_MAPRESOURCE_FLAGS_WRITEDISCARD", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // 0x02 // API_Runtime ANALOGUE (cudaD3D9MapFlagsWriteDiscard) + + // enum CUd3d9register_flags/CUd3d9register_flags_enum + cuda2hipRename["CUd3d9register_flags"] = {"hipD3D9RegisterFlags", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D9RegisterFlags) + // cuda2hipRename["CUd3d9register_flags_enum"] = {"hipD3D9RegisterFlags", CONV_D3D9, API_DRIVER, 
HIP_UNSUPPORTED}; + cuda2hipRename["CU_D3D9_REGISTER_FLAGS_NONE"] = {"HIP_D3D9_REGISTER_FLAGS_NONE", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // 0x00 // API_Runtime ANALOGUE (cudaD3D9RegisterFlagsNone) + cuda2hipRename["CU_D3D9_REGISTER_FLAGS_ARRAY"] = {"HIP_D3D9_REGISTER_FLAGS_ARRAY", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 // API_Runtime ANALOGUE (cudaD3D9RegisterFlagsArray) + + cuda2hipRename["cuD3D9MapResources"] = {"hipD3D9MapResources", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D9MapResources) + cuda2hipRename["cuD3D9RegisterResource"] = {"hipD3D9RegisterResource", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D9RegisterResource) + cuda2hipRename["cuD3D9ResourceGetMappedArray"] = {"hipD3D9ResourceGetMappedArray", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D9ResourceGetMappedArray) + cuda2hipRename["cuD3D9ResourceGetMappedPitch"] = {"hipD3D9ResourceGetMappedPitch", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D9ResourceGetMappedPitch) + cuda2hipRename["cuD3D9ResourceGetMappedPointer"] = {"hipD3D9ResourceGetMappedPointer", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D9ResourceGetMappedPointer) + cuda2hipRename["cuD3D9ResourceGetMappedSize"] = {"hipD3D9ResourceGetMappedSize", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D9ResourceGetMappedSize) + cuda2hipRename["cuD3D9ResourceGetSurfaceDimensions"] = {"hipD3D9ResourceGetSurfaceDimensions", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D9ResourceGetSurfaceDimensions) + cuda2hipRename["cuD3D9ResourceSetMapFlags"] = {"hipD3D9ResourceSetMapFlags", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D9ResourceSetMapFlags) + cuda2hipRename["cuD3D9UnmapResources"] = {"hipD3D9UnmapResources", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D9UnmapResources) + 
cuda2hipRename["cuD3D9UnregisterResource"] = {"hipD3D9UnregisterResource", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D9UnregisterResource) + + // Direct3D 10 Interoperability + // enum CUd3d10DeviceList/CUd3d10DeviceList_enum + cuda2hipRename["CUd3d10DeviceList"] = {"hipD3D10DeviceList", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D10DeviceList) + // cuda2hipRename["CUd3d10DeviceList_enum"] = {"hipD3D10DeviceList", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["CU_D3D10_DEVICE_LIST_ALL"] = {"HIP_D3D10_DEVICE_LIST_ALL", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 // API_Runtime ANALOGUE (cudaD3D10DeviceListAll) + cuda2hipRename["CU_D3D10_DEVICE_LIST_CURRENT_FRAME"] = {"HIP_D3D10_DEVICE_LIST_CURRENT_FRAME", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // 0x02 // API_Runtime ANALOGUE (cudaD3D10DeviceListCurrentFrame) + cuda2hipRename["CU_D3D10_DEVICE_LIST_NEXT_FRAME"] = {"HIP_D3D10_DEVICE_LIST_NEXT_FRAME", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // 0x03 // API_Runtime ANALOGUE (cudaD3D10DeviceListNextFrame) + + cuda2hipRename["cuD3D10GetDevice"] = {"hipD3D10GetDevice", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D10GetDevice) + cuda2hipRename["cuD3D10GetDevices"] = {"hipD3D10GetDevices", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D10GetDevices) + cuda2hipRename["cuGraphicsD3D10RegisterResource"] = {"hipGraphicsD3D10RegisterResource", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGraphicsD3D10RegisterResource) + + // Direct3D 10 Interoperability [DEPRECATED] + // enum CUd3d10map_flags/CUd3d10map_flags_enum + cuda2hipRename["CUd3d10map_flags"] = {"hipD3D10MapFlags", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D10MapFlags) + // cuda2hipRename["CUd3d10map_flags_enum"] = {"hipD3D10MapFlags", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["CU_D3D10_MAPRESOURCE_FLAGS_NONE"] =
{"HIP_D3D10_MAPRESOURCE_FLAGS_NONE", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // 0x00 // API_Runtime ANALOGUE (cudaD3D10MapFlagsNone) + cuda2hipRename["CU_D3D10_MAPRESOURCE_FLAGS_READONLY"] = {"HIP_D3D10_MAPRESOURCE_FLAGS_READONLY", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 // API_Runtime ANALOGUE (cudaD3D10MapFlagsReadOnly) + cuda2hipRename["CU_D3D10_MAPRESOURCE_FLAGS_WRITEDISCARD"] = {"HIP_D3D10_MAPRESOURCE_FLAGS_WRITEDISCARD", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // 0x02 // API_Runtime ANALOGUE (cudaD3D10MapFlagsWriteDiscard) + + // enum CUd3d10register_flags/CUd3d10register_flags_enum + cuda2hipRename["CUd3d10register_flags"] = {"hipD3D10RegisterFlags", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D10RegisterFlags) + // cuda2hipRename["CUd3d10register_flags_enum"] = {"hipD3D10RegisterFlags", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["CU_D3D10_REGISTER_FLAGS_NONE"] = {"HIP_D3D10_REGISTER_FLAGS_NONE", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // 0x00 // API_Runtime ANALOGUE (cudaD3D10RegisterFlagsNone) + cuda2hipRename["CU_D3D10_REGISTER_FLAGS_ARRAY"] = {"HIP_D3D10_REGISTER_FLAGS_ARRAY", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // 0x01 // API_Runtime ANALOGUE (cudaD3D10RegisterFlagsArray) + + cuda2hipRename["cuD3D10CtxCreate"] = {"hipD3D10CtxCreate", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // no API_Runtime ANALOGUE + cuda2hipRename["cuD3D10CtxCreateOnDevice"] = {"hipD3D10CtxCreateOnDevice", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // no API_Runtime ANALOGUE + cuda2hipRename["cuD3D10GetDirect3DDevice"] = {"hipD3D10GetDirect3DDevice", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D10GetDirect3DDevice) + cuda2hipRename["cuD3D10MapResources"] = {"hipD3D10MapResources", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D10MapResources) + cuda2hipRename["cuD3D10RegisterResource"] = {"hipD3D10RegisterResource", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // 
API_Runtime ANALOGUE (cudaD3D10RegisterResource) + cuda2hipRename["cuD3D10ResourceGetMappedArray"] = {"hipD3D10ResourceGetMappedArray", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D10ResourceGetMappedArray) + cuda2hipRename["cuD3D10ResourceGetMappedPitch"] = {"hipD3D10ResourceGetMappedPitch", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D10ResourceGetMappedPitch) + cuda2hipRename["cuD3D10ResourceGetMappedPointer"] = {"hipD3D10ResourceGetMappedPointer", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D10ResourceGetMappedPointer) + cuda2hipRename["cuD3D10ResourceGetMappedSize"] = {"hipD3D10ResourceGetMappedSize", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D10ResourceGetMappedSize) + cuda2hipRename["cuD3D10ResourceGetSurfaceDimensions"] = {"hipD3D10ResourceGetSurfaceDimensions", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D10ResourceGetSurfaceDimensions) + cuda2hipRename["cuD3D10ResourceSetMapFlags"] = {"hipD3D10ResourceSetMapFlags", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D10ResourceSetMapFlags) + cuda2hipRename["cuD3D10UnmapResources"] = {"hipD3D10UnmapResources", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D10UnmapResources) + cuda2hipRename["cuD3D10UnregisterResource"] = {"hipD3D10UnregisterResource", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D10UnregisterResource) + + // Direct3D 11 Interoperability + // enum CUd3d11DeviceList/CUd3d11DeviceList_enum + cuda2hipRename["CUd3d11DeviceList"] = {"hipD3D11DeviceList", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D11DeviceList) + // cuda2hipRename["CUd3d11DeviceList_enum"] = {"hipD3D11DeviceList", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["CU_D3D11_DEVICE_LIST_ALL"] = {"HIP_D3D11_DEVICE_LIST_ALL", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}; // 0x01
// API_Runtime ANALOGUE (cudaD3D11DeviceListAll) + cuda2hipRename["CU_D3D11_DEVICE_LIST_CURRENT_FRAME"] = {"HIP_D3D11_DEVICE_LIST_CURRENT_FRAME", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}; // 0x02 // API_Runtime ANALOGUE (cudaD3D11DeviceListCurrentFrame) + cuda2hipRename["CU_D3D11_DEVICE_LIST_NEXT_FRAME"] = {"HIP_D3D11_DEVICE_LIST_NEXT_FRAME", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}; // 0x03 // API_Runtime ANALOGUE (cudaD3D11DeviceListNextFrame) + + cuda2hipRename["cuD3D11GetDevice"] = {"hipD3D11GetDevice", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D11GetDevice) + cuda2hipRename["cuD3D11GetDevices"] = {"hipD3D11GetDevices", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D11GetDevices) + cuda2hipRename["cuGraphicsD3D11RegisterResource"] = {"hipGraphicsD3D11RegisterResource", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGraphicsD3D11RegisterResource) + + // Direct3D 11 Interoperability [DEPRECATED] + cuda2hipRename["cuD3D11CtxCreate"] = {"hipD3D11CtxCreate", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}; // no API_Runtime ANALOGUE + cuda2hipRename["cuD3D11CtxCreateOnDevice"] = {"hipD3D11CtxCreateOnDevice", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}; // no API_Runtime ANALOGUE + cuda2hipRename["cuD3D11GetDirect3DDevice"] = {"hipD3D11GetDirect3DDevice", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaD3D11GetDirect3DDevice) + + // VDPAU Interoperability + cuda2hipRename["cuGraphicsVDPAURegisterOutputSurface"] = {"hipGraphicsVDPAURegisterOutputSurface", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGraphicsVDPAURegisterOutputSurface) + cuda2hipRename["cuGraphicsVDPAURegisterVideoSurface"] = {"hipGraphicsVDPAURegisterVideoSurface", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGraphicsVDPAURegisterVideoSurface) + cuda2hipRename["cuVDPAUGetDevice"] = {"hipVDPAUGetDevice", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}; // 
API_Runtime ANALOGUE (cudaVDPAUGetDevice) + cuda2hipRename["cuVDPAUCtxCreate"] = {"hipVDPAUCtxCreate", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}; // no API_Runtime ANALOGUE + + // EGL Interoperability + cuda2hipRename["CUeglStreamConnection_st"] = {"hipEglStreamConnection", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaEglStreamConnection) + cuda2hipRename["CUeglStreamConnection"] = {"hipEglStreamConnection", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaEglStreamConnection) + + cuda2hipRename["cuEGLStreamConsumerAcquireFrame"] = {"hipEGLStreamConsumerAcquireFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaEGLStreamConsumerAcquireFrame) + cuda2hipRename["cuEGLStreamConsumerConnect"] = {"hipEGLStreamConsumerConnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaEGLStreamConsumerConnect) + cuda2hipRename["cuEGLStreamConsumerConnectWithFlags"] = {"hipEGLStreamConsumerConnectWithFlags", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaEGLStreamConsumerConnectWithFlags) + cuda2hipRename["cuEGLStreamConsumerDisconnect"] = {"hipEGLStreamConsumerDisconnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}; // no API_Runtime ANALOGUE + cuda2hipRename["cuEGLStreamConsumerReleaseFrame"] = {"hipEGLStreamConsumerReleaseFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaEGLStreamConsumerReleaseFrame) + cuda2hipRename["cuEGLStreamProducerConnect"] = {"hipEGLStreamProducerConnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaEGLStreamProducerConnect) + cuda2hipRename["cuEGLStreamProducerDisconnect"] = {"hipEGLStreamProducerDisconnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaEGLStreamProducerDisconnect) + cuda2hipRename["cuEGLStreamProducerPresentFrame"] = {"hipEGLStreamProducerPresentFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaEGLStreamProducerPresentFrame) + 
cuda2hipRename["cuEGLStreamProducerReturnFrame"] = {"hipEGLStreamProducerReturnFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaEGLStreamProducerReturnFrame) + cuda2hipRename["cuGraphicsEGLRegisterImage"] = {"hipGraphicsEGLRegisterImage", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGraphicsEGLRegisterImage) + cuda2hipRename["cuGraphicsResourceGetMappedEglFrame"] = {"hipGraphicsResourceGetMappedEglFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (cudaGraphicsResourceGetMappedEglFrame) + +/////////////////////////////// CUDA RT API /////////////////////////////// // Data types // unsupported yet by HIP [CUDA 8.0.44] cuda2hipRename["cudaDataType_t"] = {"hipDataType_t", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; @@ -1137,26 +1362,26 @@ struct cuda2hipMap { cuda2hipRename["cudaOccupancyDefault"] = {"hipOccupancyDefault", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 0x00 // API_Driver ANALOGUE (CU_OCCUPANCY_DEFAULT = 0x0) cuda2hipRename["cudaOccupancyDisableCachingOverride"] = {"hipOccupancyDisableCachingOverride", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 0x01 // API_Driver ANALOGUE (CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 0x1) + cuda2hipRename["cudaStreamCallback_t"] = {"hipStreamCallback_t", CONV_TYPE, API_RUNTIME}; + // Error API - cuda2hipRename["cudaGetLastError"] = {"hipGetLastError", CONV_ERR, API_RUNTIME}; - cuda2hipRename["cudaPeekAtLastError"] = {"hipPeekAtLastError", CONV_ERR, API_RUNTIME}; - cuda2hipRename["cudaGetErrorName"] = {"hipGetErrorName", CONV_ERR, API_RUNTIME}; - cuda2hipRename["cudaGetErrorString"] = {"hipGetErrorString", CONV_ERR, API_RUNTIME}; + cuda2hipRename["cudaGetLastError"] = {"hipGetLastError", CONV_ERROR, API_RUNTIME}; + cuda2hipRename["cudaPeekAtLastError"] = {"hipPeekAtLastError", CONV_ERROR, API_RUNTIME}; + cuda2hipRename["cudaGetErrorName"] = {"hipGetErrorName", CONV_ERROR, API_RUNTIME}; + cuda2hipRename["cudaGetErrorString"] = {"hipGetErrorString", 
CONV_ERROR, API_RUNTIME}; // Arrays cuda2hipRename["cudaArray"] = {"hipArray", CONV_MEM, API_RUNTIME}; // typedef struct cudaArray *cudaArray_t; - cuda2hipRename["cudaArray_t"] = {"hipArray *", CONV_MEM, API_RUNTIME}; + cuda2hipRename["cudaArray_t"] = {"hipArray_t", CONV_MEM, API_RUNTIME}; // typedef const struct cudaArray *cudaArray_const_t; - cuda2hipRename["cudaArray_const_t"] = {"const hipArray *", CONV_MEM, API_RUNTIME}; - // unsupported yet by HIP - cuda2hipRename["cudaMipmappedArray_t"] = {"hipMipmappedArray *", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaMipmappedArray_const_t"] = {"const hipMipmappedArray *", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaArray_const_t"] = {"hipArray_const_t", CONV_MEM, API_RUNTIME}; + cuda2hipRename["cudaMipmappedArray_t"] = {"hipMipmappedArray_t", CONV_MEM, API_RUNTIME}; + cuda2hipRename["cudaMipmappedArray_const_t"] = {"hipMipmappedArray_const_t", CONV_MEM, API_RUNTIME}; // memcpy // memcpy structs - // unsupported yet by HIP - cuda2hipRename["cudaMemcpy3DParms"] = {"hipMemcpy3DParms", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaMemcpy3DParms"] = {"hipMemcpy3DParms", CONV_MEM, API_RUNTIME}; cuda2hipRename["cudaMemcpy3DPeerParms"] = {"hipMemcpy3DPeerParms", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; // memcpy functions @@ -1173,7 +1398,7 @@ struct cuda2hipMap { cuda2hipRename["cudaMemcpy2DFromArray"] = {"hipMemcpy2DFromArray", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; cuda2hipRename["cudaMemcpy2DFromArrayAsync"] = {"hipMemcpy2DFromArrayAsync", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; cuda2hipRename["cudaMemcpy2DToArrayAsync"] = {"hipMemcpy2DToArrayAsync", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaMemcpy3D"] = {"hipMemcpy3D", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaMemcpy3D"] = {"hipMemcpy3D", CONV_MEM, API_RUNTIME}; cuda2hipRename["cudaMemcpy3DAsync"] = {"hipMemcpy3DAsync", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; 
cuda2hipRename["cudaMemcpy3DPeer"] = {"hipMemcpy3DPeer", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; cuda2hipRename["cudaMemcpy3DPeerAsync"] = {"hipMemcpy3DPeerAsync", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; @@ -1212,8 +1437,8 @@ struct cuda2hipMap { // memset cuda2hipRename["cudaMemset"] = {"hipMemset", CONV_MEM, API_RUNTIME}; cuda2hipRename["cudaMemsetAsync"] = {"hipMemsetAsync", CONV_MEM, API_RUNTIME}; + cuda2hipRename["cudaMemset2D"] = {"hipMemset2D", CONV_MEM, API_RUNTIME}; // unsupported yet by HIP - cuda2hipRename["cudaMemset2D"] = {"hipMemset2D", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; cuda2hipRename["cudaMemset2DAsync"] = {"hipMemset2DAsync", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; cuda2hipRename["cudaMemset3D"] = {"hipMemset3D", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; cuda2hipRename["cudaMemset3DAsync"] = {"hipMemset3DAsync", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; @@ -1232,9 +1457,8 @@ struct cuda2hipMap { cuda2hipRename["cudaMalloc"] = {"hipMalloc", CONV_MEM, API_RUNTIME}; cuda2hipRename["cudaMallocHost"] = {"hipHostMalloc", CONV_MEM, API_RUNTIME}; cuda2hipRename["cudaMallocArray"] = {"hipMallocArray", CONV_MEM, API_RUNTIME}; - // unsupported yet by HIP cuda2hipRename["cudaMalloc3D"] = {"hipMalloc3D", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaMalloc3DArray"] = {"hipMalloc3DArray", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaMalloc3DArray"] = {"hipMalloc3DArray", CONV_MEM, API_RUNTIME}; cuda2hipRename["cudaMallocManaged"] = {"hipMallocManaged", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; cuda2hipRename["cudaMallocMipmappedArray"] = {"hipMallocMipmappedArray", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; cuda2hipRename["cudaMallocPitch"] = {"hipMallocPitch", CONV_MEM, API_RUNTIME}; @@ -1253,14 +1477,13 @@ struct cuda2hipMap { cuda2hipRename["cudaMemoryTypeDevice"] = {"hipMemoryTypeDevice", CONV_MEM, API_RUNTIME}; // make memory functions - // unsupported yet by HIP - cuda2hipRename["make_cudaExtent"] = {"make_hipExtent", 
CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["make_cudaPitchedPtr"] = {"make_hipPitchedPtr", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["make_cudaPos"] = {"make_hipPos", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["make_cudaExtent"] = {"make_hipExtent", CONV_MEM, API_RUNTIME}; + cuda2hipRename["make_cudaPitchedPtr"] = {"make_hipPitchedPtr", CONV_MEM, API_RUNTIME}; + cuda2hipRename["make_cudaPos"] = {"make_hipPos", CONV_MEM, API_RUNTIME}; - cuda2hipRename["cudaExtent"] = {"hipExtent", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaPitchedPtr"] = {"hipPitchedPtr", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaPos"] = {"hipPos", CONV_MEM, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaExtent"] = {"hipExtent", CONV_MEM, API_RUNTIME}; + cuda2hipRename["cudaPitchedPtr"] = {"hipPitchedPtr", CONV_MEM, API_RUNTIME}; + cuda2hipRename["cudaPos"] = {"hipPos", CONV_MEM, API_RUNTIME}; // Host Malloc Flags cuda2hipRename["cudaHostAllocDefault"] = {"hipHostMallocDefault", CONV_MEM, API_RUNTIME}; @@ -1324,7 +1547,6 @@ struct cuda2hipMap { cuda2hipRename["cudaStream_t"] = {"hipStream_t", CONV_TYPE, API_RUNTIME}; cuda2hipRename["cudaStreamCreate"] = {"hipStreamCreate", CONV_STREAM, API_RUNTIME}; cuda2hipRename["cudaStreamCreateWithFlags"] = {"hipStreamCreateWithFlags", CONV_STREAM, API_RUNTIME}; - // unsupported yet by HIP cuda2hipRename["cudaStreamCreateWithPriority"] = {"hipStreamCreateWithPriority", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}; cuda2hipRename["cudaStreamDestroy"] = {"hipStreamDestroy", CONV_STREAM, API_RUNTIME}; cuda2hipRename["cudaStreamWaitEvent"] = {"hipStreamWaitEvent", CONV_STREAM, API_RUNTIME}; @@ -1332,124 +1554,127 @@ struct cuda2hipMap { cuda2hipRename["cudaStreamGetFlags"] = {"hipStreamGetFlags", CONV_STREAM, API_RUNTIME}; cuda2hipRename["cudaStreamQuery"] = {"hipStreamQuery", CONV_STREAM, API_RUNTIME}; cuda2hipRename["cudaStreamAddCallback"] = {"hipStreamAddCallback", 
CONV_STREAM, API_RUNTIME}; - // unsupported yet by HIP cuda2hipRename["cudaStreamAttachMemAsync"] = {"hipStreamAttachMemAsync", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}; cuda2hipRename["cudaStreamGetPriority"] = {"hipStreamGetPriority", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}; // Stream Flags - cuda2hipRename["cudaStreamDefault"] = {"hipStreamDefault", CONV_STREAM, API_RUNTIME}; - cuda2hipRename["cudaStreamNonBlocking"] = {"hipStreamNonBlocking", CONV_STREAM, API_RUNTIME}; + cuda2hipRename["cudaStreamDefault"] = {"hipStreamDefault", CONV_TYPE, API_RUNTIME}; + cuda2hipRename["cudaStreamNonBlocking"] = {"hipStreamNonBlocking", CONV_TYPE, API_RUNTIME}; // Other synchronization - cuda2hipRename["cudaDeviceSynchronize"] = {"hipDeviceSynchronize", CONV_DEV, API_RUNTIME}; - // translate deprecated cudaThreadSynchronize - cuda2hipRename["cudaThreadSynchronize"] = {"hipDeviceSynchronize", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaDeviceReset"] = {"hipDeviceReset", CONV_DEV, API_RUNTIME}; - // translate deprecated cudaThreadExit - cuda2hipRename["cudaThreadExit"] = {"hipDeviceReset", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaSetDevice"] = {"hipSetDevice", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaGetDevice"] = {"hipGetDevice", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaGetDeviceCount"] = {"hipGetDeviceCount", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaChooseDevice"] = {"hipChooseDevice", CONV_DEV, API_RUNTIME}; + cuda2hipRename["cudaDeviceSynchronize"] = {"hipDeviceSynchronize", CONV_DEVICE, API_RUNTIME}; + cuda2hipRename["cudaDeviceReset"] = {"hipDeviceReset", CONV_DEVICE, API_RUNTIME}; + cuda2hipRename["cudaSetDevice"] = {"hipSetDevice", CONV_DEVICE, API_RUNTIME}; + cuda2hipRename["cudaGetDevice"] = {"hipGetDevice", CONV_DEVICE, API_RUNTIME}; + cuda2hipRename["cudaGetDeviceCount"] = {"hipGetDeviceCount", CONV_DEVICE, API_RUNTIME}; + cuda2hipRename["cudaChooseDevice"] = {"hipChooseDevice", CONV_DEVICE, API_RUNTIME}; + + // Thread Management + 
cuda2hipRename["cudaThreadExit"] = {"hipDeviceReset", CONV_THREAD, API_RUNTIME}; + cuda2hipRename["cudaThreadGetCacheConfig"] = {"hipDeviceGetCacheConfig", CONV_THREAD, API_RUNTIME}; + cuda2hipRename["cudaThreadGetLimit"] = {"hipThreadGetLimit", CONV_THREAD, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaThreadSetCacheConfig"] = {"hipDeviceSetCacheConfig", CONV_THREAD, API_RUNTIME}; + cuda2hipRename["cudaThreadSetLimit"] = {"hipThreadSetLimit", CONV_THREAD, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaThreadSynchronize"] = {"hipDeviceSynchronize", CONV_THREAD, API_RUNTIME}; // Attributes - cuda2hipRename["cudaDeviceGetAttribute"] = {"hipDeviceGetAttribute", CONV_DEV, API_RUNTIME}; + cuda2hipRename["cudaDeviceGetAttribute"] = {"hipDeviceGetAttribute", CONV_DEVICE, API_RUNTIME}; cuda2hipRename["cudaDeviceAttr"] = {"hipDeviceAttribute_t", CONV_TYPE, API_RUNTIME}; // API_DRIVER ANALOGUE (CUdevice_attribute) - cuda2hipRename["cudaDevAttrMaxThreadsPerBlock"] = {"hipDeviceAttributeMaxThreadsPerBlock", CONV_DEV, API_RUNTIME}; // 1 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1) - cuda2hipRename["cudaDevAttrMaxBlockDimX"] = {"hipDeviceAttributeMaxBlockDimX", CONV_DEV, API_RUNTIME}; // 2 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2) - cuda2hipRename["cudaDevAttrMaxBlockDimY"] = {"hipDeviceAttributeMaxBlockDimY", CONV_DEV, API_RUNTIME}; // 3 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3) - cuda2hipRename["cudaDevAttrMaxBlockDimZ"] = {"hipDeviceAttributeMaxBlockDimZ", CONV_DEV, API_RUNTIME}; // 4 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4) - cuda2hipRename["cudaDevAttrMaxGridDimX"] = {"hipDeviceAttributeMaxGridDimX", CONV_DEV, API_RUNTIME}; // 5 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5) - cuda2hipRename["cudaDevAttrMaxGridDimY"] = {"hipDeviceAttributeMaxGridDimY", CONV_DEV, API_RUNTIME}; // 6 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 6) - 
cuda2hipRename["cudaDevAttrMaxGridDimZ"] = {"hipDeviceAttributeMaxGridDimZ", CONV_DEV, API_RUNTIME}; // 7 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 7) - cuda2hipRename["cudaDevAttrMaxSharedMemoryPerBlock"] = {"hipDeviceAttributeMaxSharedMemoryPerBlock", CONV_DEV, API_RUNTIME}; // 8 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8) - cuda2hipRename["cudaDevAttrTotalConstantMemory"] = {"hipDeviceAttributeTotalConstantMemory", CONV_DEV, API_RUNTIME}; // 9 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY =9) - cuda2hipRename["cudaDevAttrWarpSize"] = {"hipDeviceAttributeWarpSize", CONV_DEV, API_RUNTIME}; // 10 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10) - cuda2hipRename["cudaDevAttrMaxPitch"] = {"hipDeviceAttributeMaxPitch", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 11 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11) - cuda2hipRename["cudaDevAttrMaxRegistersPerBlock"] = {"hipDeviceAttributeMaxRegistersPerBlock", CONV_DEV, API_RUNTIME}; // 12 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12) - cuda2hipRename["cudaDevAttrClockRate"] = {"hipDeviceAttributeClockRate", CONV_DEV, API_RUNTIME}; // 13 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13) - cuda2hipRename["cudaDevAttrTextureAlignment"] = {"hipDeviceAttributeTextureAlignment", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 14 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14) + cuda2hipRename["cudaDevAttrMaxThreadsPerBlock"] = {"hipDeviceAttributeMaxThreadsPerBlock", CONV_TYPE, API_RUNTIME}; // 1 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1) + cuda2hipRename["cudaDevAttrMaxBlockDimX"] = {"hipDeviceAttributeMaxBlockDimX", CONV_TYPE, API_RUNTIME}; // 2 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2) + cuda2hipRename["cudaDevAttrMaxBlockDimY"] = {"hipDeviceAttributeMaxBlockDimY", CONV_TYPE, API_RUNTIME}; // 3 // API_DRIVER ANALOGUE 
(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3) + cuda2hipRename["cudaDevAttrMaxBlockDimZ"] = {"hipDeviceAttributeMaxBlockDimZ", CONV_TYPE, API_RUNTIME}; // 4 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4) + cuda2hipRename["cudaDevAttrMaxGridDimX"] = {"hipDeviceAttributeMaxGridDimX", CONV_TYPE, API_RUNTIME}; // 5 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5) + cuda2hipRename["cudaDevAttrMaxGridDimY"] = {"hipDeviceAttributeMaxGridDimY", CONV_TYPE, API_RUNTIME}; // 6 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6) + cuda2hipRename["cudaDevAttrMaxGridDimZ"] = {"hipDeviceAttributeMaxGridDimZ", CONV_TYPE, API_RUNTIME}; // 7 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7) + cuda2hipRename["cudaDevAttrMaxSharedMemoryPerBlock"] = {"hipDeviceAttributeMaxSharedMemoryPerBlock", CONV_TYPE, API_RUNTIME}; // 8 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8) + cuda2hipRename["cudaDevAttrTotalConstantMemory"] = {"hipDeviceAttributeTotalConstantMemory", CONV_TYPE, API_RUNTIME}; // 9 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9) + cuda2hipRename["cudaDevAttrWarpSize"] = {"hipDeviceAttributeWarpSize", CONV_TYPE, API_RUNTIME}; // 10 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10) + cuda2hipRename["cudaDevAttrMaxPitch"] = {"hipDeviceAttributeMaxPitch", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 11 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11) + cuda2hipRename["cudaDevAttrMaxRegistersPerBlock"] = {"hipDeviceAttributeMaxRegistersPerBlock", CONV_TYPE, API_RUNTIME}; // 12 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12) + cuda2hipRename["cudaDevAttrClockRate"] = {"hipDeviceAttributeClockRate", CONV_TYPE, API_RUNTIME}; // 13 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13) + cuda2hipRename["cudaDevAttrTextureAlignment"] = {"hipDeviceAttributeTextureAlignment", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 14 // 
API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14) // Is not deprecated as CUDA Driver's API analogue CU_DEVICE_ATTRIBUTE_GPU_OVERLAP - cuda2hipRename["cudaDevAttrGpuOverlap"] = {"hipDeviceAttributeGpuOverlap", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 15 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15) - cuda2hipRename["cudaDevAttrMultiProcessorCount"] = {"hipDeviceAttributeMultiprocessorCount", CONV_DEV, API_RUNTIME}; // 16 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16) - cuda2hipRename["cudaDevAttrKernelExecTimeout"] = {"hipDeviceAttributeKernelExecTimeout", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 17 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17) - cuda2hipRename["cudaDevAttrIntegrated"] = {"hipDeviceAttributeIntegrated", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 18 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_INTEGRATED = 18) - cuda2hipRename["cudaDevAttrCanMapHostMemory"] = {"hipDeviceAttributeCanMapHostMemory", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 19 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19) - cuda2hipRename["cudaDevAttrComputeMode"] = {"hipDeviceAttributeComputeMode", CONV_DEV, API_RUNTIME}; // 20 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20) - cuda2hipRename["cudaDevAttrMaxTexture1DWidth"] = {"hipDeviceAttributeMaxTexture1DWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 21 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21) - cuda2hipRename["cudaDevAttrMaxTexture2DWidth"] = {"hipDeviceAttributeMaxTexture2DWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 22 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22) - cuda2hipRename["cudaDevAttrMaxTexture2DHeight"] = {"hipDeviceAttributeMaxTexture2DHeight", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 23 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23) - cuda2hipRename["cudaDevAttrMaxTexture3DWidth"] = 
{"hipDeviceAttributeMaxTexture3DWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 24 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24) - cuda2hipRename["cudaDevAttrMaxTexture3DHeight"] = {"hipDeviceAttributeMaxTexture3DHeight", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 25 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25) - cuda2hipRename["cudaDevAttrMaxTexture3DDepth"] = {"hipDeviceAttributeMaxTexture3DDepth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 26 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26) - cuda2hipRename["cudaDevAttrMaxTexture2DLayeredWidth"] = {"hipDeviceAttributeMaxTexture2DLayeredWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 27 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27) - cuda2hipRename["cudaDevAttrMaxTexture2DLayeredHeight"] = {"hipDeviceAttributeMaxTexture2DLayeredHeight", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 28 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28) - cuda2hipRename["cudaDevAttrMaxTexture2DLayeredLayers"] = {"hipDeviceAttributeMaxTexture2DLayeredLayers", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 29 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29) - cuda2hipRename["cudaDevAttrSurfaceAlignment"] = {"hipDeviceAttributeSurfaceAlignment", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 30 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30) - cuda2hipRename["cudaDevAttrConcurrentKernels"] = {"hipDeviceAttributeConcurrentKernels", CONV_DEV, API_RUNTIME}; // 31 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31) - cuda2hipRename["cudaDevAttrEccEnabled"] = {"hipDeviceAttributeEccEnabled", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 32 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32) - cuda2hipRename["cudaDevAttrPciBusId"] = {"hipDeviceAttributePciBusId", CONV_DEV, API_RUNTIME}; // 33 // API_DRIVER 
ANALOGUE (CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33) - cuda2hipRename["cudaDevAttrPciDeviceId"] = {"hipDeviceAttributePciDeviceId", CONV_DEV, API_RUNTIME}; // 34 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34) - cuda2hipRename["cudaDevAttrTccDriver"] = {"hipDeviceAttributeTccDriver", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 35 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35) - cuda2hipRename["cudaDevAttrMemoryClockRate"] = {"hipDeviceAttributeMemoryClockRate", CONV_DEV, API_RUNTIME}; // 36 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36) - cuda2hipRename["cudaDevAttrGlobalMemoryBusWidth"] = {"hipDeviceAttributeMemoryBusWidth", CONV_DEV, API_RUNTIME}; // 37 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37) - cuda2hipRename["cudaDevAttrL2CacheSize"] = {"hipDeviceAttributeL2CacheSize", CONV_DEV, API_RUNTIME}; // 38 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38) - cuda2hipRename["cudaDevAttrMaxThreadsPerMultiProcessor"] = {"hipDeviceAttributeMaxThreadsPerMultiProcessor", CONV_DEV, API_RUNTIME}; // 39 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39) - cuda2hipRename["cudaDevAttrAsyncEngineCount"] = {"hipDeviceAttributeAsyncEngineCount", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 40 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40) - cuda2hipRename["cudaDevAttrUnifiedAddressing"] = {"hipDeviceAttributeUnifiedAddressing", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 41 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41) - cuda2hipRename["cudaDevAttrMaxTexture1DLayeredWidth"] = {"hipDeviceAttributeMaxTexture1DLayeredWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 42 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42) - cuda2hipRename["cudaDevAttrMaxTexture1DLayeredLayers"] = {"hipDeviceAttributeMaxTexture1DLayeredLayers", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 43 // API_DRIVER 
ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43) + cuda2hipRename["cudaDevAttrGpuOverlap"] = {"hipDeviceAttributeGpuOverlap", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 15 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15) + cuda2hipRename["cudaDevAttrMultiProcessorCount"] = {"hipDeviceAttributeMultiprocessorCount", CONV_TYPE, API_RUNTIME}; // 16 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16) + cuda2hipRename["cudaDevAttrKernelExecTimeout"] = {"hipDeviceAttributeKernelExecTimeout", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 17 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17) + cuda2hipRename["cudaDevAttrIntegrated"] = {"hipDeviceAttributeIntegrated", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 18 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_INTEGRATED = 18) + cuda2hipRename["cudaDevAttrCanMapHostMemory"] = {"hipDeviceAttributeCanMapHostMemory", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 19 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19) + cuda2hipRename["cudaDevAttrComputeMode"] = {"hipDeviceAttributeComputeMode", CONV_TYPE, API_RUNTIME}; // 20 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20) + cuda2hipRename["cudaDevAttrMaxTexture1DWidth"] = {"hipDeviceAttributeMaxTexture1DWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 21 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21) + cuda2hipRename["cudaDevAttrMaxTexture2DWidth"] = {"hipDeviceAttributeMaxTexture2DWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 22 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22) + cuda2hipRename["cudaDevAttrMaxTexture2DHeight"] = {"hipDeviceAttributeMaxTexture2DHeight", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 23 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23) + cuda2hipRename["cudaDevAttrMaxTexture3DWidth"] = {"hipDeviceAttributeMaxTexture3DWidth", CONV_TYPE, API_RUNTIME, 
HIP_UNSUPPORTED}; // 24 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24) + cuda2hipRename["cudaDevAttrMaxTexture3DHeight"] = {"hipDeviceAttributeMaxTexture3DHeight", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 25 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25) + cuda2hipRename["cudaDevAttrMaxTexture3DDepth"] = {"hipDeviceAttributeMaxTexture3DDepth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 26 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26) + cuda2hipRename["cudaDevAttrMaxTexture2DLayeredWidth"] = {"hipDeviceAttributeMaxTexture2DLayeredWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 27 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27) + cuda2hipRename["cudaDevAttrMaxTexture2DLayeredHeight"] = {"hipDeviceAttributeMaxTexture2DLayeredHeight", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 28 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28) + cuda2hipRename["cudaDevAttrMaxTexture2DLayeredLayers"] = {"hipDeviceAttributeMaxTexture2DLayeredLayers", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 29 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29) + cuda2hipRename["cudaDevAttrSurfaceAlignment"] = {"hipDeviceAttributeSurfaceAlignment", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 30 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30) + cuda2hipRename["cudaDevAttrConcurrentKernels"] = {"hipDeviceAttributeConcurrentKernels", CONV_TYPE, API_RUNTIME}; // 31 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31) + cuda2hipRename["cudaDevAttrEccEnabled"] = {"hipDeviceAttributeEccEnabled", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 32 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32) + cuda2hipRename["cudaDevAttrPciBusId"] = {"hipDeviceAttributePciBusId", CONV_TYPE, API_RUNTIME}; // 33 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33) + 
cuda2hipRename["cudaDevAttrPciDeviceId"] = {"hipDeviceAttributePciDeviceId", CONV_TYPE, API_RUNTIME}; // 34 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34) + cuda2hipRename["cudaDevAttrTccDriver"] = {"hipDeviceAttributeTccDriver", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 35 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35) + cuda2hipRename["cudaDevAttrMemoryClockRate"] = {"hipDeviceAttributeMemoryClockRate", CONV_TYPE, API_RUNTIME}; // 36 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36) + cuda2hipRename["cudaDevAttrGlobalMemoryBusWidth"] = {"hipDeviceAttributeMemoryBusWidth", CONV_TYPE, API_RUNTIME}; // 37 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37) + cuda2hipRename["cudaDevAttrL2CacheSize"] = {"hipDeviceAttributeL2CacheSize", CONV_TYPE, API_RUNTIME}; // 38 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38) + cuda2hipRename["cudaDevAttrMaxThreadsPerMultiProcessor"] = {"hipDeviceAttributeMaxThreadsPerMultiProcessor", CONV_TYPE, API_RUNTIME}; // 39 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39) + cuda2hipRename["cudaDevAttrAsyncEngineCount"] = {"hipDeviceAttributeAsyncEngineCount", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 40 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40) + cuda2hipRename["cudaDevAttrUnifiedAddressing"] = {"hipDeviceAttributeUnifiedAddressing", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 41 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41) + cuda2hipRename["cudaDevAttrMaxTexture1DLayeredWidth"] = {"hipDeviceAttributeMaxTexture1DLayeredWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 42 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42) + cuda2hipRename["cudaDevAttrMaxTexture1DLayeredLayers"] = {"hipDeviceAttributeMaxTexture1DLayeredLayers", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 43 // API_DRIVER ANALOGUE 
(CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43) // 44 - no - cuda2hipRename["cudaDevAttrMaxTexture2DGatherWidth"] = {"hipDeviceAttributeMaxTexture2DGatherWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 45 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45) - cuda2hipRename["cudaDevAttrMaxTexture2DGatherHeight"] = {"hipDeviceAttributeMaxTexture2DGatherHeight", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 46 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46) - cuda2hipRename["cudaDevAttrMaxTexture3DWidthAlt"] = {"hipDeviceAttributeMaxTexture3DWidthAlternate", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 47 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47) - cuda2hipRename["cudaDevAttrMaxTexture3DHeightAlt"] = {"hipDeviceAttributeMaxTexture3DHeightAlternate", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 48 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48) - cuda2hipRename["cudaDevAttrMaxTexture3DDepthAlt"] = {"hipDeviceAttributeMaxTexture3DDepthAlternate", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 49 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49) - cuda2hipRename["cudaDevAttrPciDomainId"] = {"hipDeviceAttributePciDomainId", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 50 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50) - cuda2hipRename["cudaDevAttrTexturePitchAlignment"] = {"hipDeviceAttributeTexturePitchAlignment", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 51 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51) - cuda2hipRename["cudaDevAttrMaxTextureCubemapWidth"] = {"hipDeviceAttributeMaxTextureCubemapWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 52 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52) - cuda2hipRename["cudaDevAttrMaxTextureCubemapLayeredWidth"] = {"hipDeviceAttributeMaxTextureCubemapLayeredWidth", 
CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 53 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53) - cuda2hipRename["cudaDevAttrMaxTextureCubemapLayeredLayers"] = {"hipDeviceAttributeMaxTextureCubemapLayeredLayers", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 54 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54) - cuda2hipRename["cudaDevAttrMaxSurface1DWidth"] = {"hipDeviceAttributeMaxSurface1DWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 55 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55) - cuda2hipRename["cudaDevAttrMaxSurface2DWidth"] = {"hipDeviceAttributeMaxSurface2DWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 56 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56) - cuda2hipRename["cudaDevAttrMaxSurface2DHeight"] = {"hipDeviceAttributeMaxSurface2DHeight", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 57 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57) - cuda2hipRename["cudaDevAttrMaxSurface3DWidth"] = {"hipDeviceAttributeMaxSurface3DWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 58 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58) - cuda2hipRename["cudaDevAttrMaxSurface3DHeight"] = {"hipDeviceAttributeMaxSurface3DHeight", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 59 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59) - cuda2hipRename["cudaDevAttrMaxSurface3DDepth"] = {"hipDeviceAttributeMaxSurface3DDepth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 60 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60) - cuda2hipRename["cudaDevAttrMaxSurface1DLayeredWidth"] = {"hipDeviceAttributeMaxSurface1DLayeredWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 61 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61) - cuda2hipRename["cudaDevAttrMaxSurface1DLayeredLayers"] = 
{"hipDeviceAttributeMaxSurface1DLayeredLayers", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 62 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62) - cuda2hipRename["cudaDevAttrMaxSurface2DLayeredWidth"] = {"hipDeviceAttributeMaxSurface2DLayeredWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 63 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63) - cuda2hipRename["cudaDevAttrMaxSurface2DLayeredHeight"] = {"hipDeviceAttributeMaxSurface2DLayeredHeight", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 64 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64) - cuda2hipRename["cudaDevAttrMaxSurface2DLayeredLayers"] = {"hipDeviceAttributeMaxSurface2DLayeredLayers", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 65 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65) - cuda2hipRename["cudaDevAttrMaxSurfaceCubemapWidth"] = {"hipDeviceAttributeMaxSurfaceCubemapWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 66 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66) - cuda2hipRename["cudaDevAttrMaxSurfaceCubemapLayeredWidth"] = {"hipDeviceAttributeMaxSurfaceCubemapLayeredWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 67 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67) - cuda2hipRename["cudaDevAttrMaxSurfaceCubemapLayeredLayers"] = {"hipDeviceAttributeMaxSurfaceCubemapLayeredLayers", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 68 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68) - cuda2hipRename["cudaDevAttrMaxTexture1DLinearWidth"] = {"hipDeviceAttributeMaxTexture1DLinearWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 69 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69) - cuda2hipRename["cudaDevAttrMaxTexture2DLinearWidth"] = {"hipDeviceAttributeMaxTexture2DLinearWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 70 // 
API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70) - cuda2hipRename["cudaDevAttrMaxTexture2DLinearHeight"] = {"hipDeviceAttributeMaxTexture2DLinearHeight", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 71 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71) - cuda2hipRename["cudaDevAttrMaxTexture2DLinearPitch"] = {"hipDeviceAttributeMaxTexture2DLinearPitch", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 72 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72) - cuda2hipRename["cudaDevAttrMaxTexture2DMipmappedWidth"] = {"hipDeviceAttributeMaxTexture2DMipmappedWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 73 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73) - cuda2hipRename["cudaDevAttrMaxTexture2DMipmappedHeight"] = {"hipDeviceAttributeMaxTexture2DMipmappedHeight", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 74 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74) - cuda2hipRename["cudaDevAttrComputeCapabilityMajor"] = {"hipDeviceAttributeComputeCapabilityMajor", CONV_DEV, API_RUNTIME}; // 75 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75) - cuda2hipRename["cudaDevAttrComputeCapabilityMinor"] = {"hipDeviceAttributeComputeCapabilityMinor", CONV_DEV, API_RUNTIME}; // 76 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76) - cuda2hipRename["cudaDevAttrMaxTexture1DMipmappedWidth"] = {"hipDeviceAttributeMaxTexture1DMipmappedWidth", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 77 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77) - cuda2hipRename["cudaDevAttrStreamPrioritiesSupported"] = {"hipDeviceAttributeStreamPrioritiesSupported", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 78 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78) - cuda2hipRename["cudaDevAttrGlobalL1CacheSupported"] = 
{"hipDeviceAttributeGlobalL1CacheSupported", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 79 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79) - cuda2hipRename["cudaDevAttrLocalL1CacheSupported"] = {"hipDeviceAttributeLocalL1CacheSupported", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 80 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80) - cuda2hipRename["cudaDevAttrMaxSharedMemoryPerMultiprocessor"] = {"hipDeviceAttributeMaxSharedMemoryPerMultiprocessor", CONV_DEV, API_RUNTIME}; // 81 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81) - cuda2hipRename["cudaDevAttrMaxRegistersPerMultiprocessor"] = {"hipDeviceAttributeMaxRegistersPerMultiprocessor", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 82 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82) - cuda2hipRename["cudaDevAttrManagedMemory"] = {"hipDeviceAttributeManagedMemory", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 83 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83) - cuda2hipRename["cudaDevAttrIsMultiGpuBoard"] = {"hipDeviceAttributeIsMultiGpuBoard", CONV_DEV, API_RUNTIME}; // 84 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84) - cuda2hipRename["cudaDevAttrMultiGpuBoardGroupID"] = {"hipDeviceAttributeMultiGpuBoardGroupID", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 85 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85) + cuda2hipRename["cudaDevAttrMaxTexture2DGatherWidth"] = {"hipDeviceAttributeMaxTexture2DGatherWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 45 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45) + cuda2hipRename["cudaDevAttrMaxTexture2DGatherHeight"] = {"hipDeviceAttributeMaxTexture2DGatherHeight", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 46 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46) + cuda2hipRename["cudaDevAttrMaxTexture3DWidthAlt"] = 
{"hipDeviceAttributeMaxTexture3DWidthAlternate", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 47 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47) + cuda2hipRename["cudaDevAttrMaxTexture3DHeightAlt"] = {"hipDeviceAttributeMaxTexture3DHeightAlternate", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 48 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48) + cuda2hipRename["cudaDevAttrMaxTexture3DDepthAlt"] = {"hipDeviceAttributeMaxTexture3DDepthAlternate", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 49 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49) + cuda2hipRename["cudaDevAttrPciDomainId"] = {"hipDeviceAttributePciDomainId", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 50 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50) + cuda2hipRename["cudaDevAttrTexturePitchAlignment"] = {"hipDeviceAttributeTexturePitchAlignment", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 51 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51) + cuda2hipRename["cudaDevAttrMaxTextureCubemapWidth"] = {"hipDeviceAttributeMaxTextureCubemapWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 52 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52) + cuda2hipRename["cudaDevAttrMaxTextureCubemapLayeredWidth"] = {"hipDeviceAttributeMaxTextureCubemapLayeredWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 53 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53) + cuda2hipRename["cudaDevAttrMaxTextureCubemapLayeredLayers"] = {"hipDeviceAttributeMaxTextureCubemapLayeredLayers", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 54 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54) + cuda2hipRename["cudaDevAttrMaxSurface1DWidth"] = {"hipDeviceAttributeMaxSurface1DWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 55 // API_DRIVER ANALOGUE 
(CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55) + cuda2hipRename["cudaDevAttrMaxSurface2DWidth"] = {"hipDeviceAttributeMaxSurface2DWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 56 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56) + cuda2hipRename["cudaDevAttrMaxSurface2DHeight"] = {"hipDeviceAttributeMaxSurface2DHeight", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 57 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57) + cuda2hipRename["cudaDevAttrMaxSurface3DWidth"] = {"hipDeviceAttributeMaxSurface3DWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 58 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58) + cuda2hipRename["cudaDevAttrMaxSurface3DHeight"] = {"hipDeviceAttributeMaxSurface3DHeight", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 59 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59) + cuda2hipRename["cudaDevAttrMaxSurface3DDepth"] = {"hipDeviceAttributeMaxSurface3DDepth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 60 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60) + cuda2hipRename["cudaDevAttrMaxSurface1DLayeredWidth"] = {"hipDeviceAttributeMaxSurface1DLayeredWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 61 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61) + cuda2hipRename["cudaDevAttrMaxSurface1DLayeredLayers"] = {"hipDeviceAttributeMaxSurface1DLayeredLayers", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 62 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62) + cuda2hipRename["cudaDevAttrMaxSurface2DLayeredWidth"] = {"hipDeviceAttributeMaxSurface2DLayeredWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 63 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63) + cuda2hipRename["cudaDevAttrMaxSurface2DLayeredHeight"] = {"hipDeviceAttributeMaxSurface2DLayeredHeight", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 64 // API_DRIVER 
ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64) + cuda2hipRename["cudaDevAttrMaxSurface2DLayeredLayers"] = {"hipDeviceAttributeMaxSurface2DLayeredLayers", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 65 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65) + cuda2hipRename["cudaDevAttrMaxSurfaceCubemapWidth"] = {"hipDeviceAttributeMaxSurfaceCubemapWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 66 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66) + cuda2hipRename["cudaDevAttrMaxSurfaceCubemapLayeredWidth"] = {"hipDeviceAttributeMaxSurfaceCubemapLayeredWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 67 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67) + cuda2hipRename["cudaDevAttrMaxSurfaceCubemapLayeredLayers"] = {"hipDeviceAttributeMaxSurfaceCubemapLayeredLayers", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 68 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68) + cuda2hipRename["cudaDevAttrMaxTexture1DLinearWidth"] = {"hipDeviceAttributeMaxTexture1DLinearWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 69 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69) + cuda2hipRename["cudaDevAttrMaxTexture2DLinearWidth"] = {"hipDeviceAttributeMaxTexture2DLinearWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 70 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70) + cuda2hipRename["cudaDevAttrMaxTexture2DLinearHeight"] = {"hipDeviceAttributeMaxTexture2DLinearHeight", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 71 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71) + cuda2hipRename["cudaDevAttrMaxTexture2DLinearPitch"] = {"hipDeviceAttributeMaxTexture2DLinearPitch", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 72 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72) + 
cuda2hipRename["cudaDevAttrMaxTexture2DMipmappedWidth"] = {"hipDeviceAttributeMaxTexture2DMipmappedWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 73 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73) + cuda2hipRename["cudaDevAttrMaxTexture2DMipmappedHeight"] = {"hipDeviceAttributeMaxTexture2DMipmappedHeight", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 74 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74) + cuda2hipRename["cudaDevAttrComputeCapabilityMajor"] = {"hipDeviceAttributeComputeCapabilityMajor", CONV_TYPE, API_RUNTIME}; // 75 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75) + cuda2hipRename["cudaDevAttrComputeCapabilityMinor"] = {"hipDeviceAttributeComputeCapabilityMinor", CONV_TYPE, API_RUNTIME}; // 76 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76) + cuda2hipRename["cudaDevAttrMaxTexture1DMipmappedWidth"] = {"hipDeviceAttributeMaxTexture1DMipmappedWidth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 77 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77) + cuda2hipRename["cudaDevAttrStreamPrioritiesSupported"] = {"hipDeviceAttributeStreamPrioritiesSupported", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 78 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78) + cuda2hipRename["cudaDevAttrGlobalL1CacheSupported"] = {"hipDeviceAttributeGlobalL1CacheSupported", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 79 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79) + cuda2hipRename["cudaDevAttrLocalL1CacheSupported"] = {"hipDeviceAttributeLocalL1CacheSupported", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 80 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80) + cuda2hipRename["cudaDevAttrMaxSharedMemoryPerMultiprocessor"] = {"hipDeviceAttributeMaxSharedMemoryPerMultiprocessor", CONV_TYPE, API_RUNTIME}; // 81 // API_DRIVER ANALOGUE 
(CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81) + cuda2hipRename["cudaDevAttrMaxRegistersPerMultiprocessor"] = {"hipDeviceAttributeMaxRegistersPerMultiprocessor", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 82 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82) + cuda2hipRename["cudaDevAttrManagedMemory"] = {"hipDeviceAttributeManagedMemory", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 83 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83) + cuda2hipRename["cudaDevAttrIsMultiGpuBoard"] = {"hipDeviceAttributeIsMultiGpuBoard", CONV_TYPE, API_RUNTIME}; // 84 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84) + cuda2hipRename["cudaDevAttrMultiGpuBoardGroupID"] = {"hipDeviceAttributeMultiGpuBoardGroupID", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 85 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85) // unsupported yet by HIP [CUDA 8.0.44] - cuda2hipRename["cudaDevAttrHostNativeAtomicSupported"] = {"hipDeviceAttributeHostNativeAtomicSupported", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 86 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86) - cuda2hipRename["cudaDevAttrSingleToDoublePrecisionPerfRatio"] = {"hipDeviceAttributeSingleToDoublePrecisionPerfRatio", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 87 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87) - cuda2hipRename["cudaDevAttrPageableMemoryAccess"] = {"hipDeviceAttributePageableMemoryAccess", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 88 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88) - cuda2hipRename["cudaDevAttrConcurrentManagedAccess"] = {"hipDeviceAttributeConcurrentManagedAccess", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 89 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89) - cuda2hipRename["cudaDevAttrComputePreemptionSupported"] = {"hipDeviceAttributeComputePreemptionSupported", 
CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 90 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90) - cuda2hipRename["cudaDevAttrCanUseHostPointerForRegisteredMem"] = {"hipDeviceAttributeCanUseHostPointerForRegisteredMem", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 91 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91) + cuda2hipRename["cudaDevAttrHostNativeAtomicSupported"] = {"hipDeviceAttributeHostNativeAtomicSupported", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 86 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86) + cuda2hipRename["cudaDevAttrSingleToDoublePrecisionPerfRatio"] = {"hipDeviceAttributeSingleToDoublePrecisionPerfRatio", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 87 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87) + cuda2hipRename["cudaDevAttrPageableMemoryAccess"] = {"hipDeviceAttributePageableMemoryAccess", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 88 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88) + cuda2hipRename["cudaDevAttrConcurrentManagedAccess"] = {"hipDeviceAttributeConcurrentManagedAccess", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 89 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89) + cuda2hipRename["cudaDevAttrComputePreemptionSupported"] = {"hipDeviceAttributeComputePreemptionSupported", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 90 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90) + cuda2hipRename["cudaDevAttrCanUseHostPointerForRegisteredMem"] = {"hipDeviceAttributeCanUseHostPointerForRegisteredMem", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 91 // API_DRIVER ANALOGUE (CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91) // Pointer Attributes // struct cudaPointerAttributes @@ -1460,54 +1685,49 @@ struct cuda2hipMap { // Device cuda2hipRename["cudaDeviceProp"] = {"hipDeviceProp_t", CONV_TYPE, 
API_RUNTIME}; - cuda2hipRename["cudaGetDeviceProperties"] = {"hipGetDeviceProperties", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaDeviceGetPCIBusId"] = {"hipDeviceGetPCIBusId", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaDeviceGetByPCIBusId"] = {"hipDeviceGetByPCIBusId", CONV_DEV, API_RUNTIME}; + cuda2hipRename["cudaGetDeviceProperties"] = {"hipGetDeviceProperties", CONV_DEVICE, API_RUNTIME}; + cuda2hipRename["cudaDeviceGetPCIBusId"] = {"hipDeviceGetPCIBusId", CONV_DEVICE, API_RUNTIME}; + cuda2hipRename["cudaDeviceGetByPCIBusId"] = {"hipDeviceGetByPCIBusId", CONV_DEVICE, API_RUNTIME}; // unsupported yet by HIP - cuda2hipRename["cudaDeviceGetStreamPriorityRange"] = {"hipDeviceGetStreamPriorityRange", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaSetValidDevices"] = {"hipSetValidDevices", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaDeviceGetStreamPriorityRange"] = {"hipDeviceGetStreamPriorityRange", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaSetValidDevices"] = {"hipSetValidDevices", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}; // unsupported yet by HIP [CUDA 8.0.44] // P2P Attributes - cuda2hipRename["cudaDeviceP2PAttr"] = {"hipDeviceP2PAttribute", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // API_DRIVER ANALOGUE (CUdevice_P2PAttribute) - cuda2hipRename["cudaDevP2PAttrPerformanceRank"] = {"hipDeviceP2PAttributePerformanceRank", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 0x01 // API_DRIVER ANALOGUE (CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 0x01) - cuda2hipRename["cudaDevP2PAttrAccessSupported"] = {"hipDeviceP2PAttributeAccessSupported", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 0x02 // API_DRIVER ANALOGUE (CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 0x02) - cuda2hipRename["cudaDevP2PAttrNativeAtomicSupported"] = {"hipDeviceP2PAttributeNativeAtomicSupported", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 0x03 // API_DRIVER ANALOGUE (CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 0x03) + 
cuda2hipRename["cudaDeviceP2PAttr"] = {"hipDeviceP2PAttribute", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // API_DRIVER ANALOGUE (CUdevice_P2PAttribute) + cuda2hipRename["cudaDevP2PAttrPerformanceRank"] = {"hipDeviceP2PAttributePerformanceRank", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 0x01 // API_DRIVER ANALOGUE (CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 0x01) + cuda2hipRename["cudaDevP2PAttrAccessSupported"] = {"hipDeviceP2PAttributeAccessSupported", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 0x02 // API_DRIVER ANALOGUE (CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 0x02) + cuda2hipRename["cudaDevP2PAttrNativeAtomicSupported"] = {"hipDeviceP2PAttributeNativeAtomicSupported", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 0x03 // API_DRIVER ANALOGUE (CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 0x03) // [CUDA 8.0.44] - cuda2hipRename["cudaDeviceGetP2PAttribute"] = {"hipDeviceGetP2PAttribute", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // API_DRIVER ANALOGUE (cuDeviceGetP2PAttribute) + cuda2hipRename["cudaDeviceGetP2PAttribute"] = {"hipDeviceGetP2PAttribute", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}; // API_DRIVER ANALOGUE (cuDeviceGetP2PAttribute) // Compute mode - cuda2hipRename["cudaComputeMode"] = {"hipComputeMode", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // API_DRIVER ANALOGUE (CUcomputemode) - cuda2hipRename["cudaComputeModeDefault"] = {"hipComputeModeDefault", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 0 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_DEFAULT = 0) - cuda2hipRename["cudaComputeModeExclusive"] = {"hipComputeModeExclusive", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 1 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_EXCLUSIVE = 1) - cuda2hipRename["cudaComputeModeProhibited"] = {"hipComputeModeProhibited", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 2 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_PROHIBITED = 2) - cuda2hipRename["cudaComputeModeExclusiveProcess"] = {"hipComputeModeExclusiveProcess", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; // 3 // API_DRIVER 
ANALOGUE (CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3) + cuda2hipRename["cudaComputeMode"] = {"hipComputeMode", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // API_DRIVER ANALOGUE (CUcomputemode) + cuda2hipRename["cudaComputeModeDefault"] = {"hipComputeModeDefault", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 0 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_DEFAULT = 0) + cuda2hipRename["cudaComputeModeExclusive"] = {"hipComputeModeExclusive", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 1 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_EXCLUSIVE = 1) + cuda2hipRename["cudaComputeModeProhibited"] = {"hipComputeModeProhibited", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 2 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_PROHIBITED = 2) + cuda2hipRename["cudaComputeModeExclusiveProcess"] = {"hipComputeModeExclusiveProcess", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 3 // API_DRIVER ANALOGUE (CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3) // Device Flags - // unsupported yet by HIP - cuda2hipRename["cudaGetDeviceFlags"] = {"hipGetDeviceFlags", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaSetDeviceFlags"] = {"hipSetDeviceFlags", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaDeviceScheduleAuto"] = {"hipDeviceScheduleAuto", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaDeviceScheduleSpin"] = {"hipDeviceScheduleSpin", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaDeviceScheduleYield"] = {"hipDeviceScheduleYield", CONV_DEV, API_RUNTIME}; - // deprecated as of CUDA 4.0 and replaced with cudaDeviceScheduleBlockingSync - cuda2hipRename["cudaDeviceBlockingSync"] = {"hipDeviceScheduleBlockingSync", CONV_DEV, API_RUNTIME}; - // unsupported yet by HIP - cuda2hipRename["cudaDeviceScheduleBlockingSync"] = {"hipDeviceScheduleBlockingSync", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaDeviceScheduleMask"] = {"hipDeviceScheduleMask", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaGetDeviceFlags"] = {"hipGetDeviceFlags", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}; + 
cuda2hipRename["cudaSetDeviceFlags"] = {"hipSetDeviceFlags", CONV_DEVICE, API_RUNTIME}; - cuda2hipRename["cudaDeviceMapHost"] = {"hipDeviceMapHost", CONV_DEV, API_RUNTIME}; + cuda2hipRename["cudaDeviceScheduleAuto"] = {"hipDeviceScheduleAuto", CONV_TYPE, API_RUNTIME}; + cuda2hipRename["cudaDeviceScheduleSpin"] = {"hipDeviceScheduleSpin", CONV_TYPE, API_RUNTIME}; + cuda2hipRename["cudaDeviceScheduleYield"] = {"hipDeviceScheduleYield", CONV_TYPE, API_RUNTIME}; + // deprecated as of CUDA 4.0 and replaced with cudaDeviceScheduleBlockingSync + cuda2hipRename["cudaDeviceBlockingSync"] = {"hipDeviceScheduleBlockingSync", CONV_TYPE, API_RUNTIME}; // unsupported yet by HIP - cuda2hipRename["cudaDeviceLmemResizeToMax"] = {"hipDeviceLmemResizeToMax", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaDeviceMask"] = {"hipDeviceMask", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaDeviceScheduleBlockingSync"] = {"hipDeviceScheduleBlockingSync", CONV_TYPE, API_RUNTIME}; + cuda2hipRename["cudaDeviceScheduleMask"] = {"hipDeviceScheduleMask", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; + + cuda2hipRename["cudaDeviceMapHost"] = {"hipDeviceMapHost", CONV_TYPE, API_RUNTIME}; + cuda2hipRename["cudaDeviceLmemResizeToMax"] = {"hipDeviceLmemResizeToMax", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaDeviceMask"] = {"hipDeviceMask", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // Cache config cuda2hipRename["cudaDeviceSetCacheConfig"] = {"hipDeviceSetCacheConfig", CONV_CACHE, API_RUNTIME}; - // translate deprecated - cuda2hipRename["cudaThreadSetCacheConfig"] = {"hipDeviceSetCacheConfig", CONV_CACHE, API_RUNTIME}; cuda2hipRename["cudaDeviceGetCacheConfig"] = {"hipDeviceGetCacheConfig", CONV_CACHE, API_RUNTIME}; - // translate deprecated - cuda2hipRename["cudaThreadGetCacheConfig"] = {"hipDeviceGetCacheConfig", CONV_CACHE, API_RUNTIME}; cuda2hipRename["cudaFuncSetCacheConfig"] = {"hipFuncSetCacheConfig", CONV_CACHE, API_RUNTIME}; // Execution control 
@@ -1533,25 +1753,25 @@ struct cuda2hipMap { cuda2hipRename["cudaLaunch"] = {"hipLaunch", CONV_EXEC, API_RUNTIME, HIP_UNSUPPORTED}; cuda2hipRename["cudaSetupArgument"] = {"hipSetupArgument", CONV_EXEC, API_RUNTIME, HIP_UNSUPPORTED}; - // Driver/Runtime - cuda2hipRename["cudaDriverGetVersion"] = {"hipDriverGetVersion", CONV_DRIVER, API_RUNTIME}; - // unsupported yet by HIP - cuda2hipRename["cudaRuntimeGetVersion"] = {"hipRuntimeGetVersion", CONV_DEV, API_RUNTIME, HIP_UNSUPPORTED}; + // Version Management + cuda2hipRename["cudaDriverGetVersion"] = {"hipDriverGetVersion", CONV_VERSION, API_RUNTIME}; + cuda2hipRename["cudaRuntimeGetVersion"] = {"hipRuntimeGetVersion", CONV_VERSION, API_RUNTIME, HIP_UNSUPPORTED}; // Occupancy - cuda2hipRename["cudaOccupancyMaxPotentialBlockSize"] = {"hipOccupancyMaxPotentialBlockSize", CONV_OCCUPANCY, API_DRIVER}; + cuda2hipRename["cudaOccupancyMaxPotentialBlockSize"] = {"hipOccupancyMaxPotentialBlockSize", CONV_OCCUPANCY, API_RUNTIME}; // unsupported yet by HIP - cuda2hipRename["cudaOccupancyMaxPotentialBlockSizeWithFlags"] = {"hipOccupancyMaxPotentialBlockSizeWithFlags", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cudaOccupancyMaxActiveBlocksPerMultiprocessor"] = {"hipOccupancyMaxActiveBlocksPerMultiprocessor", CONV_OCCUPANCY, API_DRIVER}; + cuda2hipRename["cudaOccupancyMaxPotentialBlockSizeWithFlags"] = {"hipOccupancyMaxPotentialBlockSizeWithFlags", CONV_OCCUPANCY, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaOccupancyMaxActiveBlocksPerMultiprocessor"] = {"hipOccupancyMaxActiveBlocksPerMultiprocessor", CONV_OCCUPANCY, API_RUNTIME}; // unsupported yet by HIP - cuda2hipRename["cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"] = {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["cudaOccupancyMaxPotentialBlockSizeVariableSMem"] = {"hipOccupancyMaxPotentialBlockSizeVariableSMem", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}; - 
cuda2hipRename["cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags"] = {"hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"] = {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", CONV_OCCUPANCY, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaOccupancyMaxPotentialBlockSizeVariableSMem"] = {"hipOccupancyMaxPotentialBlockSizeVariableSMem", CONV_OCCUPANCY, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags"] = {"hipOccupancyMaxPotentialBlockSizeVariableSMemWithFlags", CONV_OCCUPANCY, API_RUNTIME, HIP_UNSUPPORTED}; // Peer2Peer - cuda2hipRename["cudaDeviceCanAccessPeer"] = {"hipDeviceCanAccessPeer", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaDeviceDisablePeerAccess"] = {"hipDeviceDisablePeerAccess", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaDeviceEnablePeerAccess"] = {"hipDeviceEnablePeerAccess", CONV_DEV, API_RUNTIME}; + cuda2hipRename["cudaDeviceCanAccessPeer"] = {"hipDeviceCanAccessPeer", CONV_PEER, API_RUNTIME}; + cuda2hipRename["cudaDeviceDisablePeerAccess"] = {"hipDeviceDisablePeerAccess", CONV_PEER, API_RUNTIME}; + cuda2hipRename["cudaDeviceEnablePeerAccess"] = {"hipDeviceEnablePeerAccess", CONV_PEER, API_RUNTIME}; + cuda2hipRename["cudaMemcpyPeerAsync"] = {"hipMemcpyPeerAsync", CONV_MEM, API_RUNTIME}; cuda2hipRename["cudaMemcpyPeer"] = {"hipMemcpyPeer", CONV_MEM, API_RUNTIME}; @@ -1559,17 +1779,16 @@ struct cuda2hipMap { cuda2hipRename["cudaIpcMemLazyEnablePeerAccess"] = {"hipIpcMemLazyEnablePeerAccess", CONV_TYPE, API_RUNTIME}; // 0x01 // API_Driver ANALOGUE (CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 0x1) // Shared memory - cuda2hipRename["cudaDeviceSetSharedMemConfig"] = {"hipDeviceSetSharedMemConfig", CONV_DEV, API_RUNTIME}; + cuda2hipRename["cudaDeviceSetSharedMemConfig"] = {"hipDeviceSetSharedMemConfig", CONV_DEVICE, API_RUNTIME}; + 
cuda2hipRename["cudaDeviceGetSharedMemConfig"] = {"hipDeviceGetSharedMemConfig", CONV_DEVICE, API_RUNTIME}; // translate deprecated - cuda2hipRename["cudaThreadSetSharedMemConfig"] = {"hipDeviceSetSharedMemConfig", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaDeviceGetSharedMemConfig"] = {"hipDeviceGetSharedMemConfig", CONV_DEV, API_RUNTIME}; - // translate deprecated - cuda2hipRename["cudaThreadGetSharedMemConfig"] = {"hipDeviceGetSharedMemConfig", CONV_DEV, API_RUNTIME}; + // cuda2hipRename["cudaThreadGetSharedMemConfig"] = {"hipDeviceGetSharedMemConfig", CONV_DEVICE, API_RUNTIME}; + // cuda2hipRename["cudaThreadSetSharedMemConfig"] = {"hipDeviceSetSharedMemConfig", CONV_DEVICE, API_RUNTIME}; - cuda2hipRename["cudaSharedMemConfig"] = {"hipSharedMemConfig", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaSharedMemBankSizeDefault"] = {"hipSharedMemBankSizeDefault", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaSharedMemBankSizeFourByte"] = {"hipSharedMemBankSizeFourByte", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaSharedMemBankSizeEightByte"] = {"hipSharedMemBankSizeEightByte", CONV_DEV, API_RUNTIME}; + cuda2hipRename["cudaSharedMemConfig"] = {"hipSharedMemConfig", CONV_TYPE, API_RUNTIME}; + cuda2hipRename["cudaSharedMemBankSizeDefault"] = {"hipSharedMemBankSizeDefault", CONV_TYPE, API_RUNTIME}; + cuda2hipRename["cudaSharedMemBankSizeFourByte"] = {"hipSharedMemBankSizeFourByte", CONV_TYPE, API_RUNTIME}; + cuda2hipRename["cudaSharedMemBankSizeEightByte"] = {"hipSharedMemBankSizeEightByte", CONV_TYPE, API_RUNTIME}; // Limits cuda2hipRename["cudaLimit"] = {"hipLimit_t", CONV_TYPE, API_RUNTIME}; // API_Driver ANALOGUE (CUlimit) @@ -1579,14 +1798,12 @@ struct cuda2hipMap { cuda2hipRename["cudaLimitDevRuntimeSyncDepth"] = {"hipLimitDevRuntimeSyncDepth", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 0x03 // API_Driver ANALOGUE (CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 0x03) cuda2hipRename["cudaLimitDevRuntimePendingLaunchCount"] = {"hipLimitDevRuntimePendingLaunchCount", 
CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}; // 0x04 // API_Driver ANALOGUE (CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 0x04) - cuda2hipRename["cudaDeviceGetLimit"] = {"hipDeviceGetLimit", CONV_DEV, API_RUNTIME}; + cuda2hipRename["cudaDeviceGetLimit"] = {"hipDeviceGetLimit", CONV_DEVICE, API_RUNTIME}; // Profiler - // unsupported yet by HIP - cuda2hipRename["cudaProfilerInitialize"] = {"hipProfilerInitialize", CONV_OTHER, API_RUNTIME, HIP_UNSUPPORTED}; - - cuda2hipRename["cudaProfilerStart"] = {"hipProfilerStart", CONV_OTHER, API_RUNTIME}; - cuda2hipRename["cudaProfilerStop"] = {"hipProfilerStop", CONV_OTHER, API_RUNTIME}; + cuda2hipRename["cudaProfilerInitialize"] = {"hipProfilerInitialize", CONV_OTHER, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuProfilerInitialize) + cuda2hipRename["cudaProfilerStart"] = {"hipProfilerStart", CONV_OTHER, API_RUNTIME}; // API_Driver ANALOGUE (cuProfilerStart) + cuda2hipRename["cudaProfilerStop"] = {"hipProfilerStop", CONV_OTHER, API_RUNTIME}; // API_Driver ANALOGUE (cuProfilerStop) // unsupported yet by HIP cuda2hipRename["cudaOutputMode"] = {"hipOutputMode", CONV_OTHER, API_RUNTIME, HIP_UNSUPPORTED}; @@ -1597,21 +1814,19 @@ struct cuda2hipMap { // enums cuda2hipRename["cudaTextureReadMode"] = {"hipTextureReadMode", CONV_TEX, API_RUNTIME}; cuda2hipRename["cudaReadModeElementType"] = {"hipReadModeElementType", CONV_TEX, API_RUNTIME}; - // unsupported yet by HIP - cuda2hipRename["cudaReadModeNormalizedFloat"] = {"hipReadModeNormalizedFloat", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaReadModeNormalizedFloat"] = {"hipReadModeNormalizedFloat", CONV_TEX, API_RUNTIME}; cuda2hipRename["cudaTextureFilterMode"] = {"hipTextureFilterMode", CONV_TEX, API_RUNTIME}; // API_DRIVER ANALOGUE (CUfilter_mode) cuda2hipRename["cudaFilterModePoint"] = {"hipFilterModePoint", CONV_TEX, API_RUNTIME}; // 0 // API_DRIVER ANALOGUE (CU_TR_FILTER_MODE_POINT = 0) - cuda2hipRename["cudaFilterModeLinear"] = {"hipFilterModeLinear", 
CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 1 // API_DRIVER ANALOGUE (CU_TR_FILTER_MODE_POINT = 1) + cuda2hipRename["cudaFilterModeLinear"] = {"hipFilterModeLinear", CONV_TEX, API_RUNTIME}; // 1 // API_DRIVER ANALOGUE (CU_TR_FILTER_MODE_LINEAR = 1) cuda2hipRename["cudaBindTexture"] = {"hipBindTexture", CONV_TEX, API_RUNTIME}; cuda2hipRename["cudaUnbindTexture"] = {"hipUnbindTexture", CONV_TEX, API_RUNTIME}; - // unsupported yet by HIP - cuda2hipRename["cudaBindTexture2D"] = {"hipBindTexture2D", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaBindTextureToArray"] = {"hipBindTextureToArray", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaBindTextureToMipmappedArray"] = {"hipBindTextureToMipmappedArray", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaGetTextureAlignmentOffset"] = {"hipGetTextureAlignmentOffset", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaGetTextureReference"] = {"hipGetTextureReference", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaBindTexture2D"] = {"hipBindTexture2D", CONV_TEX, API_RUNTIME}; + cuda2hipRename["cudaBindTextureToArray"] = {"hipBindTextureToArray", CONV_TEX, API_RUNTIME}; + cuda2hipRename["cudaBindTextureToMipmappedArray"] = {"hipBindTextureToMipmappedArray", CONV_TEX, API_RUNTIME}; // Unsupported yet on NVCC path + cuda2hipRename["cudaGetTextureAlignmentOffset"] = {"hipGetTextureAlignmentOffset", CONV_TEX, API_RUNTIME}; // Unsupported yet on NVCC path + cuda2hipRename["cudaGetTextureReference"] = {"hipGetTextureReference", CONV_TEX, API_RUNTIME}; // Unsupported yet on NVCC path // Channel cuda2hipRename["cudaChannelFormatKind"] = {"hipChannelFormatKind", CONV_TEX, API_RUNTIME}; @@ -1620,75 +1835,80 @@ struct cuda2hipMap { cuda2hipRename["cudaChannelFormatKindFloat"] = {"hipChannelFormatKindFloat", CONV_TEX, API_RUNTIME}; cuda2hipRename["cudaChannelFormatKindNone"] = {"hipChannelFormatKindNone", CONV_TEX, API_RUNTIME}; 
cuda2hipRename["cudaChannelFormatDesc"] = {"hipChannelFormatDesc", CONV_TEX, API_RUNTIME}; + cuda2hipRename["cudaCreateChannelDesc"] = {"hipCreateChannelDesc", CONV_TEX, API_RUNTIME}; - // unsupported yet by HIP - cuda2hipRename["cudaGetChannelDesc"] = {"hipGetChannelDesc", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaGetChannelDesc"] = {"hipGetChannelDesc", CONV_TEX, API_RUNTIME}; // Texture Object Management // structs - // unsupported yet by HIP - cuda2hipRename["cudaResourceDesc"] = {"hipResourceDesc", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaResourceViewDesc"] = {"hipResourceViewDesc", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaTextureDesc"] = {"hipTextureDesc", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaResourceDesc"] = {"hipResourceDesc", CONV_TEX, API_RUNTIME}; + cuda2hipRename["cudaResourceViewDesc"] = {"hipResourceViewDesc", CONV_TEX, API_RUNTIME}; + cuda2hipRename["cudaTextureDesc"] = {"hipTextureDesc", CONV_TEX, API_RUNTIME}; + cuda2hipRename["surfaceReference"] = {"hipSurfaceReference", CONV_SURFACE, API_RUNTIME, HIP_UNSUPPORTED}; + // Left unchanged + // cuda2hipRename["textureReference"] = {"textureReference", CONV_TEX, API_RUNTIME}; + + // typedefs + cuda2hipRename["cudaTextureObject_t"] = {"hipTextureObject_t", CONV_TEX, API_RUNTIME}; // enums // enum cudaResourceType - cuda2hipRename["cudaResourceType"] = {"hipResourceType", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (CUresourcetype) - cuda2hipRename["cudaResourceTypeArray"] = {"hipResourceTypeArray", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x00 // API_Driver ANALOGUE (CU_RESOURCE_TYPE_ARRAY = 0x00) - cuda2hipRename["cudaResourceTypeMipmappedArray"] = {"hipResourceTypeMipmappedArray", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x01 // API_Driver ANALOGUE (CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01) - cuda2hipRename["cudaResourceTypeLinear"] = {"hipResourceTypeLinear", CONV_TEX, API_RUNTIME, 
HIP_UNSUPPORTED}; // 0x02 // API_Driver ANALOGUE (CU_RESOURCE_TYPE_LINEAR = 0x02) - cuda2hipRename["cudaResourceTypePitch2D"] = {"hipResourceTypePitch2D", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x03 // API_Driver ANALOGUE (CU_RESOURCE_TYPE_PITCH2D = 0x03) + cuda2hipRename["cudaResourceType"] = {"hipResourceType", CONV_TEX, API_RUNTIME}; // API_Driver ANALOGUE (CUresourcetype) + cuda2hipRename["cudaResourceTypeArray"] = {"hipResourceTypeArray", CONV_TEX, API_RUNTIME}; // 0x00 // API_Driver ANALOGUE (CU_RESOURCE_TYPE_ARRAY = 0x00) + cuda2hipRename["cudaResourceTypeMipmappedArray"] = {"hipResourceTypeMipmappedArray", CONV_TEX, API_RUNTIME}; // 0x01 // API_Driver ANALOGUE (CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01) + cuda2hipRename["cudaResourceTypeLinear"] = {"hipResourceTypeLinear", CONV_TEX, API_RUNTIME}; // 0x02 // API_Driver ANALOGUE (CU_RESOURCE_TYPE_LINEAR = 0x02) + cuda2hipRename["cudaResourceTypePitch2D"] = {"hipResourceTypePitch2D", CONV_TEX, API_RUNTIME}; // 0x03 // API_Driver ANALOGUE (CU_RESOURCE_TYPE_PITCH2D = 0x03) // enum cudaResourceViewFormat - cuda2hipRename["cudaResourceViewFormat"] = {"hipResourceViewFormat", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (CUresourceViewFormat) - cuda2hipRename["cudaResViewFormatNone"] = {"hipResViewFormatNone", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x00 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_NONE = 0x00) - cuda2hipRename["cudaResViewFormatUnsignedChar1"] = {"hipResViewFormatUnsignedChar1", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x01 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_1X8 = 0x01) - cuda2hipRename["cudaResViewFormatUnsignedChar2"] = {"hipResViewFormatUnsignedChar2", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x02 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_2X8 = 0x02) - cuda2hipRename["cudaResViewFormatUnsignedChar4"] = {"hipResViewFormatUnsignedChar4", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x03 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_4X8 = 0x03) - 
cuda2hipRename["cudaResViewFormatSignedChar1"] = {"hipResViewFormatSignedChar1", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x04 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_1X8 = 0x04) - cuda2hipRename["cudaResViewFormatSignedChar2"] = {"hipResViewFormatSignedChar2", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x05 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_2X8 = 0x05) - cuda2hipRename["cudaResViewFormatSignedChar4"] = {"hipResViewFormatSignedChar4", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x06 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_4X8 = 0x06) - cuda2hipRename["cudaResViewFormatUnsignedShort1"] = {"hipResViewFormatUnsignedShort1", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x07 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_1X16 = 0x07) - cuda2hipRename["cudaResViewFormatUnsignedShort2"] = {"hipResViewFormatUnsignedShort2", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x08 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_2X16 = 0x08) - cuda2hipRename["cudaResViewFormatUnsignedShort4"] = {"hipResViewFormatUnsignedShort4", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x09 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_4X16 = 0x09) - cuda2hipRename["cudaResViewFormatSignedShort1"] = {"hipResViewFormatSignedShort1", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x0a // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_1X16 = 0x0a) - cuda2hipRename["cudaResViewFormatSignedShort2"] = {"hipResViewFormatSignedShort2", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x0b // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_2X16 = 0x0b) - cuda2hipRename["cudaResViewFormatSignedShort4"] = {"hipResViewFormatSignedShort4", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x0c // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_4X16 = 0x0c) - cuda2hipRename["cudaResViewFormatUnsignedInt1"] = {"hipResViewFormatUnsignedInt1", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x0d // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_1X32 = 0x0d) - 
cuda2hipRename["cudaResViewFormatUnsignedInt2"] = {"hipResViewFormatUnsignedInt2", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x0e // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_2X32 = 0x0e) - cuda2hipRename["cudaResViewFormatUnsignedInt4"] = {"hipResViewFormatUnsignedInt4", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x0f // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_4X32 = 0x0f) - cuda2hipRename["cudaResViewFormatSignedInt1"] = {"hipResViewFormatSignedInt1", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x10 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_1X32 = 0x10) - cuda2hipRename["cudaResViewFormatSignedInt2"] = {"hipResViewFormatSignedInt2", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x11 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_2X32 = 0x11) - cuda2hipRename["cudaResViewFormatSignedInt4"] = {"hipResViewFormatSignedInt4", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x12 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_4X32 = 0x12) - cuda2hipRename["cudaResViewFormatHalf1"] = {"hipResViewFormatHalf1", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x13 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13) - cuda2hipRename["cudaResViewFormatHalf2"] = {"hipResViewFormatHalf2", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x14 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14) - cuda2hipRename["cudaResViewFormatHalf4"] = {"hipResViewFormatHalf4", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x15 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15) - cuda2hipRename["cudaResViewFormatFloat1"] = {"hipResViewFormatFloat1", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x16 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16) - cuda2hipRename["cudaResViewFormatFloat2"] = {"hipResViewFormatFloat2", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x17 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17) - cuda2hipRename["cudaResViewFormatFloat4"] = {"hipResViewFormatFloat4", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x18 // 
API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18) - cuda2hipRename["cudaResViewFormatUnsignedBlockCompressed1"] = {"hipResViewFormatUnsignedBlockCompressed1", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x19 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19) - cuda2hipRename["cudaResViewFormatUnsignedBlockCompressed2"] = {"hipResViewFormatUnsignedBlockCompressed2", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x1a // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a) - cuda2hipRename["cudaResViewFormatUnsignedBlockCompressed3"] = {"hipResViewFormatUnsignedBlockCompressed3", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x1b // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b) - cuda2hipRename["cudaResViewFormatUnsignedBlockCompressed4"] = {"hipResViewFormatUnsignedBlockCompressed4", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x1c // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c) - cuda2hipRename["cudaResViewFormatSignedBlockCompressed4"] = {"hipResViewFormatSignedBlockCompressed4", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x1d // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d) - cuda2hipRename["cudaResViewFormatUnsignedBlockCompressed5"] = {"hipResViewFormatUnsignedBlockCompressed5", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x1e // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e) - cuda2hipRename["cudaResViewFormatSignedBlockCompressed5"] = {"hipResViewFormatSignedBlockCompressed5", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x1f // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f) - cuda2hipRename["cudaResViewFormatUnsignedBlockCompressed6H"] = {"hipResViewFormatUnsignedBlockCompressed6H", CONV_TEX, API_DRIVER, HIP_UNSUPPORTED}; // 0x20 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20) - cuda2hipRename["cudaResViewFormatSignedBlockCompressed6H"] = {"hipResViewFormatSignedBlockCompressed6H", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x21 // API_Driver 
ANALOGUE (CU_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21) - cuda2hipRename["cudaResViewFormatUnsignedBlockCompressed7"] = {"hipResViewFormatUnsignedBlockCompressed7", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; // 0x22 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22) + cuda2hipRename["cudaResourceViewFormat"] = {"hipResourceViewFormat", CONV_TEX, API_RUNTIME}; // API_Driver ANALOGUE (CUresourceViewFormat) + cuda2hipRename["cudaResViewFormatNone"] = {"hipResViewFormatNone", CONV_TEX, API_RUNTIME}; // 0x00 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_NONE = 0x00) + cuda2hipRename["cudaResViewFormatUnsignedChar1"] = {"hipResViewFormatUnsignedChar1", CONV_TEX, API_RUNTIME}; // 0x01 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_1X8 = 0x01) + cuda2hipRename["cudaResViewFormatUnsignedChar2"] = {"hipResViewFormatUnsignedChar2", CONV_TEX, API_RUNTIME}; // 0x02 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_2X8 = 0x02) + cuda2hipRename["cudaResViewFormatUnsignedChar4"] = {"hipResViewFormatUnsignedChar4", CONV_TEX, API_RUNTIME}; // 0x03 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_4X8 = 0x03) + cuda2hipRename["cudaResViewFormatSignedChar1"] = {"hipResViewFormatSignedChar1", CONV_TEX, API_RUNTIME}; // 0x04 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_1X8 = 0x04) + cuda2hipRename["cudaResViewFormatSignedChar2"] = {"hipResViewFormatSignedChar2", CONV_TEX, API_RUNTIME}; // 0x05 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_2X8 = 0x05) + cuda2hipRename["cudaResViewFormatSignedChar4"] = {"hipResViewFormatSignedChar4", CONV_TEX, API_RUNTIME}; // 0x06 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_4X8 = 0x06) + cuda2hipRename["cudaResViewFormatUnsignedShort1"] = {"hipResViewFormatUnsignedShort1", CONV_TEX, API_RUNTIME}; // 0x07 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_1X16 = 0x07) + cuda2hipRename["cudaResViewFormatUnsignedShort2"] = {"hipResViewFormatUnsignedShort2", CONV_TEX, API_RUNTIME}; // 0x08 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_2X16 = 0x08) + 
cuda2hipRename["cudaResViewFormatUnsignedShort4"] = {"hipResViewFormatUnsignedShort4", CONV_TEX, API_RUNTIME}; // 0x09 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_4X16 = 0x09) + cuda2hipRename["cudaResViewFormatSignedShort1"] = {"hipResViewFormatSignedShort1", CONV_TEX, API_RUNTIME}; // 0x0a // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_1X16 = 0x0a) + cuda2hipRename["cudaResViewFormatSignedShort2"] = {"hipResViewFormatSignedShort2", CONV_TEX, API_RUNTIME}; // 0x0b // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_2X16 = 0x0b) + cuda2hipRename["cudaResViewFormatSignedShort4"] = {"hipResViewFormatSignedShort4", CONV_TEX, API_RUNTIME}; // 0x0c // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_4X16 = 0x0c) + cuda2hipRename["cudaResViewFormatUnsignedInt1"] = {"hipResViewFormatUnsignedInt1", CONV_TEX, API_RUNTIME}; // 0x0d // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_1X32 = 0x0d) + cuda2hipRename["cudaResViewFormatUnsignedInt2"] = {"hipResViewFormatUnsignedInt2", CONV_TEX, API_RUNTIME}; // 0x0e // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_2X32 = 0x0e) + cuda2hipRename["cudaResViewFormatUnsignedInt4"] = {"hipResViewFormatUnsignedInt4", CONV_TEX, API_RUNTIME}; // 0x0f // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UINT_4X32 = 0x0f) + cuda2hipRename["cudaResViewFormatSignedInt1"] = {"hipResViewFormatSignedInt1", CONV_TEX, API_RUNTIME}; // 0x10 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_1X32 = 0x10) + cuda2hipRename["cudaResViewFormatSignedInt2"] = {"hipResViewFormatSignedInt2", CONV_TEX, API_RUNTIME}; // 0x11 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_2X32 = 0x11) + cuda2hipRename["cudaResViewFormatSignedInt4"] = {"hipResViewFormatSignedInt4", CONV_TEX, API_RUNTIME}; // 0x12 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SINT_4X32 = 0x12) + cuda2hipRename["cudaResViewFormatHalf1"] = {"hipResViewFormatHalf1", CONV_TEX, API_RUNTIME}; // 0x13 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13) + cuda2hipRename["cudaResViewFormatHalf2"] = 
{"hipResViewFormatHalf2", CONV_TEX, API_RUNTIME}; // 0x14 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14) + cuda2hipRename["cudaResViewFormatHalf4"] = {"hipResViewFormatHalf4", CONV_TEX, API_RUNTIME}; // 0x15 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15) + cuda2hipRename["cudaResViewFormatFloat1"] = {"hipResViewFormatFloat1", CONV_TEX, API_RUNTIME}; // 0x16 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16) + cuda2hipRename["cudaResViewFormatFloat2"] = {"hipResViewFormatFloat2", CONV_TEX, API_RUNTIME}; // 0x17 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17) + cuda2hipRename["cudaResViewFormatFloat4"] = {"hipResViewFormatFloat4", CONV_TEX, API_RUNTIME}; // 0x18 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18) + cuda2hipRename["cudaResViewFormatUnsignedBlockCompressed1"] = {"hipResViewFormatUnsignedBlockCompressed1", CONV_TEX, API_RUNTIME}; // 0x19 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19) + cuda2hipRename["cudaResViewFormatUnsignedBlockCompressed2"] = {"hipResViewFormatUnsignedBlockCompressed2", CONV_TEX, API_RUNTIME}; // 0x1a // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a) + cuda2hipRename["cudaResViewFormatUnsignedBlockCompressed3"] = {"hipResViewFormatUnsignedBlockCompressed3", CONV_TEX, API_RUNTIME}; // 0x1b // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b) + cuda2hipRename["cudaResViewFormatUnsignedBlockCompressed4"] = {"hipResViewFormatUnsignedBlockCompressed4", CONV_TEX, API_RUNTIME}; // 0x1c // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c) + cuda2hipRename["cudaResViewFormatSignedBlockCompressed4"] = {"hipResViewFormatSignedBlockCompressed4", CONV_TEX, API_RUNTIME}; // 0x1d // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d) + cuda2hipRename["cudaResViewFormatUnsignedBlockCompressed5"] = {"hipResViewFormatUnsignedBlockCompressed5", CONV_TEX, API_RUNTIME}; // 0x1e // API_Driver ANALOGUE 
(CU_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e) + cuda2hipRename["cudaResViewFormatSignedBlockCompressed5"] = {"hipResViewFormatSignedBlockCompressed5", CONV_TEX, API_RUNTIME}; // 0x1f // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f) + cuda2hipRename["cudaResViewFormatUnsignedBlockCompressed6H"] = {"hipResViewFormatUnsignedBlockCompressed6H", CONV_TEX, API_RUNTIME}; // 0x20 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20) + cuda2hipRename["cudaResViewFormatSignedBlockCompressed6H"] = {"hipResViewFormatSignedBlockCompressed6H", CONV_TEX, API_RUNTIME}; // 0x21 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21) + cuda2hipRename["cudaResViewFormatUnsignedBlockCompressed7"] = {"hipResViewFormatUnsignedBlockCompressed7", CONV_TEX, API_RUNTIME}; // 0x22 // API_Driver ANALOGUE (CU_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22) - cuda2hipRename["cudaTextureAddressMode"] = {"hipTextureAddressMode", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaAddressModeWrap"] = {"hipAddressModeWrap", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaAddressModeClamp"] = {"hipAddressModeClamp", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaAddressModeMirror"] = {"hipAddressModeMirror", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaAddressModeBorder"] = {"hipAddressModeBorder", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaTextureAddressMode"] = {"hipTextureAddressMode", CONV_TEX, API_RUNTIME}; + cuda2hipRename["cudaAddressModeWrap"] = {"hipAddressModeWrap", CONV_TEX, API_RUNTIME}; + cuda2hipRename["cudaAddressModeClamp"] = {"hipAddressModeClamp", CONV_TEX, API_RUNTIME}; + cuda2hipRename["cudaAddressModeMirror"] = {"hipAddressModeMirror", CONV_TEX, API_RUNTIME}; + cuda2hipRename["cudaAddressModeBorder"] = {"hipAddressModeBorder", CONV_TEX, API_RUNTIME}; // functions - cuda2hipRename["cudaCreateTextureObject"] = {"hipCreateTextureObject", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; - 
cuda2hipRename["cudaDestroyTextureObject"] = {"hipDestroyTextureObject", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaGetTextureObjectResourceDesc"] = {"hipGetTextureObjectResourceDesc", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaGetTextureObjectResourceViewDesc"] = {"hipGetTextureObjectResourceViewDesc", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaGetTextureObjectTextureDesc"] = {"hipGetTextureObjectTextureDesc", CONV_TEX, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaCreateTextureObject"] = {"hipCreateTextureObject", CONV_TEX, API_RUNTIME}; + cuda2hipRename["cudaDestroyTextureObject"] = {"hipDestroyTextureObject", CONV_TEX, API_RUNTIME}; + cuda2hipRename["cudaGetTextureObjectResourceDesc"] = {"hipGetTextureObjectResourceDesc", CONV_TEX, API_RUNTIME}; + cuda2hipRename["cudaGetTextureObjectResourceViewDesc"] = {"hipGetTextureObjectResourceViewDesc", CONV_TEX, API_RUNTIME}; + cuda2hipRename["cudaGetTextureObjectTextureDesc"] = {"hipGetTextureObjectTextureDesc", CONV_TEX, API_RUNTIME}; // Surface Reference Management // unsupported yet by HIP @@ -1718,28 +1938,26 @@ struct cuda2hipMap { cuda2hipRename["cudaIpcMemHandle_st"] = {"hipIpcMemHandle_t", CONV_TYPE, API_RUNTIME}; // IPC functions - cuda2hipRename["cudaIpcCloseMemHandle"] = {"hipIpcCloseMemHandle", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaIpcGetEventHandle"] = {"hipIpcGetEventHandle", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaIpcGetMemHandle"] = {"hipIpcGetMemHandle", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaIpcOpenEventHandle"] = {"hipIpcOpenEventHandle", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaIpcOpenMemHandle"] = {"hipIpcOpenMemHandle", CONV_DEV, API_RUNTIME}; + cuda2hipRename["cudaIpcCloseMemHandle"] = {"hipIpcCloseMemHandle", CONV_DEVICE, API_RUNTIME}; + cuda2hipRename["cudaIpcGetEventHandle"] = {"hipIpcGetEventHandle", CONV_DEVICE, API_RUNTIME}; + cuda2hipRename["cudaIpcGetMemHandle"] = {"hipIpcGetMemHandle", 
CONV_DEVICE, API_RUNTIME}; + cuda2hipRename["cudaIpcOpenEventHandle"] = {"hipIpcOpenEventHandle", CONV_DEVICE, API_RUNTIME}; + cuda2hipRename["cudaIpcOpenMemHandle"] = {"hipIpcOpenMemHandle", CONV_DEVICE, API_RUNTIME}; // OpenGL Interoperability - // unsupported yet by HIP cuda2hipRename["cudaGLGetDevices"] = {"hipGLGetDevices", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; cuda2hipRename["cudaGraphicsGLRegisterBuffer"] = {"hipGraphicsGLRegisterBuffer", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; cuda2hipRename["cudaGraphicsGLRegisterImage"] = {"hipGraphicsGLRegisterImage", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; cuda2hipRename["cudaWGLGetDevice"] = {"hipWGLGetDevice", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // Graphics Interoperability - // unsupported yet by HIP - cuda2hipRename["cudaGraphicsMapResources"] = {"hipGraphicsMapResources", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaGraphicsResourceGetMappedMipmappedArray"] = {"hipGraphicsResourceGetMappedMipmappedArray", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaGraphicsResourceGetMappedPointer"] = {"hipGraphicsResourceGetMappedPointer", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaGraphicsResourceSetMapFlags"] = {"hipGraphicsResourceSetMapFlags", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaGraphicsSubResourceGetMappedArray"] = {"hipGraphicsSubResourceGetMappedArray", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaGraphicsUnmapResources"] = {"hipGraphicsUnmapResources", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; - cuda2hipRename["cudaGraphicsUnregisterResource"] = {"hipGraphicsUnregisterResource", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; + cuda2hipRename["cudaGraphicsMapResources"] = {"hipGraphicsMapResources", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGraphicsMapResources) + cuda2hipRename["cudaGraphicsResourceGetMappedMipmappedArray"] = 
{"hipGraphicsResourceGetMappedMipmappedArray", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGraphicsResourceGetMappedMipmappedArray) + cuda2hipRename["cudaGraphicsResourceGetMappedPointer"] = {"hipGraphicsResourceGetMappedPointer", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGraphicsResourceGetMappedPointer) + cuda2hipRename["cudaGraphicsResourceSetMapFlags"] = {"hipGraphicsResourceSetMapFlags", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGraphicsResourceSetMapFlags) + cuda2hipRename["cudaGraphicsSubResourceGetMappedArray"] = {"hipGraphicsSubResourceGetMappedArray", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGraphicsSubResourceGetMappedArray) + cuda2hipRename["cudaGraphicsUnmapResources"] = {"hipGraphicsUnmapResources", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGraphicsUnmapResources) + cuda2hipRename["cudaGraphicsUnregisterResource"] = {"hipGraphicsUnregisterResource", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGraphicsUnregisterResource) cuda2hipRename["cudaGraphicsCubeFace"] = {"hipGraphicsCubeFace", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; cuda2hipRename["cudaGraphicsCubeFacePositiveX"] = {"hipGraphicsCubeFacePositiveX", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; @@ -1763,6 +1981,142 @@ struct cuda2hipMap { cuda2hipRename["cudaGraphicsRegisterFlagsSurfaceLoadStore"] = {"hipGraphicsRegisterFlagsSurfaceLoadStore", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; // 4 // API_Driver ANALOGUE (CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 0x04) cuda2hipRename["cudaGraphicsRegisterFlagsTextureGather"] = {"hipGraphicsRegisterFlagsTextureGather", CONV_GRAPHICS, API_RUNTIME, HIP_UNSUPPORTED}; // 8 // API_Driver ANALOGUE (CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 0x08) + // OpenGL Interoperability + // enum cudaGLDeviceList + cuda2hipRename["cudaGLDeviceList"] = {"hipGLDeviceList", CONV_GL, 
API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (CUGLDeviceList) + cuda2hipRename["cudaGLDeviceListAll"] = {"HIP_GL_DEVICE_LIST_ALL", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // 0x01 // API_Driver ANALOGUE (CU_GL_DEVICE_LIST_ALL) + cuda2hipRename["cudaGLDeviceListCurrentFrame"] = {"HIP_GL_DEVICE_LIST_CURRENT_FRAME", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // 0x02 // API_Driver ANALOGUE (CU_GL_DEVICE_LIST_CURRENT_FRAME) + cuda2hipRename["cudaGLDeviceListNextFrame"] = {"HIP_GL_DEVICE_LIST_NEXT_FRAME", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // 0x03 // API_Driver ANALOGUE (CU_GL_DEVICE_LIST_NEXT_FRAME) + + cuda2hipRename["cudaGLGetDevices"] = {"hipGLGetDevices", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGLGetDevices) + cuda2hipRename["cudaGraphicsGLRegisterBuffer"] = {"hipGraphicsGLRegisterBuffer", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGraphicsGLRegisterBuffer) + cuda2hipRename["cudaGraphicsGLRegisterImage"] = {"hipGraphicsGLRegisterImage", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGraphicsGLRegisterImage) + cuda2hipRename["cudaWGLGetDevice"] = {"hipWGLGetDevice", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuWGLGetDevice) + + // OpenGL Interoperability [DEPRECATED] + // enum cudaGLMapFlags + cuda2hipRename["cudaGLMapFlags"] = {"hipGLMapFlags", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (CUGLmap_flags) + cuda2hipRename["cudaGLMapFlagsNone"] = {"HIP_GL_MAP_RESOURCE_FLAGS_NONE", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // 0x00 // API_Driver ANALOGUE (CU_GL_MAP_RESOURCE_FLAGS_NONE) + cuda2hipRename["cudaGLMapFlagsReadOnly"] = {"HIP_GL_MAP_RESOURCE_FLAGS_READ_ONLY", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // 0x01 // API_Driver ANALOGUE (CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY) + cuda2hipRename["cudaGLMapFlagsWriteDiscard"] = {"HIP_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // 0x02 // API_Driver ANALOGUE 
(CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD) + + cuda2hipRename["cudaGLMapBufferObject"] = {"hipGLMapBufferObject__", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // Not equal to cuGLMapBufferObject due to different signatures + cuda2hipRename["cudaGLMapBufferObjectAsync"] = {"hipGLMapBufferObjectAsync__", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // Not equal to cuGLMapBufferObjectAsync due to different signatures + cuda2hipRename["cudaGLRegisterBufferObject"] = {"hipGLRegisterBufferObject", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGLRegisterBufferObject) + cuda2hipRename["cudaGLSetBufferObjectMapFlags"] = {"hipGLSetBufferObjectMapFlags", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGLSetBufferObjectMapFlags) + cuda2hipRename["cudaGLSetGLDevice"] = {"hipGLSetGLDevice", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // no API_Driver ANALOGUE + cuda2hipRename["cudaGLUnmapBufferObject"] = {"hipGLUnmapBufferObject", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGLUnmapBufferObject) + cuda2hipRename["cudaGLUnmapBufferObjectAsync"] = {"hipGLUnmapBufferObjectAsync", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGLUnmapBufferObjectAsync) + cuda2hipRename["cudaGLUnregisterBufferObject"] = {"hipGLUnregisterBufferObject", CONV_GL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGLUnregisterBufferObject) + + // Direct3D 9 Interoperability + // enum cudaD3D9DeviceList + cuda2hipRename["cudaD3D9DeviceList"] = {"hipD3D9DeviceList", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (CUd3d9DeviceList) + cuda2hipRename["cudaD3D9DeviceListAll"] = {"HIP_D3D9_DEVICE_LIST_ALL", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // 1 // API_Driver ANALOGUE (CU_D3D9_DEVICE_LIST_ALL) + cuda2hipRename["cudaD3D9DeviceListCurrentFrame"] = {"HIP_D3D9_DEVICE_LIST_CURRENT_FRAME", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // 2 // API_Driver ANALOGUE (CU_D3D9_DEVICE_LIST_CURRENT_FRAME) +
cuda2hipRename["cudaD3D9DeviceListNextFrame"] = {"HIP_D3D9_DEVICE_LIST_NEXT_FRAME", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // 3 // API_Driver ANALOGUE (CU_D3D9_DEVICE_LIST_NEXT_FRAME) + + cuda2hipRename["cudaD3D9GetDevice"] = {"hipD3D9GetDevice", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D9GetDevice) + cuda2hipRename["cudaD3D9GetDevices"] = {"hipD3D9GetDevices", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D9GetDevices) + cuda2hipRename["cudaD3D9GetDirect3DDevice"] = {"hipD3D9GetDirect3DDevice", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D9GetDirect3DDevice) + cuda2hipRename["cudaD3D9SetDirect3DDevice"] = {"hipD3D9SetDirect3DDevice", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // no API_Driver ANALOGUE + cuda2hipRename["cudaGraphicsD3D9RegisterResource"] = {"hipGraphicsD3D9RegisterResource", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGraphicsD3D9RegisterResource) + + // Direct3D 9 Interoperability [DEPRECATED] + // enum cudaD3D9MapFlags + cuda2hipRename["cudaD3D9MapFlags"] = {"hipD3D9MapFlags", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (CUd3d9map_flags) + cuda2hipRename["cudaD3D9MapFlagsNone"] = {"HIP_D3D9_MAPRESOURCE_FLAGS_NONE", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // 0 // API_Driver ANALOGUE (CU_D3D9_MAPRESOURCE_FLAGS_NONE) + cuda2hipRename["cudaD3D9MapFlagsReadOnly"] = {"HIP_D3D9_MAPRESOURCE_FLAGS_READONLY", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // 1 // API_Driver ANALOGUE (CU_D3D9_MAPRESOURCE_FLAGS_READONLY) + cuda2hipRename["cudaD3D9MapFlagsWriteDiscard"] = {"HIP_D3D9_MAPRESOURCE_FLAGS_WRITEDISCARD", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // 2 // API_Driver ANALOGUE (CU_D3D9_MAPRESOURCE_FLAGS_WRITEDISCARD) + + // enum cudaD3D9RegisterFlags + cuda2hipRename["cudaD3D9RegisterFlags"] = {"hipD3D9RegisterFlags", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (CUd3d9Register_flags) + 
cuda2hipRename["cudaD3D9RegisterFlagsNone"] = {"HIP_D3D9_REGISTER_FLAGS_NONE", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // 0 // API_Driver ANALOGUE (CU_D3D9_REGISTER_FLAGS_NONE) + cuda2hipRename["cudaD3D9RegisterFlagsArray"] = {"HIP_D3D9_REGISTER_FLAGS_ARRAY", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // 1 // API_Driver ANALOGUE (CU_D3D9_REGISTER_FLAGS_ARRAY) + + cuda2hipRename["cudaD3D9MapResources"] = {"hipD3D9MapResources", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D9MapResources) + cuda2hipRename["cudaD3D9RegisterResource"] = {"hipD3D9RegisterResource", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D9RegisterResource) + cuda2hipRename["cudaD3D9ResourceGetMappedArray"] = {"hipD3D9ResourceGetMappedArray", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D9ResourceGetMappedArray) + cuda2hipRename["cudaD3D9ResourceGetMappedPitch"] = {"hipD3D9ResourceGetMappedPitch", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D9ResourceGetMappedPitch) + cuda2hipRename["cudaD3D9ResourceGetMappedPointer"] = {"hipD3D9ResourceGetMappedPointer", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D9ResourceGetMappedPointer) + cuda2hipRename["cudaD3D9ResourceGetMappedSize"] = {"hipD3D9ResourceGetMappedSize", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D9ResourceGetMappedSize) + cuda2hipRename["cudaD3D9ResourceGetSurfaceDimensions"] = {"hipD3D9ResourceGetSurfaceDimensions", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D9ResourceGetSurfaceDimensions) + cuda2hipRename["cudaD3D9ResourceSetMapFlags"] = {"hipD3D9ResourceSetMapFlags", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D9ResourceSetMapFlags) + cuda2hipRename["cudaD3D9UnmapResources"] = {"hipD3D9UnmapResources", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D9UnmapResources) +
cuda2hipRename["cudaD3D9UnregisterResource"] = {"hipD3D9UnregisterResource", CONV_D3D9, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D9UnregisterResource) + + // Direct3D 10 Interoperability + // enum cudaD3D10DeviceList + cuda2hipRename["cudaD3D10DeviceList"] = {"hipd3d10DeviceList", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (CUd3d10DeviceList) + cuda2hipRename["cudaD3D10DeviceListAll"] = {"HIP_D3D10_DEVICE_LIST_ALL", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // 1 // API_Driver ANALOGUE (CU_D3D10_DEVICE_LIST_ALL) + cuda2hipRename["cudaD3D10DeviceListCurrentFrame"] = {"HIP_D3D10_DEVICE_LIST_CURRENT_FRAME", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // 2 // API_Driver ANALOGUE (CU_D3D10_DEVICE_LIST_CURRENT_FRAME) + cuda2hipRename["cudaD3D10DeviceListNextFrame"] = {"HIP_D3D10_DEVICE_LIST_NEXT_FRAME", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // 3 // API_Driver ANALOGUE (CU_D3D10_DEVICE_LIST_NEXT_FRAME) + + cuda2hipRename["cudaD3D10GetDevice"] = {"hipD3D10GetDevice", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D10GetDevice) + cuda2hipRename["cudaD3D10GetDevices"] = {"hipD3D10GetDevices", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D10GetDevices) + cuda2hipRename["cudaGraphicsD3D10RegisterResource"] = {"hipGraphicsD3D10RegisterResource", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGraphicsD3D10RegisterResource) + + // Direct3D 10 Interoperability [DEPRECATED] + // enum cudaD3D10MapFlags + cuda2hipRename["cudaD3D10MapFlags"] = {"hipD3D10MapFlags", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (CUd3d10map_flags) + cuda2hipRename["cudaD3D10MapFlagsNone"] = {"HIP_D3D10_MAPRESOURCE_FLAGS_NONE", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // 0 // API_Driver ANALOGUE (CU_D3D10_MAPRESOURCE_FLAGS_NONE) + cuda2hipRename["cudaD3D10MapFlagsReadOnly"] = {"HIP_D3D10_MAPRESOURCE_FLAGS_READONLY", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // 1 // 
API_Driver ANALOGUE (CU_D3D10_MAPRESOURCE_FLAGS_READONLY) + cuda2hipRename["cudaD3D10MapFlagsWriteDiscard"] = {"HIP_D3D10_MAPRESOURCE_FLAGS_WRITEDISCARD", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // 2 // API_Driver ANALOGUE (CU_D3D10_MAPRESOURCE_FLAGS_WRITEDISCARD) + + // enum cudaD3D10RegisterFlags + cuda2hipRename["cudaD3D10RegisterFlags"] = {"hipD3D10RegisterFlags", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (CUd3d10Register_flags) + cuda2hipRename["cudaD3D10RegisterFlagsNone"] = {"HIP_D3D10_REGISTER_FLAGS_NONE", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // 0 // API_Driver ANALOGUE (CU_D3D10_REGISTER_FLAGS_NONE) + cuda2hipRename["cudaD3D10RegisterFlagsArray"] = {"HIP_D3D10_REGISTER_FLAGS_ARRAY", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // 1 // API_Driver ANALOGUE (CU_D3D10_REGISTER_FLAGS_ARRAY) + + cuda2hipRename["cudaD3D10GetDirect3DDevice"] = {"hipD3D10GetDirect3DDevice", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D10GetDirect3DDevice) + cuda2hipRename["cudaD3D10MapResources"] = {"hipD3D10MapResources", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D10MapResources) + cuda2hipRename["cudaD3D10RegisterResource"] = {"hipD3D10RegisterResource", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D10RegisterResource) + cuda2hipRename["cudaD3D10ResourceGetMappedArray"] = {"hipD3D10ResourceGetMappedArray", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D10ResourceGetMappedArray) + cuda2hipRename["cudaD3D10ResourceGetMappedPitch"] = {"hipD3D10ResourceGetMappedPitch", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D10ResourceGetMappedPitch) + cuda2hipRename["cudaD3D10ResourceGetMappedPointer"] = {"hipD3D10ResourceGetMappedPointer", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D10ResourceGetMappedPointer) + cuda2hipRename["cudaD3D10ResourceGetMappedSize"] = {"hipD3D10ResourceGetMappedSize",
CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D10ResourceGetMappedSize) + cuda2hipRename["cudaD3D10ResourceGetSurfaceDimensions"] = {"hipD3D10ResourceGetSurfaceDimensions", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D10ResourceGetSurfaceDimensions) + cuda2hipRename["cudaD3D10ResourceSetMapFlags"] = {"hipD3D10ResourceSetMapFlags", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D10ResourceSetMapFlags) + cuda2hipRename["cudaD3D10SetDirect3DDevice"] = {"hipD3D10SetDirect3DDevice", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // no API_Driver ANALOGUE + cuda2hipRename["cudaD3D10UnmapResources"] = {"hipD3D10UnmapResources", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D10UnmapResources) + cuda2hipRename["cudaD3D10UnregisterResource"] = {"hipD3D10UnregisterResource", CONV_D3D10, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D10UnregisterResource) + + // Direct3D 11 Interoperability + // enum cudaD3D11DeviceList + cuda2hipRename["cudaD3D11DeviceList"] = {"hipd3d11DeviceList", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (CUd3d11DeviceList) + cuda2hipRename["cudaD3D11DeviceListAll"] = {"HIP_D3D11_DEVICE_LIST_ALL", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}; // 1 // API_Driver ANALOGUE (CU_D3D11_DEVICE_LIST_ALL) + cuda2hipRename["cudaD3D11DeviceListCurrentFrame"] = {"HIP_D3D11_DEVICE_LIST_CURRENT_FRAME", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}; // 2 // API_Driver ANALOGUE (CU_D3D11_DEVICE_LIST_CURRENT_FRAME) + cuda2hipRename["cudaD3D11DeviceListNextFrame"] = {"HIP_D3D11_DEVICE_LIST_NEXT_FRAME", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}; // 3 // API_Driver ANALOGUE (CU_D3D11_DEVICE_LIST_NEXT_FRAME) + + cuda2hipRename["cudaD3D11GetDevice"] = {"hipD3D11GetDevice", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D11GetDevice) + cuda2hipRename["cudaD3D11GetDevices"] = {"hipD3D11GetDevices", CONV_D3D11, API_RUNTIME, 
HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D11GetDevices) + cuda2hipRename["cudaGraphicsD3D11RegisterResource"] = {"hipGraphicsD3D11RegisterResource", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGraphicsD3D11RegisterResource) + + // Direct3D 11 Interoperability [DEPRECATED] + cuda2hipRename["cudaD3D11GetDevice"] = {"hipD3D11GetDevice", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D11GetDevice) + cuda2hipRename["cudaD3D11GetDevices"] = {"hipD3D11GetDevices", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuD3D11GetDevices) + cuda2hipRename["cudaGraphicsD3D11RegisterResource"] = {"hipGraphicsD3D11RegisterResource", CONV_D3D11, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGraphicsD3D11RegisterResource) + + // VDPAU Interoperability + cuda2hipRename["cudaGraphicsVDPAURegisterOutputSurface"] = {"hipGraphicsVDPAURegisterOutputSurface", CONV_VDPAU, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGraphicsVDPAURegisterOutputSurface) + cuda2hipRename["cudaGraphicsVDPAURegisterVideoSurface"] = {"hipGraphicsVDPAURegisterVideoSurface", CONV_VDPAU, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGraphicsVDPAURegisterVideoSurface) + cuda2hipRename["cudaVDPAUGetDevice"] = {"hipVDPAUGetDevice", CONV_VDPAU, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuVDPAUGetDevice) + cuda2hipRename["cudaVDPAUSetVDPAUDevice"] = {"hipVDPAUSetDevice", CONV_VDPAU, API_RUNTIME, HIP_UNSUPPORTED}; // no API_Driver ANALOGUE + + // EGL Interoperability + cuda2hipRename["cudaEglStreamConnection"] = {"hipEglStreamConnection", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (CUeglStreamConnection) + + cuda2hipRename["cudaEGLStreamConsumerAcquireFrame"] = {"hipEGLStreamConsumerAcquireFrame", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuEGLStreamConsumerAcquireFrame) + cuda2hipRename["cudaEGLStreamConsumerConnect"] = {"hipEGLStreamConsumerConnect", CONV_EGL, 
API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuEGLStreamConsumerConnect) + cuda2hipRename["cudaEGLStreamConsumerConnectWithFlags"] = {"hipEGLStreamConsumerConnectWithFlags", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuEGLStreamConsumerConnectWithFlags) + cuda2hipRename["cudaEGLStreamConsumerReleaseFrame"] = {"hipEGLStreamConsumerReleaseFrame", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuEGLStreamConsumerReleaseFrame) + cuda2hipRename["cudaEGLStreamProducerConnect"] = {"hipEGLStreamProducerConnect", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuEGLStreamProducerConnect) + cuda2hipRename["cudaEGLStreamProducerDisconnect"] = {"hipEGLStreamProducerDisconnect", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuEGLStreamProducerDisconnect) + cuda2hipRename["cudaEGLStreamProducerPresentFrame"] = {"hipEGLStreamProducerPresentFrame", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuEGLStreamProducerPresentFrame) + cuda2hipRename["cudaEGLStreamProducerReturnFrame"] = {"hipEGLStreamProducerReturnFrame", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuEGLStreamProducerReturnFrame) + cuda2hipRename["cudaGraphicsEGLRegisterImage"] = {"hipGraphicsEGLRegisterImage", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGraphicsEGLRegisterImage) + cuda2hipRename["cudaGraphicsResourceGetMappedEglFrame"] = {"hipGraphicsResourceGetMappedEglFrame", CONV_EGL, API_RUNTIME, HIP_UNSUPPORTED}; // API_Driver ANALOGUE (cuGraphicsResourceGetMappedEglFrame) + //---------------------------------------BLAS-------------------------------------// // Blas types cuda2hipRename["cublasHandle_t"] = {"hipblasHandle_t", CONV_TYPE, API_BLAS}; diff --git a/include/hip/hcc_detail/driver_types.h b/include/hip/hcc_detail/driver_types.h index 3578ddc609..ce5e9789be 100644 --- a/include/hip/hcc_detail/driver_types.h +++ b/include/hip/hcc_detail/driver_types.h @@ -25,20 
+25,220 @@ THE SOFTWARE. enum hipChannelFormatKind { - hipChannelFormatKindSigned = 0, - hipChannelFormatKindUnsigned = 1, - hipChannelFormatKindFloat = 2, - hipChannelFormatKindNone = 3 + hipChannelFormatKindSigned = 0, + hipChannelFormatKindUnsigned = 1, + hipChannelFormatKindFloat = 2, + hipChannelFormatKindNone = 3 }; struct hipChannelFormatDesc { - int x; - int y; - int z; - int w; - enum hipChannelFormatKind f; + int x; + int y; + int z; + int w; + enum hipChannelFormatKind f; }; +struct hipArray { + void* data; //FIXME: generalize this + struct hipChannelFormatDesc desc; + unsigned int type; + unsigned int width; + unsigned int height; + unsigned int depth; +}; + +typedef struct hipArray* hipArray_t; + +typedef const struct hipArray* hipArray_const_t; + +// TODO: It needs to be modified since it was just copied from hipArray. +struct hipMipmappedArray { + void* data; //FIXME: generalize this + struct hipChannelFormatDesc desc; + unsigned int width; + unsigned int height; + unsigned int depth; +}; + +typedef struct hipMipmappedArray *hipMipmappedArray_t; + +typedef const struct hipMipmappedArray *hipMipmappedArray_const_t; + +/** + * hip resource types + */ +enum hipResourceType +{ + hipResourceTypeArray = 0x00, + hipResourceTypeMipmappedArray = 0x01, + hipResourceTypeLinear = 0x02, + hipResourceTypePitch2D = 0x03 +}; + +/** + * hip texture resource view formats + */ +enum hipResourceViewFormat +{ + hipResViewFormatNone = 0x00, + hipResViewFormatUnsignedChar1 = 0x01, + hipResViewFormatUnsignedChar2 = 0x02, + hipResViewFormatUnsignedChar4 = 0x03, + hipResViewFormatSignedChar1 = 0x04, + hipResViewFormatSignedChar2 = 0x05, + hipResViewFormatSignedChar4 = 0x06, + hipResViewFormatUnsignedShort1 = 0x07, + hipResViewFormatUnsignedShort2 = 0x08, + hipResViewFormatUnsignedShort4 = 0x09, + hipResViewFormatSignedShort1 = 0x0a, + hipResViewFormatSignedShort2 = 0x0b, + hipResViewFormatSignedShort4 = 0x0c, + hipResViewFormatUnsignedInt1 = 0x0d, + 
hipResViewFormatUnsignedInt2 = 0x0e, + hipResViewFormatUnsignedInt4 = 0x0f, + hipResViewFormatSignedInt1 = 0x10, + hipResViewFormatSignedInt2 = 0x11, + hipResViewFormatSignedInt4 = 0x12, + hipResViewFormatHalf1 = 0x13, + hipResViewFormatHalf2 = 0x14, + hipResViewFormatHalf4 = 0x15, + hipResViewFormatFloat1 = 0x16, + hipResViewFormatFloat2 = 0x17, + hipResViewFormatFloat4 = 0x18, + hipResViewFormatUnsignedBlockCompressed1 = 0x19, + hipResViewFormatUnsignedBlockCompressed2 = 0x1a, + hipResViewFormatUnsignedBlockCompressed3 = 0x1b, + hipResViewFormatUnsignedBlockCompressed4 = 0x1c, + hipResViewFormatSignedBlockCompressed4 = 0x1d, + hipResViewFormatUnsignedBlockCompressed5 = 0x1e, + hipResViewFormatSignedBlockCompressed5 = 0x1f, + hipResViewFormatUnsignedBlockCompressed6H = 0x20, + hipResViewFormatSignedBlockCompressed6H = 0x21, + hipResViewFormatUnsignedBlockCompressed7 = 0x22 +}; + +/** + * HIP resource descriptor + */ +struct hipResourceDesc { + enum hipResourceType resType; + + union { + struct { + hipArray_t array; + } array; + struct { + hipMipmappedArray_t mipmap; + } mipmap; + struct { + void *devPtr; + struct hipChannelFormatDesc desc; + size_t sizeInBytes; + } linear; + struct { + void *devPtr; + struct hipChannelFormatDesc desc; + size_t width; + size_t height; + size_t pitchInBytes; + } pitch2D; + } res; +}; + +/** + * hip resource view descriptor + */ +struct hipResourceViewDesc +{ + enum hipResourceViewFormat format; + size_t width; + size_t height; + size_t depth; + unsigned int firstMipmapLevel; + unsigned int lastMipmapLevel; + unsigned int firstLayer; + unsigned int lastLayer; +}; + +/** + * Memory copy types + * + */ +typedef enum hipMemcpyKind { + hipMemcpyHostToHost = 0, ///< Host-to-Host Copy + hipMemcpyHostToDevice = 1, ///< Host-to-Device Copy + hipMemcpyDeviceToHost = 2, ///< Device-to-Host Copy + hipMemcpyDeviceToDevice =3, ///< Device-to-Device Copy + hipMemcpyDefault = 4 ///< Runtime will automatically determine copy-kind based on virtual 
addresses. +} hipMemcpyKind; + +struct hipPitchedPtr +{ + void *ptr; + size_t pitch; + size_t xsize; + size_t ysize; +}; + +struct hipExtent { + size_t width; // Width in elements when referring to array memory, in bytes when referring to linear memory + size_t height; + size_t depth; +}; + +struct hipPos { + size_t x; + size_t y; + size_t z; +}; + +struct hipMemcpy3DParms { + hipArray_t srcArray; + struct hipPos srcPos; + struct hipPitchedPtr srcPtr; + + hipArray_t dstArray; + struct hipPos dstPos; + struct hipPitchedPtr dstPtr; + + struct hipExtent extent; + enum hipMemcpyKind kind; +}; + +static __inline__ struct hipPitchedPtr make_hipPitchedPtr(void *d, size_t p, size_t xsz, size_t ysz) +{ + struct hipPitchedPtr s; + + s.ptr = d; + s.pitch = p; + s.xsize = xsz; + s.ysize = ysz; + + return s; +} + +static __inline__ struct hipPos make_hipPos(size_t x, size_t y, size_t z) +{ + struct hipPos p; + + p.x = x; + p.y = y; + p.z = z; + + return p; +} + +static __inline__ struct hipExtent make_hipExtent(size_t w, size_t h, size_t d) +{ + struct hipExtent e; + + e.width = w; + e.height = h; + e.depth = d; + + return e; +} #endif diff --git a/include/hip/hcc_detail/grid_launch_GGL.hpp b/include/hip/hcc_detail/grid_launch_GGL.hpp index eac48b595e..4c632f9d68 100644 --- a/include/hip/hcc_detail/grid_launch_GGL.hpp +++ b/include/hip/hcc_detail/grid_launch_GGL.hpp @@ -28,7 +28,7 @@ THE SOFTWARE. #include "helpers.hpp" #include "hc.hpp" -#include "hip_hcc.h" +#include "hip/hip_hcc.h" #include "hip_runtime.h" #include diff --git a/include/hip/hcc_detail/hip_db.h b/include/hip/hcc_detail/hip_db.h new file mode 100644 index 0000000000..eb5c3c0ac8 --- /dev/null +++ b/include/hip/hcc_detail/hip_db.h @@ -0,0 +1,22 @@ +/** + * @defgroup HipDb HCC-specific debug facilities + * @{ + */ + + +/** + * @brief Print memory tracker information for this pointer. + * + * HIP maintains a table for all memory allocations performed by the application.
+ * If targetAddress is 0, the entire table is printed to stderr. + * If targetAddress is non-null, this routine will perform some forensic analysis + * to find the pointer + */ +void hipdbPrintMem(void *targetAddress); + + + +// doxygen end HipDb +/** + * @} + */ diff --git a/include/hip/hcc_detail/hip_hcc.h b/include/hip/hcc_detail/hip_hcc.h deleted file mode 100644 index fc04917931..0000000000 --- a/include/hip/hcc_detail/hip_hcc.h +++ /dev/null @@ -1,103 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ - -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_HCC_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_HCC_H - -#include "hip/hip_runtime_api.h" - -#if __cplusplus -#ifdef __HCC__ -#include - - -/** - *------------------------------------------------------------------------------------------------- - *------------------------------------------------------------------------------------------------- - * @defgroup HCC-specific features - * @warning These APIs provide access to special features of HCC compiler and are not available through the CUDA path. - * @{ - */ - - -/** - * @brief Return hc::accelerator associated with the specified deviceId - * @return #hipSuccess, #hipErrorInvalidDevice - */ -hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator *acc); - -/** - * @brief Return hc::accelerator_view associated with the specified stream - * - * If stream is 0, the accelerator_view for the default stream is returned. - * @return #hipSuccess - */ -hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av); - - -#endif // #ifdef __HCC__ - -/** - * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelparams or extra - * - * @param [in[ f Kernel to launch. - * @param [in] gridDimX X grid dimension specified in work-items - * @param [in] gridDimY Y grid dimension specified in work-items - * @param [in] gridDimZ Z grid dimension specified in work-items - * @param [in] blockDimX X block dimensions specified in work-items - * @param [in] blockDimY Y grid dimension specified in work-items - * @param [in] blockDimZ Z grid dimension specified in work-items - * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The kernel can access this with HIP_DYNAMIC_SHARED. - * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th default stream is used with associated synchronization rules. 
- * @param [in] kernelParams - * @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and must be in the memory layout and alignment expected by the kernel. - * @param [in] startEvent If non-null, specified event will be updated to track the start time of the kernel launch. The event must be created before calling this API. - * @param [in] stopEvent If non-null, specified event will be updated to track the stop time of the kernel launch. The event must be created before calling this API. - * - * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue - * - * @warning kernellParams argument is not yet implemented in HIP. Please use extra instead. Please refer to hip_porting_driver_api.md for sample usage. - - * HIP/ROCm actually updates the start event when the associated kernel completes. - */ -hipError_t hipHccModuleLaunchKernel(hipFunction_t f, - uint32_t globalWorkSizeX, - uint32_t globalWorkSizeY, - uint32_t globalWorkSizeZ, - uint32_t localWorkSizeX, - uint32_t localWorkSizeY, - uint32_t localWorkSizeZ, - size_t sharedMemBytes, - hipStream_t hStream, - void **kernelParams, - void **extra, - hipEvent_t startEvent=nullptr, - hipEvent_t stopEvent=nullptr - ); - -// doxygen end HCC-specific features -/** - * @} - */ -#endif // #if __cplusplus - -#endif // diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h index da3b7ba50e..379fc05f5b 100644 --- a/include/hip/hcc_detail/hip_runtime.h +++ b/include/hip/hcc_detail/hip_runtime.h @@ -50,14 +50,6 @@ THE SOFTWARE. 
#include -#if USE_PROMOTE_FREE_HCC == 1 -#define ADDRESS_SPACE_1 -#define ADDRESS_SPACE_3 -#else -#define ADDRESS_SPACE_1 __attribute__((address_space(1))) -#define ADDRESS_SPACE_3 __attribute__((address_space(3))) -#endif - //--- // Remainder of this file only compiles with HCC #if defined __HCC__ @@ -89,12 +81,12 @@ namespace hip_impl extern int HIP_TRACE_API; #ifdef __cplusplus -//#include #include #endif #include #include #include +#include // TODO-HCC remove old definitions ; ~1602 hcc supports __HCC_ACCELERATOR__ define. #if defined (__KALMAR_ACCELERATOR__) && !defined (__HCC_ACCELERATOR__) @@ -107,7 +99,7 @@ extern int HIP_TRACE_API; // TODO-HCC add a dummy implementation of assert, need to replace with a proper kernel exit call. #if __HIP_DEVICE_COMPILE__ == 1 #undef assert - #define assert(COND) { if (COND) {} } + #define assert(COND) { if (!(COND)) {abort();} } #endif @@ -138,7 +130,7 @@ extern int HIP_TRACE_API; #define __HIP_ARCH_HAS_WARP_FUNNEL_SHIFT__ (0) //sync -#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (0) +#define __HIP_ARCH_HAS_THREAD_FENCE_SYSTEM__ (1) #define __HIP_ARCH_HAS_SYNC_THREAD_EXT__ (0) // misc diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 724bf09b21..03be587b0d 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -37,7 +37,8 @@ THE SOFTWARE. 
#include #include -#include +#include +#include #if defined (__HCC__) && (__hcc_workweek__ < 16155) #error("This version of HIP requires a newer version of HCC."); @@ -136,6 +137,11 @@ enum hipLimit_t #define hipDeviceMapHost 0x8 #define hipDeviceLmemResizeToMax 0x16 +#define hipArrayDefault 0x00 ///< Default HIP array allocation flag +#define hipArrayLayered 0x01 +#define hipArraySurfaceLoadStore 0x02 +#define hipArrayCubemap 0x04 +#define hipArrayTextureGather 0x08 /* * @brief hipJitOption @@ -165,7 +171,7 @@ typedef enum hipJitOption { /** - * @warning On AMD devices and recent Nvidia devices, these hints and controls are ignored. + * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored. */ typedef enum hipFuncCache_t { hipFuncCachePreferNone, ///< no preference for shared memory or L1 (default) @@ -176,7 +182,7 @@ typedef enum hipFuncCache_t { /** - * @warning On AMD devices and recent Nvidia devices, these hints and controls are ignored. + * @warning On AMD devices and some Nvidia devices, these hints and controls are ignored. */ typedef enum hipSharedMemConfig { hipSharedMemBankSizeDefault, ///< The compiler selects a device-specific value for the banking. @@ -200,27 +206,6 @@ typedef struct dim3 { } dim3; -/** - * Memory copy types - * - */ -typedef enum hipMemcpyKind { - hipMemcpyHostToHost = 0 ///< Host-to-Host Copy - ,hipMemcpyHostToDevice = 1 ///< Host-to-Device Copy - ,hipMemcpyDeviceToHost = 2 ///< Device-to-Host Copy - ,hipMemcpyDeviceToDevice =3 ///< Device-to-Device Copy - ,hipMemcpyDefault = 4, ///< Runtime will automatically determine copy-kind based on virtual addresses. 
-} hipMemcpyKind; - -typedef struct { - unsigned int width; - unsigned int height; - enum hipChannelFormatKind f; - void* data; //FIXME: generalize this -} hipArray; - - - // Doxygen end group GlobalDefs /** @} */ @@ -379,7 +364,7 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId); * @param [in] cacheConfig * * @returns #hipSuccess, #hipErrorInitializationError - * Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures. + * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures. * */ hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ); @@ -391,7 +376,7 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache_t cacheConfig ); * @param [in] cacheConfig * * @returns #hipSuccess, #hipErrorInitializationError - * Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures. + * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures. * */ hipError_t hipDeviceGetCacheConfig ( hipFuncCache_t *cacheConfig ); @@ -415,7 +400,7 @@ hipError_t hipDeviceGetLimit(size_t *pValue, enum hipLimit_t limit); * @param [in] config; * * @returns #hipSuccess, #hipErrorInitializationError - * Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures. + * Note: AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures. 
* */ hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t config ); @@ -427,7 +412,7 @@ hipError_t hipFuncSetCacheConfig (const void* func, hipFuncCache_t config ); * * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInitializationError * - * Note: AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures. + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures. * */ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ); @@ -440,7 +425,7 @@ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ); * * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInitializationError * - * Note: AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures. + * Note: AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures. * */ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ); @@ -1287,6 +1272,19 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t s hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream); #endif +/** + * @brief Fills the memory area pointed to by dst with the constant value. + * + * @param[out] dst Pointer to device memory + * @param[in] pitch - data size in bytes + * @param[in] value - constant value to be set + * @param[in] width + * @param[in] height + * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree + */ + +hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height); + /** * @brief Query memory info. * Return snapshot of free memory, and total allocatable memory on the device. 
@@ -1315,7 +1313,7 @@ hipError_t hipMemPtrGetInfo(void *ptr, size_t *size); */ #if __cplusplus hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, - size_t width, size_t height = 0, unsigned int flags = 0); + size_t width, size_t height = 0, unsigned int flags = hipArrayDefault); #else hipError_t hipMallocArray(hipArray** array, const struct hipChannelFormatDesc* desc, size_t width, size_t height, unsigned int flags); @@ -1330,6 +1328,22 @@ hipError_t hipMallocArray(hipArray** array, const struct hipChannelFormatDesc* d */ hipError_t hipFreeArray(hipArray* array); +/** + * @brief Allocate an array on the device. + * + * @param[out] array Pointer to allocated array in device memory + * @param[in] desc Requested channel format + * @param[in] extent Requested array allocation width, height and depth + * @param[in] flags Requested properties of allocated array + * @return #hipSuccess, #hipErrorMemoryAllocation + * + * @see hipMalloc, hipMallocPitch, hipFree, hipFreeArray, hipHostMalloc, hipHostFree + */ + +hipError_t hipMalloc3DArray(hipArray_t *array, + const struct hipChannelFormatDesc* desc, + struct hipExtent extent, + unsigned int flags); /** * @brief Copies data between host and device. * @@ -1402,6 +1416,7 @@ hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, size_t count, hipMemcpyKind kind); +hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p); // doxygen end Memory /** @@ -1434,7 +1449,6 @@ hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, * * @returns #hipSuccess, * @returns #hipErrorInvalidDevice if deviceId or peerDeviceId are not valid devices - * @warning PeerToPeer support is experimental. 
*/ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDeviceId); @@ -1452,7 +1466,6 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDev * * Returns #hipSuccess, #hipErrorInvalidDevice, #hipErrorInvalidValue, * @returns #hipErrorPeerAccessAlreadyEnabled if peer access is already enabled for this device. - * @warning PeerToPeer support is experimental. */ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags); @@ -1465,7 +1478,6 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags); * @param [in] peerDeviceId * * @returns #hipSuccess, #hipErrorPeerAccessNotEnabled - * @warning PeerToPeer support is experimental. */ hipError_t hipDeviceDisablePeerAccess (int peerDeviceId); @@ -1497,7 +1509,6 @@ hipError_t hipMemGetAddressRange ( hipDeviceptr_t* pbase, size_t* psize, hipDevi * @param [in] sizeBytes - Size of memory copy in bytes * * @returns #hipSuccess, #hipErrorInvalidValue, #hipErrorInvalidDevice - * @warning PeerToPeer support is experimental. */ hipError_t hipMemcpyPeer (void* dst, int dstDeviceId, const void* src, int srcDeviceId, size_t sizeBytes); @@ -1656,7 +1667,7 @@ hipError_t hipCtxGetApiVersion (hipCtx_t ctx,int *apiVersion); * * @return #hipSuccess * - * @warning AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures. + * @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures. * * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice */ @@ -1669,7 +1680,7 @@ hipError_t hipCtxGetCacheConfig ( hipFuncCache_t *cacheConfig ); * * @return #hipSuccess * - * @warning AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures. 
+ * @warning AMD devices and some Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures. * * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice */ @@ -1682,7 +1693,7 @@ hipError_t hipCtxSetCacheConfig ( hipFuncCache_t cacheConfig ); * * @return #hipSuccess * - * @warning AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures. + * @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures. * * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice */ @@ -1695,7 +1706,7 @@ hipError_t hipCtxSetSharedMemConfig ( hipSharedMemConfig config ); * * @return #hipSuccess * - * @warning AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures. + * @warning AMD devices and some Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures. 
* * @see hipCtxCreate, hipCtxDestroy, hipCtxGetFlags, hipCtxPopCurrent, hipCtxGetCurrent, hipCtxSetCurrent, hipCtxPushCurrent, hipCtxSetCacheConfig, hipCtxSynchronize, hipCtxGetDevice */ @@ -1867,7 +1878,7 @@ hipError_t hipDeviceGetPCIBusId (char *pciBusId,int len,int device); * * @returns #hipSuccess, #hipErrorInavlidDevice, #hipErrorInvalidValue */ -hipError_t hipDeviceGetByPCIBusId ( int* device,const int* pciBusId ); +hipError_t hipDeviceGetByPCIBusId ( int* device,const char* pciBusId ); /** @@ -2150,6 +2161,24 @@ hipError_t hipIpcCloseMemHandle(void *devPtr); #endif #ifdef __cplusplus + +hipError_t hipBindTexture(size_t* offset, + textureReference* tex, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t size = UINT_MAX); + +hipError_t ihipBindTextureImpl(int dim, + enum hipTextureReadMode readMode, + size_t *offset, + const void *devPtr, + const struct hipChannelFormatDesc& desc, + size_t size, + enum hipTextureAddressMode addressMode, + enum hipTextureFilterMode filterMode, + int normalizedCoords, + hipTextureObject_t& textureObject); + /* * @brief hipBindTexture Binds size bytes of the memory area pointed to by @p devPtr to the texture reference tex. 
* @@ -2164,15 +2193,15 @@ hipError_t hipIpcCloseMemHandle(void *devPtr); * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown **/ template -hipError_t hipBindTexture(size_t *offset, - struct texture &tex, - const void *devPtr, - const struct hipChannelFormatDesc *desc, - size_t size=UINT_MAX) +hipError_t hipBindTexture(size_t *offset, + struct texture& tex, + const void *devPtr, + const struct hipChannelFormatDesc& desc, + size_t size = UINT_MAX) { - tex._dataPtr = static_cast(devPtr); - - return hipSuccess; + return ihipBindTextureImpl(dim, readMode, offset, devPtr, desc, size, + tex.addressMode[0], tex.filterMode, tex.normalized, + tex.textureObject); } /* @@ -2188,20 +2217,115 @@ hipError_t hipBindTexture(size_t *offset, * @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree, #hipErrorUnknown **/ template -hipError_t hipBindTexture(size_t *offset, - struct texture &tex, - const void *devPtr, - size_t size=UINT_MAX) +hipError_t hipBindTexture(size_t *offset, + struct texture& tex, + const void *devPtr, + size_t size = UINT_MAX) { - return hipBindTexture(offset, tex, devPtr, &tex.channelDesc, size); + return ihipBindTextureImpl(dim, readMode, offset, devPtr, tex.channelDesc, size, + tex.addressMode[0], tex.filterMode, tex.normalized, + tex.textureObject); +} + +// C API +hipError_t hipBindTexture2D(size_t* offset, + textureReference* tex, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t width, + size_t height, + size_t pitch); + +hipError_t ihipBindTexture2DImpl(int dim, + enum hipTextureReadMode readMode, + size_t *offset, + const void *devPtr, + const struct hipChannelFormatDesc& desc, + size_t width, + size_t height, + enum hipTextureAddressMode addressMode, + enum hipTextureFilterMode filterMode, + int normalizedCoords, + hipTextureObject_t& textureObject); + +template +hipError_t hipBindTexture2D(size_t *offset, + struct texture& tex, + const void *devPtr, + size_t width, + size_t height, + size_t 
pitch) +{ + return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, tex.channelDesc, width, height, + tex.addressMode[0], tex.filterMode, tex.normalized, + tex.textureObject); } template -hipError_t hipBindTextureToArray(struct texture &tex, hipArray* array) { - tex.width = array->width; - tex.height = array->height; - tex._dataPtr = static_cast(array->data); - return hipSuccess; +hipError_t hipBindTexture2D(size_t *offset, + struct texture& tex, + const void *devPtr, + const struct hipChannelFormatDesc &desc, + size_t width, + size_t height, + size_t pitch) +{ + return ihipBindTexture2DImpl(dim, readMode, offset, devPtr, desc, width, height, + tex.addressMode[0], tex.filterMode, tex.normalized, + tex.textureObject); +} + +//C API +hipError_t hipBindTextureToArray(textureReference* tex, + hipArray_const_t array, + const hipChannelFormatDesc* desc); + +hipError_t ihipBindTextureToArrayImpl(int dim, + enum hipTextureReadMode readMode, + hipArray_const_t array, + const struct hipChannelFormatDesc& desc, + enum hipTextureAddressMode addressMode, + enum hipTextureFilterMode filterMode, + int normalizedCoords, + hipTextureObject_t& textureObject); + +template +hipError_t hipBindTextureToArray(struct texture& tex, + hipArray_const_t array) +{ + return ihipBindTextureToArrayImpl(dim, readMode, array, tex.channelDesc, + tex.addressMode[0], tex.filterMode, tex.normalized, + tex.textureObject); +} + +template +hipError_t hipBindTextureToArray(struct texture& tex, + hipArray_const_t array, + const struct hipChannelFormatDesc& desc) +{ + return ihipBindTextureToArrayImpl(dim, readMode, array, desc, + tex.addressMode[0], tex.filterMode, tex.normalized, + tex.textureObject); +} + +//C API +hipError_t hipBindTextureToMipmappedArray(const textureReference* tex, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc* desc); + +template +hipError_t hipBindTextureToMipmappedArray(const texture& tex, + hipMipmappedArray_const_t mipmappedArray) +{ + return 
hipSuccess; +} + +template +hipError_t hipBindTextureToMipmappedArray(const texture& tex, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc& desc) +{ + return hipSuccess; } /* @@ -2211,15 +2335,30 @@ hipError_t hipBindTextureToArray(struct texture &tex, hipArray * * @return #hipSuccess **/ -template -hipError_t hipUnbindTexture(struct texture &tex) -{ - tex._dataPtr = NULL; +hipError_t hipUnbindTexture(const textureReference* tex); - return hipSuccess; +extern hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject); + +template +hipError_t hipUnbindTexture(struct texture &tex) +{ + return ihipUnbindTextureImpl(tex.textureObject); } +hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array); +hipError_t hipGetTextureAlignmentOffset (size_t* offset, const textureReference* texref); +hipError_t hipGetTextureReference(const textureReference** texref, const void* symbol); +hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, + const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const hipResourceViewDesc* pResViewDesc); + +hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject); + +hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, hipTextureObject_t textureObject); +hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc, hipTextureObject_t textureObject); +hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, hipTextureObject_t textureObject); // doxygen end Texture /** diff --git a/include/hip/hcc_detail/hip_texture.h b/include/hip/hcc_detail/hip_texture.h deleted file mode 100644 index c6f5a1cfb2..0000000000 --- a/include/hip/hcc_detail/hip_texture.h +++ /dev/null @@ -1,107 +0,0 @@ -/* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -//#pragma once - -#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_H -#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_H - -/** - * @file hcc_detail/hip_texture.h - * @brief HIP C++ Texture API for hcc compiler - */ - -#include -#include -#include -#include -//#include - -//---- -//Texture - TODO - likely need to move this to a separate file only included with kernel compilation. -#define hipTextureType1D 1 - -#if __cplusplus -template -struct texture : public textureReference { - - const T * _dataPtr; // pointer to underlying data. - - //texture() : filterMode(hipFilterModePoint), normalized(false), _dataPtr(NULL) {}; - unsigned int width; - unsigned int height; - -}; -#endif - - -#define tex1Dfetch(_tex, _addr) (_tex._dataPtr[_addr]) - -#define tex2D(_tex, _dx, _dy) \ - _tex._dataPtr[(unsigned int)_dx + (unsigned int)_dy*(_tex.width)] - -/** - * @addtogroup API HIP API - * @{ - * - * Defines the HIP API. 
See the individual sections for more information. - */ - -// These are C++ APIs - maybe belong in separate file. -/** - *------------------------------------------------------------------------------------------------- - *------------------------------------------------------------------------------------------------- - * @defgroup Texture Texture Reference Management - * @{ - * - * - * @warning The HIP texture API implements a small subset of full texture API. Known limitations include: - * - Only point sampling is supported. - * - Only C++ APIs are provided. - * - Many APIs and modes are not implemented. - * - * The HIP texture support is intended to allow use of texture cache on hardware where this is beneficial. - * - * The following CUDA APIs are not currently supported: - * - cudaBindTexture2D - * - cudaBindTextureToArray - * - cudaBindTextureToMipmappedArray - * - cudaGetChannelDesc - * - cudaGetTextureReference - * - */ - -// C API: -#if 0 -hipChannelFormatDesc hipBindTexture(size_t *offset, struct textureReference *tex, const void *devPtr, const struct hipChannelFormatDesc *desc, size_t size=UINT_MAX) -{ - tex->_dataPtr = devPtr; -} -#endif - - -// End doxygen API: -/** - * @} - */ - -#endif diff --git a/include/hip/hcc_detail/hip_texture_types.h b/include/hip/hcc_detail/hip_texture_types.h new file mode 100644 index 0000000000..27f8747653 --- /dev/null +++ b/include/hip/hcc_detail/hip_texture_types.h @@ -0,0 +1,83 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** + * @file hcc_detail/hip_texture_types.h + * @brief Defines the texture reference template type (texture) for the HIP runtime. 
+ */ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H + +/******************************************************************************* +* * +* * +* * +*******************************************************************************/ +#include +//#include +#include +#include + +#if __cplusplus + +/******************************************************************************* +* * +* * +* * +*******************************************************************************/ + +template +struct texture : public textureReference +{ + texture(int norm = 0, + enum hipTextureFilterMode fMode = hipFilterModePoint, + enum hipTextureAddressMode aMode = hipAddressModeClamp) + { + normalized = norm; + filterMode = fMode; + addressMode[0] = aMode; + addressMode[1] = aMode; + addressMode[2] = aMode; + channelDesc = hipCreateChannelDesc(); + sRGB = 0; + } + + texture(int norm, + enum hipTextureFilterMode fMode, + enum hipTextureAddressMode aMode, + struct hipChannelFormatDesc desc) + { + normalized = norm; + filterMode = fMode; + addressMode[0] = aMode; + addressMode[1] = aMode; + addressMode[2] = aMode; + channelDesc = desc; + sRGB = 0; + } +}; + +#endif /* __cplusplus */ + +#endif /* !HIP_INCLUDE_HIP_HCC_DETAIL_HIP_TEXTURE_TYPES_H */ + diff --git a/include/hip/hcc_detail/texture_functions.h b/include/hip/hcc_detail/texture_functions.h new file mode 100644 index 0000000000..d08b429fca --- /dev/null +++ b/include/hip/hcc_detail/texture_functions.h @@ -0,0 +1,6771 @@ +/* +Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#ifndef HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_FUNCTIONS_H +#define HIP_INCLUDE_HIP_HCC_DETAIL_TEXTURE_FUNCTIONS_H + +#include +#include +#include + +union TData { + hc::short_vector::float4::vector_value_type f; + hc::short_vector::int4::vector_value_type i; + hc::short_vector::uint4::vector_value_type u; +}; + +#define __TEXTURE_FUNCTIONS_DECL__ static __inline__ __device__ +#define ADDRESS_SPACE_2 __attribute__((address_space(2))) +#define TEXTURE_PARAMETERS_INIT \ + unsigned int ADDRESS_SPACE_2 *i = (unsigned int ADDRESS_SPACE_2*)textureObject; \ + unsigned int ADDRESS_SPACE_2 *s = i + HIP_SAMPLER_OBJECT_OFFSET_DWORD; \ + TData texel; + +#define TEXTURE_SET_FLOAT \ + *retVal = texel.f.x; + +#define TEXTURE_SET_SIGNED \ + *retVal = texel.i.x; + +#define TEXTURE_SET_UNSIGNED \ + *retVal = texel.u.x; + +#define TEXTURE_SET_FLOAT_X \ + retVal->x = texel.f.x; + +#define TEXTURE_SET_SIGNED_X \ + retVal->x = texel.i.x; + +#define TEXTURE_SET_UNSIGNED_X \ + retVal->x = texel.u.x; + +#define TEXTURE_SET_FLOAT_XY \ + retVal->x = texel.f.x; \ + retVal->y = texel.f.y; + +#define TEXTURE_SET_SIGNED_XY \ + retVal->x = texel.i.x; \ + retVal->y = texel.i.y; + +#define TEXTURE_SET_UNSIGNED_XY \ + retVal->x = texel.u.x; \ + retVal->y = texel.u.y; + +#define TEXTURE_SET_FLOAT_XYZW \ + retVal->x = texel.f.x; \ + retVal->y = texel.f.y; \ + retVal->z = texel.f.z; \ + retVal->w = texel.f.w; + +#define TEXTURE_SET_SIGNED_XYZW \ + retVal->x = texel.i.x; \ + retVal->y = texel.i.y; \ + retVal->z = texel.i.z; \ + retVal->w = texel.i.w; + +#define TEXTURE_SET_UNSIGNED_XYZW \ + retVal->x = texel.u.x; \ + retVal->y = texel.u.y; \ + retVal->z = texel.u.z; \ + retVal->w = texel.u.w; + +#define TEXTURE_RETURN_CHAR \ + return texel.i.x; + +#define TEXTURE_RETURN_UCHAR \ + return texel.u.x; + +#define TEXTURE_RETURN_SHORT \ + return texel.i.x; + +#define TEXTURE_RETURN_USHORT \ + return texel.u.x; + +#define TEXTURE_RETURN_INT \ + return texel.i.x; + +#define TEXTURE_RETURN_UINT \ + return 
texel.u.x; + +#define TEXTURE_RETURN_FLOAT \ + return texel.f.x; + +#define TEXTURE_RETURN_SIGNED \ + return texel.i.x; + +#define TEXTURE_RETURN_UNSIGNED \ + return texel.u.x; + +#define TEXTURE_RETURN_CHAR_X \ + return char1(texel.i.x); + +#define TEXTURE_RETURN_UCHAR_X \ + return uchar1(texel.u.x); + +#define TEXTURE_RETURN_SHORT_X \ + return short1(texel.i.x); + +#define TEXTURE_RETURN_USHORT_X \ + return ushort1(texel.u.x); + +#define TEXTURE_RETURN_INT_X \ + return int1(texel.i.x); + +#define TEXTURE_RETURN_UINT_X \ + return uint1(texel.u.x); + +#define TEXTURE_RETURN_FLOAT_X \ + return float1(texel.f.x); + +#define TEXTURE_RETURN_CHAR_XY \ + return char2(texel.i.x, texel.i.y); + +#define TEXTURE_RETURN_UCHAR_XY \ + return uchar2(texel.u.x, texel.u.y); + +#define TEXTURE_RETURN_SHORT_XY \ + return short2(texel.i.x, texel.i.y); + +#define TEXTURE_RETURN_USHORT_XY \ + return ushort2(texel.u.x, texel.u.y); + +#define TEXTURE_RETURN_INT_XY \ + return int2(texel.i.x, texel.i.y); + +#define TEXTURE_RETURN_UINT_XY \ + return uint2(texel.u.x, texel.u.y); + +#define TEXTURE_RETURN_FLOAT_XY \ + return float2(texel.f.x, texel.f.y); + +#define TEXTURE_RETURN_CHAR_XYZW \ + return char4(texel.i.x, texel.i.y, texel.i.z, texel.i.w); + +#define TEXTURE_RETURN_UCHAR_XYZW \ + return uchar4(texel.u.x, texel.u.y, texel.u.z, texel.u.w); + +#define TEXTURE_RETURN_SHORT_XYZW \ + return short4(texel.i.x, texel.i.y, texel.i.z, texel.i.w); + +#define TEXTURE_RETURN_USHORT_XYZW \ + return ushort4(texel.u.x, texel.u.y, texel.u.z, texel.u.w); + +#define TEXTURE_RETURN_INT_XYZW \ + return int4(texel.i.x, texel.i.y, texel.i.z, texel.i.w); + +#define TEXTURE_RETURN_UINT_XYZW \ + return uint4(texel.u.x, texel.u.y, texel.u.z, texel.u.w); + +#define TEXTURE_RETURN_FLOAT_XYZW \ + return float4(texel.f.x, texel.f.y, texel.f.z, texel.f.w); + +extern "C" { +hc::short_vector::float4::vector_value_type +__ockl_image_sample_1D(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + 
float c) [[hc]]; + +hc::short_vector::float4::vector_value_type +__ockl_image_sample_1Da(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + hc::short_vector::float2::vector_value_type c) [[hc]]; + +hc::short_vector::float4::vector_value_type +__ockl_image_sample_2D(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + hc::short_vector::float2::vector_value_type c) [[hc]]; + + +hc::short_vector::float4::vector_value_type +__ockl_image_sample_2Da(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + hc::short_vector::float4::vector_value_type c) [[hc]]; + +float +__ockl_image_sample_2Dad(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + hc::short_vector::float4::vector_value_type c) [[hc]]; + +float +__ockl_image_sample_2Dd(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + hc::short_vector::float2::vector_value_type c) [[hc]]; + +hc::short_vector::float4::vector_value_type +__ockl_image_sample_3D(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + hc::short_vector::float4::vector_value_type c) [[hc]]; + +hc::short_vector::float4::vector_value_type +__ockl_image_sample_grad_1D(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + float c, + float dx, + float dy) [[hc]]; + +hc::short_vector::float4::vector_value_type +__ockl_image_sample_grad_1Da(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + hc::short_vector::float2::vector_value_type c, + float dx, + float dy) [[hc]]; + +hc::short_vector::float4::vector_value_type +__ockl_image_sample_grad_2D(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + hc::short_vector::float2::vector_value_type c, + hc::short_vector::float2::vector_value_type dx, + hc::short_vector::float2::vector_value_type dy) [[hc]]; + +hc::short_vector::float4::vector_value_type +__ockl_image_sample_grad_2Da(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * 
s, + hc::short_vector::float4::vector_value_type c, + hc::short_vector::float2::vector_value_type dx, + hc::short_vector::float2::vector_value_type dy) [[hc]]; + +float +__ockl_image_sample_grad_2Dad(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + hc::short_vector::float4::vector_value_type c, + hc::short_vector::float2::vector_value_type dx, + hc::short_vector::float2::vector_value_type dy) [[hc]]; + +float +__ockl_image_sample_grad_2Dd(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + hc::short_vector::float2::vector_value_type c, + hc::short_vector::float2::vector_value_type dx, + hc::short_vector::float2::vector_value_type dy) [[hc]]; + +hc::short_vector::float4::vector_value_type +__ockl_image_sample_grad_3D(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + hc::short_vector::float4::vector_value_type c, + hc::short_vector::float4::vector_value_type dx, + hc::short_vector::float4::vector_value_type dy) [[hc]]; + +hc::short_vector::float4::vector_value_type +__ockl_image_sample_lod_1D(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + float c, + float l) [[hc]]; + +hc::short_vector::float4::vector_value_type +__ockl_image_sample_lod_1Da(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + hc::short_vector::float2::vector_value_type c, + float l) [[hc]]; + +hc::short_vector::float4::vector_value_type +__ockl_image_sample_lod_2D(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + hc::short_vector::float2::vector_value_type c, + float l) [[hc]]; + +hc::short_vector::float4::vector_value_type +__ockl_image_sample_lod_2Da(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + hc::short_vector::float4::vector_value_type c, + float l) [[hc]]; + +float +__ockl_image_sample_lod_2Dad(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + hc::short_vector::float4::vector_value_type c, + float l) [[hc]]; + +float 
+__ockl_image_sample_lod_2Dd(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + hc::short_vector::float2::vector_value_type c, + float l) [[hc]]; + +hc::short_vector::float4::vector_value_type +__ockl_image_sample_lod_3D(unsigned int ADDRESS_SPACE_2 * i, + unsigned int ADDRESS_SPACE_2 * s, + hc::short_vector::float4::vector_value_type c, + float l) [[hc]]; +} + +//////////////////////////////////////////////////////////// +// Texture object APIs +//////////////////////////////////////////////////////////// + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char1 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char2 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(char4 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(unsigned char *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uchar1 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uchar2 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XY; +} + 
+__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uchar4 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short1 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short2 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(short4 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(unsigned short *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED; /* unsigned result: store from the unsigned view (texel.u), as tex1D(unsigned short*) does */ +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(ushort1 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(ushort2 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(ushort4 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + 
texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int1 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int2 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(int4 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(unsigned int *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uint1 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uint2 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(uint4 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float1 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float2 *retVal, hipTextureObject_t 
textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1Dfetch(float4 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_FLOAT_XYZW; +} + +/* Value-returning form of tex1Dfetch: T selects the pointer-taking overload that fills ret, which is then returned by value. The explicit template parameter list is required; a bare "template" keyword leaves T undeclared. */ +template <class T> +__TEXTURE_FUNCTIONS_DECL__ T tex1Dfetch(hipTextureObject_t textureObject, int x) +{ + T ret; + tex1Dfetch(&ret, textureObject, x); + return ret; +} + +//////////////////////////////////////////////////////////// +/* tex1D overloads, one per result type: each samples the 1D image at x with __ockl_image_sample_1D and passes the texel to the TEXTURE_SET_* macro named for the retVal type. NOTE(review): TEXTURE_PARAMETERS_INIT and TEXTURE_SET_* are defined outside this chunk; presumably they declare i, s, texel and store through retVal, confirm there. */ +__TEXTURE_FUNCTIONS_DECL__ void tex1D(char *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(char1 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(char2 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(char4 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(unsigned char *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(uchar1 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(uchar2 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void 
tex1D(uchar4 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XYZW; +} + +/* Remaining tex1D overloads (short, unsigned short, int, unsigned int and float families): each samples the 1D image at x with __ockl_image_sample_1D and passes the texel to the TEXTURE_SET_* macro named for the retVal type. NOTE(review): x is declared int here while the tex1DLod and tex1DGrad siblings take float x; confirm that callers are not expected to pass fractional coordinates. */ +__TEXTURE_FUNCTIONS_DECL__ void tex1D(short *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(short1 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(short2 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(short4 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(unsigned short *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(ushort1 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(ushort2 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(ushort4 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(int *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED; +} + 
+__TEXTURE_FUNCTIONS_DECL__ void tex1D(int1 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(int2 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(int4 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(unsigned int *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(uint1 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(uint2 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(uint4 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(float *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(float1 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(float2 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + 
TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1D(float4 *retVal, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_SET_FLOAT_XYZW; +} +/* Value-returning form of tex1D: T selects the pointer-taking overload that fills ret, which is then returned by value. The explicit template parameter list is required; a bare "template" keyword leaves T undeclared. */ +template <class T> +__TEXTURE_FUNCTIONS_DECL__ T tex1D(hipTextureObject_t textureObject, int x) +{ + T ret; + tex1D(&ret, textureObject, x); + return ret; +} + +//////////////////////////////////////////////////////////// +/* tex1DLod overloads, one per result type: each samples the 1D image at x with an explicit level of detail via __ockl_image_sample_lod_1D and passes the texel to the TEXTURE_SET_* macro named for the retVal type. NOTE(review): TEXTURE_PARAMETERS_INIT and TEXTURE_SET_* are defined outside this chunk; presumably they declare i, s, texel and store through retVal, confirm there. */ +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char1 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char2 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(char4 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(unsigned char *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uchar1 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uchar2 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED_XY; +} + +/* Remaining tex1DLod overloads (uchar4, short, unsigned short, int and unsigned int families): each samples the 1D image at x with an explicit level of detail via __ockl_image_sample_lod_1D and passes the texel to the TEXTURE_SET_* macro named for the retVal type. NOTE(review): TEXTURE_PARAMETERS_INIT and TEXTURE_SET_* are defined outside this chunk; presumably they declare i, s, texel and store through retVal, confirm there. */ +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uchar4 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short1 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short2 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(short4 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(unsigned short *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(ushort1 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(ushort2 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(ushort4 *retVal, 
hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int1 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int2 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(int4 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(unsigned int *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uint1 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uint2 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(uint4 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + 
TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float1 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float2 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLod(float4 *retVal, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_SET_FLOAT_XYZW; +} + +/* Value-returning form of tex1DLod: T selects the pointer-taking overload that fills ret, which is then returned by value. The explicit template parameter list is required; a bare "template" keyword leaves T undeclared. */ +template <class T> +__TEXTURE_FUNCTIONS_DECL__ T tex1DLod(hipTextureObject_t textureObject, float x, float level) +{ + T ret; + tex1DLod(&ret, textureObject, x, level); + return ret; +} + +//////////////////////////////////////////////////////////// +/* tex1DGrad overloads, one per result type: each samples the 1D image at x with the gradients dx and dy via __ockl_image_sample_grad_1D and passes the texel to the TEXTURE_SET_* macro named for the retVal type. NOTE(review): TEXTURE_PARAMETERS_INIT and TEXTURE_SET_* are defined outside this chunk; presumably they declare i, s, texel and store through retVal, confirm there. */ +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char1 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char2 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(char4 *retVal, hipTextureObject_t textureObject, float x, 
float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_XYZW; +} + +/* Remaining tex1DGrad overloads (unsigned char, short, unsigned short and int families): each samples the 1D image at x with the gradients dx and dy via __ockl_image_sample_grad_1D and passes the texel to the TEXTURE_SET_* macro named for the retVal type. NOTE(review): TEXTURE_PARAMETERS_INIT and TEXTURE_SET_* are defined outside this chunk; presumably they declare i, s, texel and store through retVal, confirm there. */ +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(unsigned char *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uchar1 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uchar2 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uchar4 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short1 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short2 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(short4 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(unsigned short *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(ushort1 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(ushort2 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(ushort4 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int1 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int2 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(int4 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_SIGNED_XYZW; 
+} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(unsigned int *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uint1 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uint2 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(uint4 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float1 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float2 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DGrad(float4 *retVal, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_SET_FLOAT_XYZW; +} + +/* Value-returning form of tex1DGrad: T selects the pointer-taking tex1DGrad overload that fills ret, which is then returned by value. It must forward to tex1DGrad, the only 1D family here that accepts (retVal, textureObject, x, dx, dy); the tex1DLod overloads take a single level argument and cannot be called with dx and dy. The explicit template parameter list is required so that T is declared. */ +template <class T> +__TEXTURE_FUNCTIONS_DECL__ T tex1DGrad(hipTextureObject_t 
textureObject, float x, float dx, float dy) +{ + T ret; + tex1DGrad(&ret, textureObject, x, dx, dy); + return ret; +} + +//////////////////////////////////////////////////////////// +/* tex2D overloads, one per result type: each packs (x, y) into an hc::short_vector::float2, samples the 2D image with __ockl_image_sample_2D and passes the texel to the TEXTURE_SET_* macro named for the retVal type. NOTE(review): TEXTURE_PARAMETERS_INIT and TEXTURE_SET_* are defined outside this chunk; presumably they declare i, s, texel and store through retVal, confirm there. */ +__TEXTURE_FUNCTIONS_DECL__ void tex2D(char *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(char1 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(char2 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(char4 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(unsigned char *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(uchar1 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(uchar2 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void 
tex2D(uchar4 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_UNSIGNED_XYZW; +} + +/* Remaining tex2D overloads (short, unsigned short, int and unsigned int families): each packs (x, y) into an hc::short_vector::float2, samples the 2D image with __ockl_image_sample_2D and passes the texel to the TEXTURE_SET_* macro named for the retVal type. NOTE(review): TEXTURE_PARAMETERS_INIT and TEXTURE_SET_* are defined outside this chunk; presumably they declare i, s, texel and store through retVal, confirm there. */ +__TEXTURE_FUNCTIONS_DECL__ void tex2D(short *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(short1 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(short2 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(short4 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(unsigned short *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(ushort1 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(ushort2 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + 
TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(ushort4 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(int *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(int1 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(int2 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(int4 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(unsigned int *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(uint1 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(uint2 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, 
hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(uint4 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(float *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(float1 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(float2 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2D(float4 *retVal, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_SET_FLOAT_XYZW; +} + +/* Value-returning form of tex2D: T selects the pointer-taking overload that fills ret, which is then returned by value. The explicit template parameter list is required; a bare "template" keyword leaves T undeclared. */ +template <class T> +__TEXTURE_FUNCTIONS_DECL__ T tex2D(hipTextureObject_t textureObject, float x, float y) +{ + T ret; + tex2D(&ret, textureObject, x, y); + return ret; +} + +//////////////////////////////////////////////////////////// +/* tex2DLod overloads, one per result type: each packs (x, y) into an hc::short_vector::float2, samples the 2D image at an explicit level of detail with __ockl_image_sample_lod_2D and passes the texel to the TEXTURE_SET_* macro named for the retVal type. NOTE(review): TEXTURE_PARAMETERS_INIT and TEXTURE_SET_* are defined outside this chunk; presumably they declare i, s, texel and store through retVal, confirm there. */ +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char1 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; 
+ texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_SIGNED_X; +} + +/* Remaining tex2DLod overloads (char2 onward through the unsigned int family): each packs (x, y) into an hc::short_vector::float2, samples the 2D image at an explicit level of detail with __ockl_image_sample_lod_2D and passes the texel to the TEXTURE_SET_* macro named for the retVal type. NOTE(review): TEXTURE_PARAMETERS_INIT and TEXTURE_SET_* are defined outside this chunk; presumably they declare i, s, texel and store through retVal, confirm there. */ +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char2 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(char4 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(unsigned char *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uchar1 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uchar2 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uchar4 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short1 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short2 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(short4 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(unsigned short *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(ushort1 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(ushort2 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(ushort4 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int1 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int2 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(int4 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(unsigned int *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uint1 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uint2 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, 
hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(uint4 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float1 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float2 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLod(float4 *retVal, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_SET_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ T tex2DLod(hipTextureObject_t textureObject, float x, float y, float level) +{ + T ret; + tex2DLod(&ret, textureObject, x, y, level); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex3D(char *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + 
TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(char1 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(char2 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(char4 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(unsigned char *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(uchar1 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(uchar2 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(uchar4 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(short *retVal, 
hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(short1 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(short2 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(short4 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(unsigned short *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(ushort1 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(ushort2 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(ushort4 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + 
TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(int *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(int1 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(int2 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(int4 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(unsigned int *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(uint1 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(uint2 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, 
z, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(uint4 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(float *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(float1 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(float2 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3D(float4 *retVal, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_SET_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ T tex3D(hipTextureObject_t textureObject, float x, float y, float z) +{ + T ret; + tex3D(&ret, textureObject, x, y, z); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char1 
*retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char2 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(char4 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(unsigned char *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uchar1 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uchar2 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uchar4 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 
0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short1 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short2 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(short4 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(unsigned short *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(ushort1 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(ushort2 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + 
TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(ushort4 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int1 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int2 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(int4 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(unsigned int *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void 
tex3DLod(uint1 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uint2 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(uint4 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float1 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float2 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex3DLod(float4 *retVal, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 
0.0f).get_vector(), level); + TEXTURE_SET_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ T tex3DLod(hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + T ret; + tex3DLod(&ret, textureObject, x, y, z, level); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char1 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char2 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_SIGNED_XY; +} +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(char4 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(unsigned char *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uchar1 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uchar2 *retVal, 
hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_UNSIGNED_XY; +} +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uchar4 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short1 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short2 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_SIGNED_XY; +} +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(short4 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(unsigned short *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(ushort1 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(ushort2 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_UNSIGNED_XY; +} +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(ushort4 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int1 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int2 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_SIGNED_XY; +} +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(int4 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(unsigned int *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_UNSIGNED; +} + 
+__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uint1 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uint2 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_UNSIGNED_XY; +} +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(uint4 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float1 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float2 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_FLOAT_XY; +} +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayered(float4 *retVal, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_SET_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ T tex1DLayered(hipTextureObject_t textureObject, float x, int layer) +{ + T ret; + 
tex1DLayered(&ret, textureObject, x, layer); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char1 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char2 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(char4 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(unsigned char *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uchar1 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uchar2 *retVal, hipTextureObject_t textureObject, 
float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uchar4 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short1 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short2 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(short4 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(unsigned short *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_UNSIGNED; +} + 
+__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(ushort1 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(ushort2 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(ushort4 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int1 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int2 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(int4 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, 
hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(unsigned int *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uint1 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uint2 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(uint4 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float1 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float2 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + 
TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredLod(float4 *retVal, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_SET_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ T tex1DLayeredLod(hipTextureObject_t textureObject, float x, int layer, float level) +{ + T ret; + tex1DLayeredLod(&ret, textureObject, x, layer, level); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char1 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char2 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(char4 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(unsigned 
char *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uchar1 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uchar2 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uchar4 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short1 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short2 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, 
hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(short4 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(unsigned short *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(ushort1 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(ushort2 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(ushort4 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int1 *retVal, 
hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int2 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(int4 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(unsigned int *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uint1 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uint2 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(uint4 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, 
hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float1 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float2 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex1DLayeredGrad(float4 *retVal, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_SET_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ T tex1DLayeredGrad(hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + T ret; + tex1DLayeredGrad(&ret, textureObject, x, layer, dx, dy); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char1 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + 
TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char2 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(char4 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(unsigned char *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uchar1 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uchar2 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uchar4 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short *retVal, hipTextureObject_t textureObject, float x, float y, 
int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short1 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short2 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(short4 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(unsigned short *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(ushort1 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(ushort2 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(ushort4 *retVal, hipTextureObject_t textureObject, 
float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int1 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int2 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(int4 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(unsigned int *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uint1 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uint2 *retVal, hipTextureObject_t textureObject, 
float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(uint4 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float1 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float2 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayered(float4 *retVal, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_SET_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ T tex2DLayered(hipTextureObject_t textureObject, float x, float y, int layer) +{ + T ret; + tex2DLayered(&ret, textureObject, x, y, layer); + return ret; +} + +//////////////////////////////////////////////////////////// +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char *retVal, hipTextureObject_t textureObject, float x, float y, int 
layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char1 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char2 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(char4 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(unsigned char *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uchar1 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uchar2 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, 
hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uchar4 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short1 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short2 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(short4 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(unsigned short *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED; +} + 
+__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(ushort1 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(ushort2 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(ushort4 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int1 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int2 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(int4 *retVal, hipTextureObject_t textureObject, float x, float y, int 
layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_SIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(unsigned int *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uint1 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uint2 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(uint4 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_UNSIGNED_XYZW; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_FLOAT; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float1 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, 
hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_FLOAT_X; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float2 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_FLOAT_XY; +} + +__TEXTURE_FUNCTIONS_DECL__ void tex2DLayeredLod(float4 *retVal, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_SET_FLOAT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ T tex2DLayeredLod(hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + T ret; + tex2DLayeredLod(&ret, textureObject, x, y, layer, level); + return ret; +} + +//////////////////////////////////////////////////////////// +// Texture Reference APIs +//////////////////////////////////////////////////////////// +template +__TEXTURE_FUNCTIONS_DECL__ char tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_XYZW; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_X; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ 
uint2 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1Dfetch(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// +template +__TEXTURE_FUNCTIONS_DECL__ char tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_XY; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ char4 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1D(texture 
texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + 
TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1D(texture texRef, hipTextureObject_t textureObject, int x) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1D(i, s, x); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_CHAR_X; +} +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLod(texture texRef, 
hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float 
level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f 
= __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + 
TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLod(texture texRef, hipTextureObject_t textureObject, float x, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1D(i, s, x, level); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + 
TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DGrad(texture texRef, hipTextureObject_t 
textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DGrad(texture texRef, 
hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1DGrad(texture texRef, hipTextureObject_t textureObject, float x, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1D(i, s, x, dx, dy); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, 
y).get_vector()); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex2D(texture texRef, 
hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_UINT_XYZW; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ float tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex2D(texture texRef, hipTextureObject_t textureObject, float x, float y) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2D(i, s, hc::short_vector::float2(x, y).get_vector()); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_CHAR_XY; +} + 
+template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, 
hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float 
level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLod(texture texRef, 
hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2D(i, s, hc::short_vector::float2(x, y).get_vector(), level); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 
dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DGrad(texture texRef, hipTextureObject_t 
textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 
tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_USHORT_XYZW; 
+} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + 
TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, 
dy.y).get_vector()); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex2DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2D(i, s, + hc::short_vector::float2(x, y).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, 
hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, 
s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex3D(texture texRef, hipTextureObject_t textureObject, float x, float y, float z) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector()); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_CHAR_XY; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ char4 tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float 
level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 
0.0f).get_vector(), level); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex3DLod(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_3D(i, s, hc::short_vector::float4(x, y, z, 0.0f).get_vector(), level); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// +template +__TEXTURE_FUNCTIONS_DECL__ char tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + 
texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, 
dy.z, dy.w).get_vector()); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 
dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_SHORT_XY; +} + /* tex3DGrad overloads. The overload that closes just before this comment begins outside the visible text and is left untouched. Each overload below expands TEXTURE_PARAMETERS_INIT, stores the result of __ockl_image_sample_grad_3D into texel.f, then expands the TEXTURE_RETURN_* macro named after its return type. The sample call receives i and s followed by three values built with hc::short_vector::float4(...).get_vector(): (x, y, z, 0.0f), the four components of dx, and the four components of dy. i, s and texel are not declared in the visible text; presumably TEXTURE_PARAMETERS_INIT introduces them (TODO confirm against the macro). texRef is not referenced in the visible body text. NOTE(review): the template keyword and the texture type carry no angle-bracket lists here; they look stripped during extraction, verify against the original header. */ +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_SHORT_XYZW; +} + /* unsigned short, ushort1, ushort2, ushort4 returns (TEXTURE_RETURN_USHORT, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, 
dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_USHORT_XYZW; +} + /* int, int1, int2, int4 returns (TEXTURE_RETURN_INT, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ int tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex3DGrad(texture texRef, hipTextureObject_t 
textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_INT_XYZW; +} + /* unsigned int, uint1, uint2, uint4 returns (TEXTURE_RETURN_UINT, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + 
hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_UINT_XYZW; +} + /* float, float1, float2, float4 returns (TEXTURE_RETURN_FLOAT, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ float tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex3DGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, float z, float4 dx, float4 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f =__ockl_image_sample_grad_3D(i, s, + hc::short_vector::float4(x, y, z, 0.0f).get_vector(), + hc::short_vector::float4(dx.x, dx.y, dx.z, dx.w).get_vector(), + hc::short_vector::float4(dy.x, dy.y, dy.z, dy.w).get_vector()); + TEXTURE_RETURN_FLOAT_XYZW; +} + 
+//////////////////////////////////////////////////////////// + /* tex1DLayered overloads, char through unsigned int return types. Each overload expands TEXTURE_PARAMETERS_INIT, stores the result of __ockl_image_sample_1Da into texel.f (arguments: i, s, and x and layer packed through hc::short_vector::float2(x, layer).get_vector()), then expands the TEXTURE_RETURN_* macro named after its return type. i, s and texel are not declared in the visible text; presumably TEXTURE_PARAMETERS_INIT introduces them (TODO confirm against the macro). texRef is not referenced in the visible body text. NOTE(review): the template keyword and the texture type carry no angle-bracket lists here; they look stripped during extraction, verify against the original header. */ +template +__TEXTURE_FUNCTIONS_DECL__ char tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_CHAR_XYZW; +} + /* unsigned char, uchar1, uchar2, uchar4 returns (TEXTURE_RETURN_UCHAR, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + 
TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_UCHAR_XYZW; +} + /* short, short1, short2, short4 returns (TEXTURE_RETURN_SHORT, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ short tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_SHORT_XYZW; +} + /* unsigned short, ushort1, ushort2, ushort4 returns (TEXTURE_RETURN_USHORT, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + 
TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_USHORT_XYZW; +} + /* int, int1, int2, int4 returns (TEXTURE_RETURN_INT, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ int tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_INT_XYZW; +} + /* unsigned int return (TEXTURE_RETURN_UINT). */ +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_UINT; +} + 
+template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + /* Same three-step body in every overload on this line: expand TEXTURE_PARAMETERS_INIT, store the result of __ockl_image_sample_1Da into texel.f (arguments: i, s, and x and layer packed through hc::short_vector::float2), then expand the TEXTURE_RETURN_* macro named after the return type. i, s and texel are not declared in the visible text; presumably TEXTURE_PARAMETERS_INIT introduces them (TODO confirm against the macro). NOTE(review): the template keyword and the texture type carry no angle-bracket lists here; they look stripped during extraction, verify against the original header. */ texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_UINT_XYZW; +} + /* float, float1, float2, float4 returns (TEXTURE_RETURN_FLOAT, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ float tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayered(texture texRef, hipTextureObject_t textureObject, float x, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_1Da(i, s, hc::short_vector::float2(x, layer).get_vector()); + TEXTURE_RETURN_FLOAT_XYZW; +} + 
+//////////////////////////////////////////////////////////// + /* tex1DLayeredLod overloads. Each overload expands TEXTURE_PARAMETERS_INIT, stores the result of __ockl_image_sample_lod_1Da into texel.f (arguments: i, s, x and layer packed through hc::short_vector::float2(x, layer).get_vector(), and level passed through unchanged), then expands the TEXTURE_RETURN_* macro named after its return type. i, s and texel are not declared in the visible text; presumably TEXTURE_PARAMETERS_INIT introduces them (TODO confirm against the macro). texRef is not referenced in the visible body text. NOTE(review): the template keyword and the texture type carry no angle-bracket lists here; they look stripped during extraction, verify against the original header. */ +template +__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_CHAR_XYZW; +} + /* unsigned char, uchar1, uchar2, uchar4 returns (TEXTURE_RETURN_UCHAR, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredLod(texture texRef, 
hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_UCHAR_XYZW; +} + /* short, short1, short2, short4 returns (TEXTURE_RETURN_SHORT, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_SHORT_XYZW; +} + /* unsigned short, ushort1, ushort2, ushort4 returns (TEXTURE_RETURN_USHORT, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_USHORT_XYZW; +} + /* int, int1, int2, int4 returns (TEXTURE_RETURN_INT, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_INT_XY; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_INT_XYZW; +} + /* unsigned int, uint1, uint2, uint4 returns (TEXTURE_RETURN_UINT, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_UINT_XYZW; +} + /* float, float1, float2, float4 returns (TEXTURE_RETURN_FLOAT, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + 
TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), level); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + /* tex1DLayeredGrad overloads. Same shape as the tex1DLayeredLod overloads before the separator, except that the sample call is __ockl_image_sample_grad_1Da and it receives the scalar dx and dy parameters after the packed (x, layer) pair instead of level. The TEXTURE_RETURN_* macro again follows the return type. */ +template +__TEXTURE_FUNCTIONS_DECL__ char tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = 
__ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_CHAR_XYZW; +} + /* unsigned char, uchar1, uchar2, uchar4 returns (TEXTURE_RETURN_UCHAR, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_UCHAR_XYZW; +} + /* short, short1, short2, short4 returns (TEXTURE_RETURN_SHORT, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ short tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), 
dx, dy); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_SHORT_XYZW; +} + /* unsigned short, ushort1, ushort2, ushort4 returns (TEXTURE_RETURN_USHORT, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_USHORT_XYZW; +} + /* int, int1, int2, int4 returns (TEXTURE_RETURN_INT, _X, _XY, _XYZW). */ +template 
+__TEXTURE_FUNCTIONS_DECL__ int tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_INT_XYZW; +} + /* unsigned int, uint1, uint2, uint4 returns (TEXTURE_RETURN_UINT, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, 
float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_UINT_XYZW; +} + /* float, float1, float2, float4 returns (TEXTURE_RETURN_FLOAT, _X, _XY, _XYZW). */ +template +__TEXTURE_FUNCTIONS_DECL__ float tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex1DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, int layer, float dx, float dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_1Da(i, s, hc::short_vector::float2(x, layer).get_vector(), dx, dy); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + /* The tex2DLayered overloads begin here; the first one continues past the end of the visible text and is left untouched. */ +template +__TEXTURE_FUNCTIONS_DECL__ char tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ 
+ TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 
tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, 
y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + 
TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 
tex2DLayered(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector()); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_CHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 
tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredLod(texture texRef, 
hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredLod(texture texRef, hipTextureObject_t 
textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float 
level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredLod(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float level) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_lod_2Da(i, s, hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), level); + TEXTURE_RETURN_FLOAT_XYZW; +} + +//////////////////////////////////////////////////////////// + +template +__TEXTURE_FUNCTIONS_DECL__ char tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_CHAR; +} + +template 
+__TEXTURE_FUNCTIONS_DECL__ char1 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_CHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char2 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_CHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ char4 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_CHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned char tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_UCHAR; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar1 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + 
hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_UCHAR_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar2 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_UCHAR_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uchar4 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_UCHAR_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_SHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short1 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_SHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short2 
tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_SHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ short4 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_SHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned short tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_USHORT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort1 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_USHORT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort2 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, 
layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_USHORT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ ushort4 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_USHORT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_INT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int1 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_INT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int2 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_INT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ int4 tex2DLayeredGrad(texture texRef, 
hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_INT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ unsigned int tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_UINT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint1 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_UINT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint2 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_UINT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ uint4 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + 
hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_UINT_XYZW; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_FLOAT; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float1 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_FLOAT_X; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float2 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_FLOAT_XY; +} + +template +__TEXTURE_FUNCTIONS_DECL__ float4 tex2DLayeredGrad(texture texRef, hipTextureObject_t textureObject, float x, float y, int layer, float2 dx, float2 dy) +{ + TEXTURE_PARAMETERS_INIT; + texel.f = __ockl_image_sample_grad_2Da(i, s, + hc::short_vector::float4(x, y, layer, 0.0f).get_vector(), + hc::short_vector::float2(dx.x, dx.y).get_vector(), + hc::short_vector::float2(dy.x, dy.y).get_vector()); + TEXTURE_RETURN_FLOAT_XYZW; +} +#endif diff --git a/include/hip/hcc_detail/texture_types.h b/include/hip/hcc_detail/texture_types.h index 
74680bbc76..731ed12308 100644 --- a/include/hip/hcc_detail/texture_types.h +++ b/include/hip/hcc_detail/texture_types.h @@ -26,20 +26,91 @@ THE SOFTWARE. #include -enum hipTextureReadMode +#define hipTextureType1D 0x01 +#define hipTextureType2D 0x02 +#define hipTextureType3D 0x03 +#define hipTextureTypeCubemap 0x0C +#define hipTextureType1DLayered 0xF1 +#define hipTextureType2DLayered 0xF2 +#define hipTextureTypeCubemapLayered 0xFC + +/** + * Should be same as HSA_IMAGE_OBJECT_SIZE_DWORD/HSA_SAMPLER_OBJECT_SIZE_DWORD + */ +#define HIP_IMAGE_OBJECT_SIZE_DWORD 12 +#define HIP_SAMPLER_OBJECT_SIZE_DWORD 8 +#define HIP_SAMPLER_OBJECT_OFFSET_DWORD HIP_IMAGE_OBJECT_SIZE_DWORD +#define HIP_TEXTURE_OBJECT_SIZE_DWORD (HIP_IMAGE_OBJECT_SIZE_DWORD + HIP_SAMPLER_OBJECT_SIZE_DWORD) + +/** + * An opaque value that represents a hip texture object + */ +typedef unsigned long long hipTextureObject_t; + +/** + * hip texture address modes + */ +enum hipTextureAddressMode { - hipReadModeElementType = 0 + hipAddressModeWrap = 0, + hipAddressModeClamp = 1, + hipAddressModeMirror = 2, + hipAddressModeBorder = 3 }; +/** + * hip texture filter modes + */ enum hipTextureFilterMode { - hipFilterModePoint = 0 + hipFilterModePoint = 0, + hipFilterModeLinear = 1 }; -struct textureReference { - enum hipTextureFilterMode filterMode; - unsigned normalized; - struct hipChannelFormatDesc channelDesc; +/** + * hip texture read modes + */ +enum hipTextureReadMode +{ + hipReadModeElementType = 0, + hipReadModeNormalizedFloat = 1 +}; + +/** + * hip texture reference + */ +struct textureReference +{ + int normalized; + enum hipTextureFilterMode filterMode; + enum hipTextureAddressMode addressMode[3]; //Texture address mode for up to 3 dimensions + struct hipChannelFormatDesc channelDesc; + int sRGB; // Perform sRGB->linear conversion during texture read + unsigned int maxAnisotropy; // Limit to the anisotropy ratio + enum hipTextureFilterMode mipmapFilterMode; + float mipmapLevelBias; + float 
minMipmapLevelClamp; + float maxMipmapLevelClamp; + + hipTextureObject_t textureObject; +}; + +/** + * hip texture descriptor + */ +struct hipTextureDesc +{ + enum hipTextureAddressMode addressMode[3]; //Texture address mode for up to 3 dimensions + enum hipTextureFilterMode filterMode; + enum hipTextureReadMode readMode; + int sRGB; // Perform sRGB->linear conversion during texture read + float borderColor[4]; + int normalizedCoords; + unsigned int maxAnisotropy; + enum hipTextureFilterMode mipmapFilterMode; + float mipmapLevelBias; + float minMipmapLevelClamp; + float maxMipmapLevelClamp; }; #endif diff --git a/include/hip/hip_hcc.h b/include/hip/hip_hcc.h index 3407a311bd..582d544eb9 100644 --- a/include/hip/hip_hcc.h +++ b/include/hip/hip_hcc.h @@ -23,8 +23,83 @@ THE SOFTWARE. #ifndef HIP_INCLUDE_HIP_HIP_HCC_H #define HIP_INCLUDE_HIP_HIP_HCC_H -#if defined(__HIP_PLATFORM_HCC__) && !defined (__HIP_PLATFORM_NVCC__) -#include "hip/hcc_detail/hip_hcc.h" -#endif +#ifdef __HCC__ -#endif +#include "hip/hip_runtime_api.h" + +// Forward declarations: +namespace hc { + class accelerator; + class accelerator_view; +}; + + +/** + *------------------------------------------------------------------------------------------------- + *------------------------------------------------------------------------------------------------- + * @defgroup HCC-specific features + * @warning These APIs provide access to special features of HCC compiler and are not available through the CUDA path. + * @{ + */ + + +/** + * @brief Return hc::accelerator associated with the specified deviceId + * @return #hipSuccess, #hipErrorInvalidDevice + */ +hipError_t hipHccGetAccelerator(int deviceId, hc::accelerator *acc); + +/** + * @brief Return hc::accelerator_view associated with the specified stream + * + * If stream is 0, the accelerator_view for the default stream is returned. 
+ * @return #hipSuccess + */ +hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **av); + + + +/** + * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelparams or extra + * + * @param [in[ f Kernel to launch. + * @param [in] gridDimX X grid dimension specified in work-items + * @param [in] gridDimY Y grid dimension specified in work-items + * @param [in] gridDimZ Z grid dimension specified in work-items + * @param [in] blockDimX X block dimensions specified in work-items + * @param [in] blockDimY Y grid dimension specified in work-items + * @param [in] blockDimZ Z grid dimension specified in work-items + * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The kernel can access this with HIP_DYNAMIC_SHARED. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th default stream is used with associated synchronization rules. + * @param [in] kernelParams + * @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and must be in the memory layout and alignment expected by the kernel. + * @param [in] startEvent If non-null, specified event will be updated to track the start time of the kernel launch. The event must be created before calling this API. + * @param [in] stopEvent If non-null, specified event will be updated to track the stop time of the kernel launch. The event must be created before calling this API. + * + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + * + * @warning kernellParams argument is not yet implemented in HIP. Please use extra instead. Please refer to hip_porting_driver_api.md for sample usage. + + * HIP/ROCm actually updates the start event when the associated kernel completes. 
+ */ +hipError_t hipHccModuleLaunchKernel(hipFunction_t f, + uint32_t globalWorkSizeX, + uint32_t globalWorkSizeY, + uint32_t globalWorkSizeZ, + uint32_t localWorkSizeX, + uint32_t localWorkSizeY, + uint32_t localWorkSizeZ, + size_t sharedMemBytes, + hipStream_t hStream, + void **kernelParams, + void **extra, + hipEvent_t startEvent=nullptr, + hipEvent_t stopEvent=nullptr + ); + +// doxygen end HCC-specific features +/** + * @} + */ +#endif // #ifdef __HCC__ +#endif // #ifdef HIP_INCLUDE_HIP_HIP_HCC_H diff --git a/include/hip/hip_runtime_api.h b/include/hip/hip_runtime_api.h index dc163d5c25..97733964bf 100644 --- a/include/hip/hip_runtime_api.h +++ b/include/hip/hip_runtime_api.h @@ -102,6 +102,7 @@ typedef struct hipDeviceProp_t { int clockInstructionRate; ///< Frequency in khz of the timer used by the device-side "clock*" instructions. New for HIP. hipDeviceArch_t arch; ///< Architectural feature flags. New for HIP. int concurrentKernels; ///< Device can possibly execute multiple kernels concurrently. + int pciDomainID; ///< PCI Domain ID int pciBusID; ///< PCI Bus ID. int pciDeviceID; ///< PCI Device ID. size_t maxSharedMemoryPerMultiProcessor; ///< Maximum Shared Memory Per Multiprocessor. @@ -160,6 +161,7 @@ typedef enum hipError_t { hipErrorProfilerNotInitialized = 6, hipErrorProfilerAlreadyStarted = 7, hipErrorProfilerAlreadyStopped = 8, + hipErrorInsufficientDriver = 35, hipErrorInvalidImage = 200, hipErrorInvalidContext = 201, ///< Produced when input context is invalid. hipErrorContextAlreadyCurrent = 202, diff --git a/include/hip/hip_texture.h b/include/hip/hip_texture_types.h similarity index 83% rename from include/hip/hip_texture.h rename to include/hip/hip_texture_types.h index a15c5a1016..44dc7a9e0f 100644 --- a/include/hip/hip_texture.h +++ b/include/hip/hip_texture_types.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -20,13 +20,15 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef HIP_INCLUDE_HIP_HIP_TEXTURE_H -#define HIP_INCLUDE_HIP_HIP_TEXTURE_H + + +#ifndef HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H +#define HIP_INCLUDE_HIP_HIP_TEXTURE_TYPES_H #if defined(__HIP_PLATFORM_HCC__) && !defined (__HIP_PLATFORM_NVCC__) -#include +#include #elif defined(__HIP_PLATFORM_NVCC__) && !defined (__HIP_PLATFORM_HCC__) -#include +#include #else #error("Must define exactly one of __HIP_PLATFORM_HCC__ or __HIP_PLATFORM_NVCC__"); #endif diff --git a/include/hip/nvcc_detail/hip_runtime.h b/include/hip/nvcc_detail/hip_runtime.h index 8c08f3d151..fca9ab6e39 100644 --- a/include/hip/nvcc_detail/hip_runtime.h +++ b/include/hip/nvcc_detail/hip_runtime.h @@ -111,6 +111,8 @@ kernelName<<>>(__VA_ARGS__);\ #ifdef __HIP_DEVICE_COMPILE__ #define abort() {asm("trap;");} +#undef assert +#define assert(COND) { if (!COND) {abort();} } #endif #endif diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h index b1011aac6c..1103bb2274 100644 --- a/include/hip/nvcc_detail/hip_runtime_api.h +++ b/include/hip/nvcc_detail/hip_runtime_api.h @@ -31,6 +31,13 @@ THE SOFTWARE. extern "C" { #endif +#ifdef __cplusplus + #define __dparm(x) \ + = x +#else + #define __dparm(x) +#endif + //TODO -move to include/hip_runtime_api.h as a common implementation. 
/** * Memory copy types @@ -102,20 +109,20 @@ typedef cudaEvent_t hipEvent_t; typedef cudaStream_t hipStream_t; typedef cudaIpcEventHandle_t hipIpcEventHandle_t; typedef cudaIpcMemHandle_t hipIpcMemHandle_t; -typedef cudaLimit hipLimit_t; -typedef cudaFuncCache hipFuncCache_t; +typedef enum cudaLimit hipLimit_t; +typedef enum cudaFuncCache hipFuncCache_t; typedef CUcontext hipCtx_t; -typedef CUsharedconfig hipSharedMemConfig; +typedef cudaSharedMemConfig hipSharedMemConfig; typedef CUfunc_cache hipFuncCache; typedef CUjit_option hipJitOption; typedef CUdevice hipDevice_t; typedef CUmodule hipModule_t; typedef CUfunction hipFunction_t; typedef CUdeviceptr hipDeviceptr_t; -typedef cudaChannelFormatKind hipChannelFormatKind; -typedef cudaChannelFormatDesc hipChannelFormatDesc; -typedef cudaTextureReadMode hipTextureReadMode; -typedef cudaArray hipArray; +typedef enum cudaChannelFormatKind hipChannelFormatKind; +typedef struct cudaChannelFormatDesc hipChannelFormatDesc; +typedef enum cudaTextureReadMode hipTextureReadMode; +typedef struct cudaArray hipArray; // Flags that can be used with hipStreamCreateWithFlags #define hipStreamDefault cudaStreamDefault @@ -124,6 +131,11 @@ typedef cudaArray hipArray; //typedef cudaChannelFormatDesc hipChannelFormatDesc; #define hipChannelFormatDesc cudaChannelFormatDesc +//adding code for hipmemSharedConfig +#define hipSharedMemBankSizeDefault cudaSharedMemBankSizeDefault +#define hipSharedMemBankSizeFourByte cudaSharedMemBankSizeFourByte +#define hipSharedMemBankSizeEightByte cudaSharedMemBankSizeEightByte + inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) { switch(cuError) { case cudaSuccess : return hipSuccess; @@ -187,7 +199,7 @@ switch(hError) { } } -inline static cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) { +inline static enum cudaMemcpyKind hipMemcpyKindToCudaMemcpyKind(hipMemcpyKind kind) { switch(kind) { case hipMemcpyHostToHost: return cudaMemcpyHostToHost; @@ -250,7 +262,7 @@ 
inline static hipError_t hipHostMalloc(void** ptr, size_t size, unsigned int fla return hipCUDAErrorTohipError(cudaHostAlloc(ptr, size, flags)); } -inline static hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, size_t width, size_t height, unsigned int flags) { +inline static hipError_t hipMallocArray(hipArray** array, const struct hipChannelFormatDesc* desc, size_t width, size_t height, unsigned int flags) { return hipCUDAErrorTohipError(cudaMallocArray(array, desc, width, height, flags)); } @@ -289,8 +301,8 @@ inline static hipError_t hipSetDevice(int device) { inline static hipError_t hipChooseDevice( int* device, const hipDeviceProp_t* prop ) { - cudaDeviceProp cdprop; - memset(&cdprop,0x0,sizeof(cudaDeviceProp)); + struct cudaDeviceProp cdprop; + memset(&cdprop,0x0,sizeof(struct cudaDeviceProp)); cdprop.major= prop->major; cdprop.minor = prop->minor; cdprop.totalGlobalMem = prop->totalGlobalMem ; @@ -351,25 +363,24 @@ inline static hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, } -inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind copyKind, hipStream_t stream=0) { +inline static hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind copyKind, hipStream_t stream __dparm(0)) { return hipCUDAErrorTohipError(cudaMemcpyAsync(dst, src, sizeBytes, hipMemcpyKindToCudaMemcpyKind(copyKind), stream)); } - -inline static hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes, size_t offset = 0, hipMemcpyKind copyType = hipMemcpyHostToDevice) { +inline static hipError_t hipMemcpyToSymbol(const void* symbol, const void* src, size_t sizeBytes, size_t offset __dparm(0), hipMemcpyKind copyType __dparm(hipMemcpyHostToDevice)) { return hipCUDAErrorTohipError(cudaMemcpyToSymbol(symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType))); } -inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, 
const void* src, size_t sizeBytes, size_t offset, hipMemcpyKind copyType, hipStream_t stream = 0) { +inline static hipError_t hipMemcpyToSymbolAsync(const void* symbol, const void* src, size_t sizeBytes, size_t offset, hipMemcpyKind copyType, hipStream_t stream __dparm(0)) { return hipCUDAErrorTohipError(cudaMemcpyToSymbolAsync(symbol, src, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(copyType), stream)); } -inline static hipError_t hipMemcpyFromSymbol(void *dst, const void* symbolName, size_t sizeBytes, size_t offset = 0, hipMemcpyKind kind = hipMemcpyDeviceToHost) +inline static hipError_t hipMemcpyFromSymbol(void *dst, const void* symbolName, size_t sizeBytes, size_t offset __dparm(0), hipMemcpyKind kind __dparm(hipMemcpyDeviceToHost)) { return hipCUDAErrorTohipError(cudaMemcpyFromSymbol(dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind))); } -inline static hipError_t hipMemcpyFromSymbolAsync(void *dst, const void* symbolName, size_t sizeBytes, size_t offset, hipMemcpyKind kind, hipStream_t stream = 0) +inline static hipError_t hipMemcpyFromSymbolAsync(void *dst, const void* symbolName, size_t sizeBytes, size_t offset, hipMemcpyKind kind, hipStream_t stream __dparm(0)) { return hipCUDAErrorTohipError(cudaMemcpyFromSymbolAsync(dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream)); } @@ -438,7 +449,7 @@ inline static hipError_t hipMemset(void* devPtr,int value, size_t count) { return hipCUDAErrorTohipError(cudaMemset(devPtr, value, count)); } -inline static hipError_t hipMemsetAsync(void* devPtr,int value, size_t count, hipStream_t stream = 0) { +inline static hipError_t hipMemsetAsync(void* devPtr,int value, size_t count, hipStream_t stream __dparm(0)) { return hipCUDAErrorTohipError(cudaMemsetAsync(devPtr, value, count, stream)); } @@ -449,7 +460,7 @@ inline static hipError_t hipMemsetD8(hipDeviceptr_t dest, unsigned char value, inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t *p_prop, int 
device) { - cudaDeviceProp cdprop; + struct cudaDeviceProp cdprop; cudaError_t cerror; cerror = cudaGetDeviceProperties(&cdprop,device); strncpy(p_prop->name,cdprop.name, 256); @@ -510,7 +521,7 @@ inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t *p_prop, int dev inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { - cudaDeviceAttr cdattr; + enum cudaDeviceAttr cdattr; cudaError_t cerror; switch (attr) { @@ -586,7 +597,7 @@ inline static hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( } inline static hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, void* ptr){ - cudaPointerAttributes cPA; + struct cudaPointerAttributes cPA; hipError_t err = hipCUDAErrorTohipError(cudaPointerGetAttributes(&cPA, ptr)); if(err == hipSuccess){ switch (cPA.memoryType){ @@ -617,7 +628,7 @@ inline static hipError_t hipEventCreate( hipEvent_t* event) return hipCUDAErrorTohipError(cudaEventCreate(event)); } -inline static hipError_t hipEventRecord( hipEvent_t event, hipStream_t stream = NULL) +inline static hipError_t hipEventRecord( hipEvent_t event, hipStream_t stream __dparm(NULL)) { return hipCUDAErrorTohipError(cudaEventRecord(event,stream)); } @@ -750,18 +761,18 @@ inline static hipError_t hipMemcpyPeer ( void* dst, int dstDevice, const void* return hipCUDAErrorTohipError(cudaMemcpyPeer(dst, dstDevice, src, srcDevice, count)); } -inline static hipError_t hipMemcpyPeerAsync ( void* dst, int dstDevice, const void* src, int srcDevice, size_t count, hipStream_t stream=0 ) +inline static hipError_t hipMemcpyPeerAsync ( void* dst, int dstDevice, const void* src, int srcDevice, size_t count, hipStream_t stream __dparm(0)) { return hipCUDAErrorTohipError(cudaMemcpyPeerAsync(dst, dstDevice, src, srcDevice, count, stream)); } // Profile APIs: -inline hipError_t hipProfilerStart() +inline static hipError_t hipProfilerStart() { return hipCUDAErrorTohipError(cudaProfilerStart()); } -inline hipError_t 
hipProfilerStop() +inline static hipError_t hipProfilerStop() { return hipCUDAErrorTohipError(cudaProfilerStop()); } @@ -833,12 +844,12 @@ inline static hipError_t hipCtxSetCacheConfig (hipFuncCache cacheConfig) inline static hipError_t hipCtxSetSharedMemConfig (hipSharedMemConfig config) { - return hipCUResultTohipError(cuCtxSetSharedMemConfig(config)); + return hipCUResultTohipError(cuCtxSetSharedMemConfig((CUsharedconfig)config)); } inline static hipError_t hipCtxGetSharedMemConfig ( hipSharedMemConfig * pConfig ) { - return hipCUResultTohipError(cuCtxGetSharedMemConfig(pConfig)); + return hipCUResultTohipError(cuCtxGetSharedMemConfig((CUsharedconfig *)pConfig)); } inline static hipError_t hipCtxSynchronize ( void ) @@ -873,12 +884,22 @@ inline static hipError_t hipDeviceGetName(char *name,int len,hipDevice_t device) inline static hipError_t hipDeviceGetPCIBusId(char* pciBusId,int len,hipDevice_t device) { - return hipCUResultTohipError(cuDeviceGetPCIBusId(pciBusId,len,device)); + return hipCUDAErrorTohipError(cudaDeviceGetPCIBusId(pciBusId,len,device)); } -inline static hipError_t hipDeviceGetByPCIBusId(int* device, const int *pciBusId) +inline static hipError_t hipDeviceGetByPCIBusId(int* device, const char *pciBusId) { - return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device,(char*)pciBusId)); + return hipCUDAErrorTohipError(cudaDeviceGetByPCIBusId(device, pciBusId)); +} + +inline static hipError_t hipDeviceGetSharedMemConfig(hipSharedMemConfig *config) +{ + return hipCUDAErrorTohipError(cudaDeviceGetSharedMemConfig(config)); +} + +inline static hipError_t hipDeviceSetSharedMemConfig(hipSharedMemConfig config) +{ + return hipCUDAErrorTohipError(cudaDeviceSetSharedMemConfig(config)); } inline static hipError_t hipDeviceGetLimit(size_t *pValue, hipLimit_t limit) diff --git a/include/hip/nvcc_detail/hip_texture_types.h b/include/hip/nvcc_detail/hip_texture_types.h new file mode 100644 index 0000000000..751dd8e4d4 --- /dev/null +++ 
b/include/hip/nvcc_detail/hip_texture_types.h @@ -0,0 +1,6 @@ +#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_TYPES_H +#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_TYPES_H + +#include + +#endif diff --git a/install.sh b/install.sh new file mode 100755 index 0000000000..f8ad640798 --- /dev/null +++ b/install.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +BUILD_ROOT="$( mktemp -d )" +SRC_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +WORKING_DIR=$PWD +DASH_JAY="-j $(getconf _NPROCESSORS_ONLN)" + +err() { + echo "${1-Died}." >&2 +} + +die() { + err "$1" + exit 1 +} + +pushd () { + command pushd "$@" > /dev/null +} + +popd () { + command popd "$@" > /dev/null +} + +function setupENV() +{ + sudo apt-get update + sudo apt-get install dpkg-dev rpm doxygen libelf-dev +} + +function buildHIP() +{ + pushd $BUILD_ROOT + cmake $SRC_ROOT -DCMAKE_BUILD_TYPE=Release -DCOMPILE_HIP_ATP_MARKER=1 + make $DASH_JAY + make package + rename -v 's/([a-z0-9_.\-]).deb/$1-amd64.deb/' *.deb;rename -v 's/([a-z0-9_.\-]).rpm/$1.x86_64.rpm/' *.rpm + cp hip_*.deb $WORKING_DIR + sudo dpkg -i hip_base*.deb hip_hcc*.deb hip_sample*.deb hip_doc*.deb + popd + rm -rf $BUILD_ROOT +} + +echo "Preparing build environment" +setupENV || die "setupENV failed" +echo "Building and installing HIP packages" +buildHIP || die "buildHIP failed" +echo "Finished building HIP packages" diff --git a/packaging/convert_md_to_html.sh b/packaging/convert_md_to_html.sh index b2c868cded..e6442a9ef2 100755 --- a/packaging/convert_md_to_html.sh +++ b/packaging/convert_md_to_html.sh @@ -54,8 +54,8 @@ popd # replace github.io links pushd $html_destdir -sed -i "s?http://gpuopen-professionalcompute-tools.github.io/HIP?docs/RuntimeAPI/html/index.html?g" README.html -sed -i "s?http://gpuopen-professionalcompute-tools.github.io/HIP?docs/RuntimeAPI/html/?g" RELEASE.html +sed -i "s?http://rocm-developer-tools.github.io/HIP?docs/RuntimeAPI/html/index.html?g" README.html +sed -i 
"s?http://rocm-developer-tools.github.io/HIP?docs/RuntimeAPI/html/?g" RELEASE.html popd exit 0 diff --git a/samples/1_Utils/hipDispatchLatency/ResultDatabase.cpp b/samples/1_Utils/hipDispatchLatency/ResultDatabase.cpp index d207154e39..f6f2fab709 100644 --- a/samples/1_Utils/hipDispatchLatency/ResultDatabase.cpp +++ b/samples/1_Utils/hipDispatchLatency/ResultDatabase.cpp @@ -7,16 +7,23 @@ using namespace std; +#define SORT_BY_NAME 0 +#define SORT_RETAIN_ATTS_ORDER 1 + + bool ResultDatabase::Result::operator<(const Result &rhs) const { if (test < rhs.test) return true; if (test > rhs.test) return false; +#if (SORT_RETAIN_ATTS_ORDER == 0) + // For ties, sort by the value of the attribute: if (atts < rhs.atts) return true; if (atts > rhs.atts) return false; +#endif return false; // less-operator returns false on equal } @@ -189,7 +196,10 @@ void ResultDatabase::AddResult(const string &test_orig, void ResultDatabase::DumpDetailed(ostream &out) { vector sorted(results); - sort(sorted.begin(), sorted.end()); + +#if SORT_BY_NAME + stable_sort(sorted.begin(), sorted.end()); +#endif const int testNameW = 24 ; const int attW = 12; @@ -283,12 +293,15 @@ void ResultDatabase::DumpDetailed(ostream &out) void ResultDatabase::DumpSummary(ostream &out) { vector sorted(results); - sort(sorted.begin(), sorted.end()); - const int testNameW = 24 ; +#if SORT_BY_NAME + stable_sort(sorted.begin(), sorted.end()); +#endif + + const int testNameW = 32 ; const int attW = 12; const int fieldW = 9; - out << std::fixed << right << std::setprecision(4); + out << std::fixed << right << std::setprecision(2); // TODO: in big parallel runs, the "trials" are the procs // and we really don't want to print them all out.... @@ -334,8 +347,8 @@ void ResultDatabase::DumpSummary(ostream &out) } if (0) { out << endl - << "Note: results marked with (*) had missing values such as" << endl - << "might occur with a mixture of architectural capabilities." 
<< endl; + << "Note: results marked with (*) had missing values such as" << endl + << "might occur with a mixture of architectural capabilities." << endl; } } @@ -381,7 +394,9 @@ void ResultDatabase::DumpCsv(string fileName) bool emptyFile; vector sorted(results); - sort(sorted.begin(), sorted.end()); +#if SORT_BY_NAME + stable_sort(sorted.begin(), sorted.end()); +#endif //Check to see if the file is empty - if so, add the headers emptyFile = this->IsFileEmpty(fileName); diff --git a/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp b/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp index b343386b5c..2a4f6ff649 100644 --- a/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp +++ b/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp @@ -25,15 +25,27 @@ THE SOFTWARE. #include #include"ResultDatabase.h" -#define check(msg, status) \ -if(status != hipSuccess){ \ - printf("%s failed.\n",#msg); \ - exit(1); \ +#define PRINT_PROGRESS 0 + +#define check(cmd) \ +{\ + hipError_t status = cmd;\ + if(status != hipSuccess){ \ + printf("error: '%s'(%d) from %s at %s:%d\n", \ + hipGetErrorString(status), status, #cmd,\ + __FILE__, __LINE__); \ + abort(); \ + }\ } #define LEN 1024*1024 -#define SIZE LEN * sizeof(float) -#define ITER 10120 + +#define NUM_GROUPS 1 +#define GROUP_SIZE 64 +#define TEST_ITERS 20 +#define DISPATCHES_PER_TEST 100 + +const unsigned p_tests = 0xfffffff; // HCC optimizes away fully NULL kernel calls, so run one that is nearly null: @@ -44,115 +56,112 @@ __global__ void NearlyNull(hipLaunchParm lp, float* Ad){ } +ResultDatabase resultDB; + + +void stopTest(hipEvent_t start, hipEvent_t stop, const char *msg, int iters) +{ + float mS = 0; + check(hipEventRecord(stop)); + check(hipDeviceSynchronize()); + check(hipEventElapsedTime(&mS, start, stop)); + resultDB.AddResult(std::string(msg), "", "uS", mS*1000/iters); + if (PRINT_PROGRESS & 0x1 ) { + std::cout<< msg <<"\t\t"< 1){ - return NAN; - } - if (x == 0){ - return 0; - } - if (x > 
0){ - sign = 1; - } else { - sign = -1; - x = -x; - } - if (x <= 0.7) { - float x1 = x * x; - float x2 = std::fma(__hip_erfinva3, x1, __hip_erfinva2); - float x3 = std::fma(x2, x1, __hip_erfinva1); - float x4 = x * std::fma(x3, x1, __hip_erfinva0); - - float r1 = std::fma(__hip_erfinvb4, x1, __hip_erfinvb3); - float r2 = std::fma(r1, x1, __hip_erfinvb2); - float r3 = std::fma(r2, x1, __hip_erfinvb1); - ret = x4 / std::fma(r3, x1, __hip_erfinvb0); - } else { - float x1 = std::sqrt(-std::log((1 - x) / 2)); - float x2 = std::fma(__hip_erfinvc3, x1, __hip_erfinvc2); - float x3 = std::fma(x2, x1, __hip_erfinvc1); - float x4 = std::fma(x3, x1, __hip_erfinvc0); - - float r1 = std::fma(__hip_erfinvd2, x1, __hip_erfinvd1); - ret = x4 / std::fma(r1, x1, __hip_erfinvd0); - } - - ret = ret * sign; - x = x * sign; - - ret -= (std::erf(ret) - x) / (2 / std::sqrt(HIP_PI) * std::exp(-ret * ret)); - ret -= (std::erf(ret) - x) / (2 / std::sqrt(HIP_PI) * std::exp(-ret * ret)); - - return ret; - -} - -double __hip_host_erfinv(double x) -{ - double ret; - int sign; - if (x < -1 || x > 1){ - return NAN; - } - if (x == 0){ - return 0; - } - if (x > 0){ - sign = 1; - } else { - sign = -1; - x = -x; - } - if (x <= 0.7) { - double x1 = x * x; - double x2 = std::fma(__hip_erfinva3, x1, __hip_erfinva2); - double x3 = std::fma(x2, x1, __hip_erfinva1); - double x4 = x * std::fma(x3, x1, __hip_erfinva0); - - double r1 = std::fma(__hip_erfinvb4, x1, __hip_erfinvb3); - double r2 = std::fma(r1, x1, __hip_erfinvb2); - double r3 = std::fma(r2, x1, __hip_erfinvb1); - ret = x4 / std::fma(r3, x1, __hip_erfinvb0); - } else { - double x1 = std::sqrt(-std::log((1 - x) / 2)); - double x2 = std::fma(__hip_erfinvc3, x1, __hip_erfinvc2); - double x3 = std::fma(x2, x1, __hip_erfinvc1); - double x4 = std::fma(x3, x1, __hip_erfinvc0); - - double r1 = std::fma(__hip_erfinvd2, x1, __hip_erfinvd1); - ret = x4 / std::fma(r1, x1, __hip_erfinvd0); - } - - ret = ret * sign; - x = x * sign; - - ret -= (std::erf(ret) - x) 
/ (2 / std::sqrt(HIP_PI) * std::exp(-ret * ret)); - ret -= (std::erf(ret) - x) / (2 / std::sqrt(HIP_PI) * std::exp(-ret * ret)); - - return ret; - -} - -float __hip_host_erfcinvf(float y) -{ - return __hip_host_erfinvf(1 - y); -} - -double __hip_host_erfcinv(double y) -{ - return __hip_host_erfinv(1 - y); + std::atomic_thread_fence(std::memory_order_seq_cst); } double __hip_host_j0(double x) diff --git a/src/device_util.h b/src/device_util.h index ad8b2607dd..f942fdf5e5 100644 --- a/src/device_util.h +++ b/src/device_util.h @@ -128,12 +128,6 @@ __device__ double __hip_fast_dsqrt_ru(double x); __device__ double __hip_fast_dsqrt_rz(double x); __device__ void __threadfence_system(void); -float __hip_host_erfinvf(float x); -double __hip_host_erfinv(double x); - -float __hip_host_erfcinvf(float y); -double __hip_host_erfcinv(double y); - float __hip_host_j0f(float x); double __hip_host_j0(double x); diff --git a/src/grid_launch.cpp b/src/grid_launch.cpp index f3b28c5f60..fd5c2a1573 100644 --- a/src/grid_launch.cpp +++ b/src/grid_launch.cpp @@ -72,8 +72,8 @@ namespace hip_impl } if (COMPILE_HIP_DB && HIP_TRACE_API) { - std::cerr << API_COLOR << os.str() << API_COLOR_END - << std::endl; + std::string fullStr; + recordApiTrace(&fullStr, os.str()); } } } diff --git a/src/hip_context.cpp b/src/hip_context.cpp index 11ef6d6da5..69d75e7f31 100644 --- a/src/hip_context.cpp +++ b/src/hip_context.cpp @@ -31,6 +31,7 @@ THE SOFTWARE. 
// Stack of contexts thread_local std::stack tls_ctxStack; +thread_local bool tls_getPrimaryCtx = true; void ihipCtxStackUpdate() { @@ -65,6 +66,7 @@ hipError_t hipCtxCreate(hipCtx_t *ctx, unsigned int flags, hipDevice_t device) *ctx = ictx; ihipSetTlsDefaultCtx(*ctx); tls_ctxStack.push(*ctx); + tls_getPrimaryCtx = false; deviceCrit->addContext(ictx); } @@ -93,8 +95,7 @@ hipError_t hipDriverGetVersion(int *driverVersion) hipError_t e = hipSuccess; if (driverVersion) { *driverVersion = 4; - } - else { + } else { e = hipErrorInvalidValue; } @@ -107,8 +108,7 @@ hipError_t hipRuntimeGetVersion(int *runtimeVersion) hipError_t e = hipSuccess; if (runtimeVersion) { *runtimeVersion = HIP_VERSION_PATCH; - } - else { + } else { e = hipErrorInvalidValue; } @@ -124,9 +124,7 @@ hipError_t hipCtxDestroy(hipCtx_t ctx) if(primaryCtx== ctx) { e = hipErrorInvalidValue; - } - else - { + } else { if(currentCtx == ctx) { //need to destroy the ctx associated with calling thread tls_ctxStack.pop(); @@ -146,19 +144,21 @@ hipError_t hipCtxPopCurrent(hipCtx_t* ctx) { HIP_INIT_API(ctx); hipError_t e = hipSuccess; - ihipCtx_t* tempCtx; - *ctx = ihipGetTlsDefaultCtx(); + ihipCtx_t* currentCtx = ihipGetTlsDefaultCtx(); + auto deviceHandle = currentCtx->getDevice(); + *ctx = currentCtx; + if(!tls_ctxStack.empty()) { tls_ctxStack.pop(); } + if(!tls_ctxStack.empty()) { - tempCtx= tls_ctxStack.top(); - } - else { - tempCtx = nullptr; + currentCtx= tls_ctxStack.top(); + } else { + currentCtx = deviceHandle->_primaryCtx; } - ihipSetTlsDefaultCtx(tempCtx); //TOD0 - Shall check for NULL? + ihipSetTlsDefaultCtx(currentCtx); //TOD0 - Shall check for NULL? return ihipLogStatus(e); } @@ -169,8 +169,8 @@ hipError_t hipCtxPushCurrent(hipCtx_t ctx) if(ctx != NULL) { //TODO- is this check needed? 
ihipSetTlsDefaultCtx(ctx); tls_ctxStack.push(ctx); - } - else { + tls_getPrimaryCtx = false; + } else { e = hipErrorInvalidContext; } return ihipLogStatus(e); @@ -180,12 +180,11 @@ hipError_t hipCtxGetCurrent(hipCtx_t* ctx) { HIP_INIT_API(ctx); hipError_t e = hipSuccess; - if(!tls_ctxStack.empty()) { + if((tls_getPrimaryCtx) || tls_ctxStack.empty()) { + *ctx = ihipGetTlsDefaultCtx(); + } else { *ctx= tls_ctxStack.top(); } - else { - *ctx = NULL; - } return ihipLogStatus(e); } @@ -195,10 +194,10 @@ hipError_t hipCtxSetCurrent(hipCtx_t ctx) hipError_t e = hipSuccess; if(ctx == NULL) { tls_ctxStack.pop(); - } - else { + } else { ihipSetTlsDefaultCtx(ctx); tls_ctxStack.push(ctx); + tls_getPrimaryCtx = false; } return ihipLogStatus(e); } @@ -213,8 +212,7 @@ hipError_t hipCtxGetDevice(hipDevice_t *device) if(ctx == nullptr) { e = hipErrorInvalidContext; // TODO *device = nullptr; - } - else { + } else { auto deviceHandle = ctx->getDevice(); *device = deviceHandle->_deviceId; } diff --git a/src/hip_db.cpp b/src/hip_db.cpp new file mode 100644 index 0000000000..7565d70422 --- /dev/null +++ b/src/hip_db.cpp @@ -0,0 +1,12 @@ + +#include + + + + +void hipdbPrintMem(void *targetAddress) +{ + hc::am_memtracker_print(targetAddress); +}; + + diff --git a/src/hip_device.cpp b/src/hip_device.cpp index 2bb9970d35..ff511b5509 100644 --- a/src/hip_device.cpp +++ b/src/hip_device.cpp @@ -146,13 +146,14 @@ hipError_t hipSetDevice(int deviceId) return ihipLogStatus(hipErrorInvalidDevice); } else { ihipSetTlsDefaultCtx(ihipGetPrimaryCtx(deviceId)); + tls_getPrimaryCtx = true; return ihipLogStatus(hipSuccess); } } hipError_t hipDeviceSynchronize(void) { - HIP_INIT_API(); + HIP_INIT_SPECIAL_API(TRACE_SYNC); return ihipLogStatus(ihipSynchronize()); } @@ -180,6 +181,7 @@ hipError_t hipDeviceReset(void) return ihipLogStatus(hipSuccess); } + hipError_t ihipDeviceSetState(void) { hipError_t e = hipErrorInvalidContext; @@ -272,6 +274,9 @@ hipError_t ihipDeviceGetAttribute(int* pi, 
hipDeviceAttribute_t attr, int device hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { HIP_INIT_API(pi, attr, device); + if ((device < 0) || (device >= g_deviceCnt)) { + return ihipLogStatus(hipErrorInvalidDevice); + } return ihipLogStatus(ihipDeviceGetAttribute(pi,attr,device)); } @@ -298,6 +303,9 @@ hipError_t ihipGetDeviceProperties(hipDeviceProp_t* props, int device) hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) { HIP_INIT_API(props, device); + if ((device < 0) || (device >= g_deviceCnt)) { + return ihipLogStatus(hipErrorInvalidDevice); + } return ihipLogStatus(ihipGetDeviceProperties(props, device)); } @@ -350,41 +358,44 @@ hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device { HIP_INIT_API(major,minor, device); hipError_t e = hipSuccess; - e = ihipDeviceGetAttribute(major, hipDeviceAttributeComputeCapabilityMajor, device); - e = ihipDeviceGetAttribute(minor, hipDeviceAttributeComputeCapabilityMinor, device); + if ((device < 0) || (device >= g_deviceCnt)) { + e = hipErrorInvalidDevice; + } else { + e = ihipDeviceGetAttribute(major, hipDeviceAttributeComputeCapabilityMajor, device); + e = ihipDeviceGetAttribute(minor, hipDeviceAttributeComputeCapabilityMinor, device); + } return ihipLogStatus(e); } hipError_t hipDeviceGetName(char *name,int len,hipDevice_t device) { - HIP_INIT_API(name,len, device); + // Cast to void* here to avoid printing garbage in debug modes. 
+ HIP_INIT_API((void*)name,len, device); hipError_t e = hipSuccess; - auto deviceHandle = ihipGetDevice(device); - int nameLen = strlen(deviceHandle->_props.name); - if(nameLen <= len) - memcpy(name,deviceHandle->_props.name,nameLen); + if ((device < 0) || (device >= g_deviceCnt)) { + e = hipErrorInvalidDevice; + } else { + auto deviceHandle = ihipGetDevice(device); + int nameLen = strlen(deviceHandle->_props.name); + if(nameLen <= len) + memcpy(name,deviceHandle->_props.name,nameLen); + } return ihipLogStatus(e); } hipError_t hipDeviceGetPCIBusId (char *pciBusId,int len, int device) { - HIP_INIT_API(pciBusId, len, device); + // Cast to void* here to avoid printing garbage in debug modes. + HIP_INIT_API((void*)pciBusId, len, device); hipError_t e = hipErrorInvalidValue; - int deviceCount = 0; - ihipGetDeviceCount( &deviceCount ); - if((device > deviceCount) || (device < 0)) { + if ((device < 0) || (device >= g_deviceCnt)) { e = hipErrorInvalidDevice; } else { if((pciBusId != nullptr) && (len > 0)) { - int tempPciBusId = 0; - e = ihipDeviceGetAttribute( &tempPciBusId, hipDeviceAttributePciBusId, device); - if( e == hipSuccess) { - std::string tempPciStr = std::to_string(tempPciBusId); - if( len < tempPciStr.length()){ - e = hipErrorInvalidValue; - } else { - memcpy( pciBusId , tempPciStr.c_str() , tempPciStr.length() ); - } + auto deviceHandle = ihipGetDevice(device); + int retVal = snprintf(pciBusId,len, "%04x:%02x:%02x.0",deviceHandle->_props.pciDomainID,deviceHandle->_props.pciBusID,deviceHandle->_props.pciDeviceID); + if( retVal > 0 && retVal < len) { + e = hipSuccess; } } } @@ -395,26 +406,38 @@ hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device) { HIP_INIT_API(bytes, device); hipError_t e = hipSuccess; - auto deviceHandle = ihipGetDevice(device); - *bytes= deviceHandle->_props.totalGlobalMem; + if ((device < 0) || (device >= g_deviceCnt)) { + e = hipErrorInvalidDevice; + } else { + auto deviceHandle = ihipGetDevice(device); + *bytes= 
deviceHandle->_props.totalGlobalMem; + } return ihipLogStatus(e); } -hipError_t hipDeviceGetByPCIBusId (int* device, const int* pciBusId ) +hipError_t hipDeviceGetByPCIBusId (int* device, const char* pciBusId ) { HIP_INIT_API(device,pciBusId); hipDeviceProp_t tempProp; - int deviceCount; + int deviceCount = 0 ; hipError_t e = hipErrorInvalidValue; - ihipGetDeviceCount( &deviceCount ); - *device = 0; - for (int i = 0; i< deviceCount; i++) { - ihipGetDeviceProperties( &tempProp, i ); - if(tempProp.pciBusID == *pciBusId) { - *device =i; - e = hipSuccess; - break; - } + if((device != nullptr) && (pciBusId != nullptr)) { + int pciBusID = -1; + int pciDeviceID = -1; + int pciDomainID = -1; + int len = 0; + len = sscanf (pciBusId,"%04x:%02x:%02x",&pciDomainID,&pciBusID,&pciDeviceID); + if(len == 3) { + ihipGetDeviceCount( &deviceCount ); + for (int i = 0; i< deviceCount; i++) { + ihipGetDeviceProperties( &tempProp, i ); + if(tempProp.pciBusID == pciBusID) { + *device = i; + e = hipSuccess; + break; + } + } + } } return ihipLogStatus(e); } diff --git a/src/hip_event.cpp b/src/hip_event.cpp index ab1c43a00b..d1ee37a45e 100644 --- a/src/hip_event.cpp +++ b/src/hip_event.cpp @@ -55,13 +55,13 @@ void ihipEvent_t::attachToCompletionFuture(const hc::completion_future *cf, void ihipEvent_t::refereshEventStatus() { - bool isReady0 = _marker.is_ready(); + bool isReady0 = locked_isReady(); bool isReady1; int val = 0; if (_state == hipEventStatusRecording) { // TODO - use completion-future functions to obtain ticks and timestamps: hsa_signal_t *sig = static_cast (_marker.get_native_handle()); - isReady1 = _marker.is_ready(); + isReady1 = locked_isReady(); if (sig) { val = hsa_signal_load_acquire(*sig); if (val == 0) { @@ -86,6 +86,17 @@ void ihipEvent_t::refereshEventStatus() } +bool ihipEvent_t::locked_isReady() +{ + return _stream->locked_eventIsReady(this); +} + +void ihipEvent_t::locked_waitComplete(hc::hcWaitMode waitMode) +{ + return _stream->locked_eventWaitComplete(this, 
waitMode); +} + + hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags) { hipError_t e = hipSuccess; @@ -127,7 +138,7 @@ hipError_t hipEventCreate(hipEvent_t* event) hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream) { - HIP_INIT_API(event, stream); + HIP_INIT_SPECIAL_API(TRACE_SYNC, event, stream); if (event && event->_state != hipEventStatusUnitialized) { stream = ihipSyncAndResolveStream(stream); @@ -164,18 +175,21 @@ hipError_t hipEventDestroy(hipEvent_t event) { HIP_INIT_API(event); - event->_state = hipEventStatusUnitialized; + if (event) { + event->_state = hipEventStatusUnitialized; - delete event; - event = NULL; + delete event; + event = NULL; - // TODO - examine return additional error codes - return ihipLogStatus(hipSuccess); + return ihipLogStatus(hipSuccess); + } else { + return ihipLogStatus(hipErrorInvalidResourceHandle); + } } hipError_t hipEventSynchronize(hipEvent_t event) { - HIP_INIT_API(event); + HIP_INIT_SPECIAL_API(TRACE_SYNC, event); if (event) { if (event->_state == hipEventStatusUnitialized) { @@ -189,9 +203,7 @@ hipError_t hipEventSynchronize(hipEvent_t event) ctx->locked_syncDefaultStream(true, true); return ihipLogStatus(hipSuccess); } else { - event->_marker.wait((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive); - - assert (event->_marker.is_ready()); + event->locked_waitComplete((event->_flags & hipEventBlockingSync) ? 
hc::hcWaitModeBlocked : hc::hcWaitModeActive); return ihipLogStatus(hipSuccess); } @@ -254,9 +266,9 @@ hipError_t hipEventElapsedTime(float *ms, hipEvent_t start, hipEvent_t stop) hipError_t hipEventQuery(hipEvent_t event) { - HIP_INIT_API(event); + HIP_INIT_SPECIAL_API(TRACE_QUERY, event); - if ((event->_state == hipEventStatusRecording) && (!event->_marker.is_ready())) { + if ((event->_state == hipEventStatusRecording) && !event->locked_isReady()) { return ihipLogStatus(hipErrorNotReady); } else { return ihipLogStatus(hipSuccess); diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index be591f2f04..2c98ac804f 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -83,20 +83,35 @@ int HIP_HIDDEN_FREE_MEM = 256; int HIP_FORCE_SYNC_COPY = 0; // TODO - set these to 0 and 1 -int HIP_EVENT_SYS_RELEASE=1; -int HIP_COHERENT_HOST_ALLOC = 0; +int HIP_EVENT_SYS_RELEASE=0; +int HIP_HOST_COHERENT = 1; -// TODO - set to 0 once we resolve stability. -// USE_ HIP_SYNC_HOST_ALLOC int HIP_SYNC_HOST_ALLOC = 1; + +int HIP_INIT_ALLOC=-1; +int HIP_SYNC_STREAM_WAIT = 0; +int HIP_FORCE_NULL_STREAM=0; + + + +#if (__hcc_workweek__ >= 17300) +// Make sure we have required bug fix in HCC +// Perform resolution on the GPU: // Chicken bit to sync on host to implement null stream. 
// If 0, null stream synchronization is performed on the GPU int HIP_SYNC_NULL_STREAM = 0; +#else +int HIP_SYNC_NULL_STREAM = 1; +#endif // HIP needs to change some behavior based on HCC_OPT_FLUSH : -// TODO - set this to 1 +#if (__hcc_workweek__ >= 17296) int HCC_OPT_FLUSH = 1; +#else +#warning "HIP disabled HCC_OPT_FLUSH since HCC version does not yet support" +int HCC_OPT_FLUSH = 0; +#endif @@ -258,8 +273,6 @@ ihipStream_t::ihipStream_t(ihipCtx_t *ctx, hc::accelerator_view av, unsigned int case hipDeviceScheduleBlockingSync : _scheduleMode = Yield; break; default:_scheduleMode = Auto; }; - - }; @@ -319,14 +332,34 @@ void ihipStream_t::locked_wait() // Causes current stream to wait for specified event to complete: // Note this does not provide any kind of host serialization. -void ihipStream_t::locked_waitEvent(hipEvent_t event) +void ihipStream_t::locked_streamWaitEvent(hipEvent_t event) { LockedAccessor_StreamCrit_t crit(_criticalData); - crit->_av.create_blocking_marker(event->_marker, hc::accelerator_scope); + crit->_av.create_blocking_marker(event->marker(), hc::accelerator_scope); } + +// Causes current stream to wait for specified event to complete: +// Note this does not provide any kind of host serialization. +bool ihipStream_t::locked_eventIsReady(hipEvent_t event) +{ + // Event query that returns "Complete" may cause HCC to manipulate + // internal queue state so lock the stream's queue here. + LockedAccessor_StreamCrit_t crit(_criticalData); + + return (event->marker().is_ready()); +} + +void ihipStream_t::locked_eventWaitComplete(hipEvent_t event, hc::hcWaitMode waitMode) +{ + LockedAccessor_StreamCrit_t crit(_criticalData); + + event->marker().wait(waitMode); +} + + // Create a marker in this stream. // Save state in the event so it can track the status of the event. void ihipStream_t::locked_recordEvent(hipEvent_t event) @@ -345,7 +378,7 @@ void ihipStream_t::locked_recordEvent(hipEvent_t event) scopeFlag = HIP_EVENT_SYS_RELEASE ? 
hc::system_scope : hc::accelerator_scope; } - event->_marker = crit->_av.create_marker(scopeFlag); + event->marker(crit->_av.create_marker(scopeFlag)); }; //============================================================================= @@ -737,21 +770,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) char archName[256]; err = hsa_agent_get_info(_hsaAgent, HSA_AGENT_INFO_NAME, &archName); - if(strcmp(archName,"gfx701")==0){ - prop->gcnArch = 701; - } - if(strcmp(archName,"gfx801")==0){ - prop->gcnArch = 801; - } - if(strcmp(archName,"gfx802")==0){ - prop->gcnArch = 802; - } - if(strcmp(archName,"gfx803")==0){ - prop->gcnArch = 803; - } - if(strcmp(archName,"gfx900")==0){ - prop->gcnArch = 900; - } + prop->gcnArch = atoi(archName+3); DeviceErrorCheck(err); @@ -799,13 +818,13 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) DeviceErrorCheck(err); // BDFID is 16bit uint: [8bit - BusID | 5bit - Device ID | 3bit - Function/DomainID] - // prop->pciDomainID = bdf_id & 0x7; + prop->pciDomainID = bdf_id & 0x7; prop->pciDeviceID = (bdf_id>>3) & 0x1F; prop->pciBusID = (bdf_id>>8) & 0xFF; // Masquerade as a 3.0-level device. This will change as more HW functions are properly supported. // Application code should use the arch.has* to do detailed feature detection. - prop->major = 2; + prop->major = 3; prop->minor = 0; // Get number of Compute Unit @@ -1233,10 +1252,14 @@ void HipReadEnv() READ_ENV_I(release, HIP_FAIL_SOC, 0, "Fault on Sub-Optimal-Copy, rather than use a slower but functional implementation. Bit 0x1=Fail on async copy with unpinned memory. Bit 0x2=Fail peer copy rather than use staging buffer copy"); READ_ENV_I(release, HIP_SYNC_HOST_ALLOC, 0, "Sync before and after all host memory allocations. 
May help stability"); + READ_ENV_I(release, HIP_INIT_ALLOC, 0, "If not -1, initialize allocated memory to specified byte"); READ_ENV_I(release, HIP_SYNC_NULL_STREAM, 0, "Synchronize on host for null stream submissions"); + READ_ENV_I(release, HIP_FORCE_NULL_STREAM, 0, "Force all stream allocations to secretly return the null stream"); + + READ_ENV_I(release, HIP_SYNC_STREAM_WAIT, 0, "hipStreamWaitEvent will synchronize to host"); - READ_ENV_I(release, HIP_COHERENT_HOST_ALLOC, 0, "If set, all host memory will be allocated as fine-grained system memory. This allows threadfence_system to work but prevents host memory from being cached on GPU which may have performance impact."); + READ_ENV_I(release, HIP_HOST_COHERENT, 0, "If set, all host memory will be allocated as fine-grained system memory. This allows threadfence_system to work but prevents host memory from being cached on GPU which may have performance impact."); READ_ENV_I(release, HCC_OPT_FLUSH, 0, "When set, use agent-scope fence operations rather than system-scope fence operationsflush when possible. 
This flag controls both HIP and HCC behavior."); @@ -1434,9 +1457,7 @@ void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, c { if ((HIP_TRACE_API & (1<grid_dim @@ -1444,6 +1465,11 @@ void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, c << " sharedMem:+" << lp->dynamic_group_mem_bytes << " " << *stream; + if (COMPILE_HIP_DB && HIP_TRACE_API) { + std::string fullStr; + recordApiTrace(&fullStr, os.str()); + } + if (HIP_PROFILE_API == 0x1) { std::string shortAtpString("hipLaunchKernel:"); shortAtpString += kernelName; @@ -1451,10 +1477,6 @@ void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, c } else if (HIP_PROFILE_API == 0x2) { MARKER_BEGIN(os.str().c_str(), "HIP"); } - - if (COMPILE_HIP_DB && HIP_TRACE_API) { - std::cerr << API_COLOR << os.str() << API_COLOR_END << std::endl; - } } } @@ -1872,8 +1894,13 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, } hc::accelerator acc; +#if (__hcc_workweek__ >= 17332) + hc::AmPointerInfo dstPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0); + hc::AmPointerInfo srcPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0); +#else hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0); hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0); +#endif bool dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes); bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes); @@ -1908,7 +1935,11 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes, } void ihipStream_t::addSymbolPtrToTracker(hc::accelerator& acc, void* ptr, size_t sizeBytes) { +#if (__hcc_workweek__ >= 17332) + hc::AmPointerInfo ptrInfo(NULL, ptr, ptr, sizeBytes, acc, true, false); +#else hc::AmPointerInfo ptrInfo(NULL, ptr, sizeBytes, acc, true, false); +#endif hc::am_memtracker_add(ptr, ptrInfo); } @@ -1932,7 +1963,11 @@ void ihipStream_t::lockedSymbolCopyAsync(hc::accelerator &acc, void* dst, void* { // TODO - review - this looks broken , 
should not be adding pointers to tracker dynamically: if(kind == hipMemcpyHostToDevice) { +#if (__hcc_workweek__ >= 17332) + hc::AmPointerInfo srcPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0); +#else hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0); +#endif bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS); if(srcTracked) { addSymbolPtrToTracker(acc, dst, sizeBytes); @@ -1944,7 +1979,11 @@ void ihipStream_t::lockedSymbolCopyAsync(hc::accelerator &acc, void* dst, void* } } if(kind == hipMemcpyDeviceToHost) { +#if (__hcc_workweek__ >= 17332) + hc::AmPointerInfo dstPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0); +#else hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0); +#endif bool dstTracked = (hc::am_memtracker_getinfo(&dstPtrInfo, dst) == AM_SUCCESS); if(dstTracked) { addSymbolPtrToTracker(acc, src, sizeBytes); @@ -1983,8 +2022,13 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes } else { hc::accelerator acc; +#if (__hcc_workweek__ >= 17332) + hc::AmPointerInfo dstPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0); + hc::AmPointerInfo srcPtrInfo(NULL, NULL, NULL, 0, acc, 0, 0); +#else hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0); hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0); +#endif bool dstTracked = getTailoredPtrInfo(&dstPtrInfo, dst, sizeBytes); bool srcTracked = getTailoredPtrInfo(&srcPtrInfo, src, sizeBytes); diff --git a/src/hip_hcc_internal.h b/src/hip_hcc_internal.h index 1499406780..4b7e533a4c 100644 --- a/src/hip_hcc_internal.h +++ b/src/hip_hcc_internal.h @@ -59,14 +59,18 @@ extern int HIP_STREAM_SIGNALS; /* number of signals to allocate at stream creat extern int HIP_VISIBLE_DEVICES; /* Contains a comma-separated sequence of GPU identifiers */ extern int HIP_FORCE_P2P_HOST; -extern int HIP_COHERENT_HOST_ALLOC; +extern int HIP_HOST_COHERENT; extern int HIP_HIDDEN_FREE_MEM; //--- // Chicken bits for disabling functionality to work around potential issues: extern int HIP_SYNC_HOST_ALLOC; 
+extern int HIP_SYNC_STREAM_WAIT; extern int HIP_SYNC_NULL_STREAM; +extern int HIP_INIT_ALLOC; +extern int HIP_FORCE_NULL_STREAM; + // TODO - remove when this is standard behavior. extern int HCC_OPT_FLUSH; @@ -114,6 +118,7 @@ private: //Extern tls extern thread_local hipError_t tls_lastHipError; extern thread_local TidInfo tls_tidInfo; +extern thread_local bool tls_getPrimaryCtx; extern std::vector g_dbStartTriggers; extern std::vector g_dbStopTriggers; @@ -190,10 +195,12 @@ extern const char *API_COLOR_END; //--- //HIP Trace modes - use with HIP_TRACE_API=... -#define TRACE_ALL 0 // 0x1 -#define TRACE_KCMD 1 // 0x2, kernel command -#define TRACE_MCMD 2 // 0x4, memory command -#define TRACE_MEM 3 // 0x8, memory allocation or deallocation. +#define TRACE_ALL 0 // 0x01 +#define TRACE_KCMD 1 // 0x02, kernel command +#define TRACE_MCMD 2 // 0x04, memory command +#define TRACE_MEM 3 // 0x08, memory allocation or deallocation. +#define TRACE_SYNC 4 // 0x10, synchronization (host or hipStreamWaitEvent) +#define TRACE_QUERY 5 // 0x20, hipEventRecord, hipEventQuery, hipStreamQuery //--- @@ -514,9 +521,12 @@ public: hc::accelerator_view* locked_getAv() { LockedAccessor_StreamCrit_t crit(_criticalData); return &(crit->_av); }; - void locked_waitEvent(hipEvent_t event); + void locked_streamWaitEvent(hipEvent_t event); void locked_recordEvent(hipEvent_t event); + bool locked_eventIsReady(hipEvent_t event); + void locked_eventWaitComplete(hipEvent_t event, hc::hcWaitMode waitMode); + ihipStreamCritical_t &criticalData() { return _criticalData; }; //--- @@ -605,18 +615,24 @@ public: ihipEvent_t(unsigned flags); void attachToCompletionFuture(const hc::completion_future *cf, hipStream_t stream, ihipEventType_t eventType); void refereshEventStatus(); + hc::completion_future & marker() { return _marker; } + void marker(hc::completion_future cf) { _marker = cf; }; + + bool locked_isReady(); + void locked_waitComplete(hc::hcWaitMode waitMode); + uint64_t timestamp() const { return 
_timestamp; } ; ihipEventType_t type() const { return _type; }; public: hipEventStatus_t _state; - hipStream_t _stream; // Stream where the event is recorded, or NULL if all streams. + hipStream_t _stream; // Stream where the event is recorded. Null stream is resolved to actual stream when recorded unsigned _flags; - hc::completion_future _marker; private: + hc::completion_future _marker; ihipEventType_t _type; uint64_t _timestamp; // store timestamp, may be set on host or by marker. friend hipError_t hipEventRecord(hipEvent_t event, hipStream_t stream); diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp index 6728ae42d3..a8324c5729 100644 --- a/src/hip_memory.cpp +++ b/src/hip_memory.cpp @@ -27,10 +27,6 @@ THE SOFTWARE. #include "hip/hip_runtime.h" #include "hip_hcc_internal.h" #include "trace_helper.h" -#include "hip/hcc_detail/hip_texture.h" -#include - - // Internal HIP APIS: namespace hip_internal { @@ -105,17 +101,29 @@ int sharePtr(void *ptr, ihipCtx_t *ctx, bool shareWithAll, unsigned hipFlags) // Allocate a new pointer with am_alloc and share with all valid peers. 
// Returns null-ptr if a memory error occurs (either allocation or sharing) -void * allocAndSharePtr(const char *msg, size_t sizeBytes, ihipCtx_t *ctx, bool shareWithAll, unsigned amFlags, unsigned hipFlags) +void * allocAndSharePtr(const char *msg, size_t sizeBytes, ihipCtx_t *ctx, bool shareWithAll, unsigned amFlags, unsigned hipFlags, size_t alignment) { void *ptr = nullptr; auto device = ctx->getWriteableDevice(); - ptr = hc::am_alloc(sizeBytes, device->_acc, amFlags); +#if (__hcc_workweek__ >= 17332) + if (alignment != 0) { + ptr = hc::am_aligned_alloc(sizeBytes, device->_acc, amFlags, alignment); + } else +#endif + { + ptr = hc::am_alloc(sizeBytes, device->_acc, amFlags); + } tprintf(DB_MEM, " alloc %s ptr:%p-%p size:%zu on dev:%d\n", msg, ptr, static_cast(ptr)+sizeBytes, sizeBytes, device->_deviceId); + if (HIP_INIT_ALLOC != -1) { + // TODO , dont' call HIP API directly here: + hipMemset(ptr, HIP_INIT_ALLOC, sizeBytes); + } + if (ptr != nullptr) { int r = sharePtr(ptr, ctx, shareWithAll, hipFlags); if (r != 0) { @@ -147,41 +155,45 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t *attributes, const void HIP_INIT_API(attributes, ptr); hipError_t e = hipSuccess; - - hc::accelerator acc; - hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); - am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr); - if (status == AM_SUCCESS) { - - attributes->memoryType = amPointerInfo._isInDeviceMem ? 
hipMemoryTypeDevice: hipMemoryTypeHost; - attributes->hostPointer = amPointerInfo._hostPointer; - attributes->devicePointer = amPointerInfo._devicePointer; - attributes->isManaged = 0; - if(attributes->memoryType == hipMemoryTypeHost){ - attributes->hostPointer = (void*)ptr; - } - if(attributes->memoryType == hipMemoryTypeDevice){ - attributes->devicePointer = (void*)ptr; - } - attributes->allocationFlags = amPointerInfo._appAllocationFlags; - attributes->device = amPointerInfo._appId; - - if (attributes->device < 0) { - e = hipErrorInvalidDevice; - } - - + if((attributes == nullptr) || (ptr == nullptr)) { + e = hipErrorInvalidValue; } else { - attributes->memoryType = hipMemoryTypeDevice; - attributes->hostPointer = 0; - attributes->devicePointer = 0; - attributes->device = -1; - attributes->isManaged = 0; - attributes->allocationFlags = 0; + hc::accelerator acc; +#if (__hcc_workweek__ >= 17332) + hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0); +#else + hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); +#endif + am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr); + if (status == AM_SUCCESS) { - e = hipErrorUnknown; // TODO - should be hipErrorInvalidValue ? + attributes->memoryType = amPointerInfo._isInDeviceMem ? 
hipMemoryTypeDevice: hipMemoryTypeHost; + attributes->hostPointer = amPointerInfo._hostPointer; + attributes->devicePointer = amPointerInfo._devicePointer; + attributes->isManaged = 0; + if(attributes->memoryType == hipMemoryTypeHost){ + attributes->hostPointer = (void*)ptr; + } + if(attributes->memoryType == hipMemoryTypeDevice){ + attributes->devicePointer = (void*)ptr; + } + attributes->allocationFlags = amPointerInfo._appAllocationFlags; + attributes->device = amPointerInfo._appId; + + if (attributes->device < 0) { + e = hipErrorInvalidDevice; + } + } else { + attributes->memoryType = hipMemoryTypeDevice; + attributes->hostPointer = 0; + attributes->devicePointer = 0; + attributes->device = -1; + attributes->isManaged = 0; + attributes->allocationFlags = 0; + + e = hipErrorUnknown; // TODO - should be hipErrorInvalidValue ? + } } - return ihipLogStatus(e); } @@ -192,14 +204,17 @@ hipError_t hipHostGetDevicePointer(void **devicePointer, void *hostPointer, unsi hipError_t e = hipSuccess; - *devicePointer = NULL; - // Flags must be 0: - if (flags != 0) { + if ((flags != 0) || (devicePointer == nullptr) || (hostPointer == nullptr)){ e = hipErrorInvalidValue; } else { hc::accelerator acc; + *devicePointer = NULL; +#if (__hcc_workweek__ >= 17332) + hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0); +#else hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); +#endif am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, hostPointer); if (status == AM_SUCCESS) { *devicePointer = static_cast(amPointerInfo._devicePointer) + (static_cast(hostPointer) - static_cast(amPointerInfo._hostPointer)) ; @@ -230,7 +245,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) } else { auto device = ctx->getWriteableDevice(); - *ptr = hip_internal::allocAndSharePtr("device_mem", sizeBytes, ctx, false/*shareWithAll*/, 0/*amFlags*/, 0/*hipFlags*/); + *ptr = hip_internal::allocAndSharePtr("device_mem", sizeBytes, ctx, false/*shareWithAll*/, 0/*amFlags*/, 
0/*hipFlags*/, 0); if(sizeBytes && (*ptr == NULL)){ hip_status = hipErrorMemoryAllocation; @@ -293,12 +308,12 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) amFlags = amHostNonCoherent; } else { // depends on env variables: - amFlags = HIP_COHERENT_HOST_ALLOC ? amHostCoherent : amHostNonCoherent; + amFlags = HIP_HOST_COHERENT ? amHostCoherent : amHostNonCoherent; } *ptr = hip_internal::allocAndSharePtr((amFlags & amHostCoherent) ? "finegrained_host":"pinned_host", - sizeBytes, ctx, (trueFlags & hipHostMallocPortable) /*shareWithAll*/, amFlags, flags); + sizeBytes, ctx, (trueFlags & hipHostMallocPortable) /*shareWithAll*/, amFlags, flags, 0); if(sizeBytes && (*ptr == NULL)){ hip_status = hipErrorMemoryAllocation; @@ -344,10 +359,29 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height //err = hipMalloc(ptr, (*pitch)*height); if (ctx) { - auto device = ctx->getWriteableDevice(); + hc::accelerator acc = ctx->getDevice()->_acc; + hsa_agent_t* agent =static_cast(acc.get_hsa_agent()); + + size_t allocGranularity = 0; + hsa_amd_memory_pool_t *allocRegion = static_cast(acc.get_hsa_am_region()); + hsa_amd_memory_pool_get_info(*allocRegion, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &allocGranularity); + + hsa_ext_image_descriptor_t imageDescriptor; + imageDescriptor.width = *pitch; + imageDescriptor.height = height; + imageDescriptor.depth = 0; + imageDescriptor.array_size = 0; + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D; + imageDescriptor.format.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_R; + imageDescriptor.format.channel_type = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32; + + hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; + hsa_ext_image_data_info_t imageInfo; + hsa_status_t status = hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo); + size_t alignment = imageInfo.alignment <= allocGranularity ? 
0 : imageInfo.alignment; const unsigned am_flags = 0; - *ptr = hip_internal::allocAndSharePtr("device_pitch", sizeBytes, ctx, false/*shareWithAll*/, am_flags, 0); + *ptr = hip_internal::allocAndSharePtr("device_pitch", sizeBytes, ctx, false/*shareWithAll*/, am_flags, 0, alignment); if (sizeBytes && (*ptr == NULL)) { hip_status = hipErrorMemoryAllocation; @@ -367,26 +401,128 @@ hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannel return cd; } +extern void getChannelOrderAndType(const hipChannelFormatDesc& desc, + enum hipTextureReadMode readMode, + hsa_ext_image_channel_order_t& channelOrder, + hsa_ext_image_channel_type_t& channelType); + hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, size_t width, size_t height, unsigned int flags) { HIP_INIT_SPECIAL_API((TRACE_MEM), array, desc, width, height, flags); HIP_SET_DEVICE(); hipError_t hip_status = hipSuccess; + if(width > 0) { + auto ctx = ihipGetTlsDefaultCtx(); + + *array = (hipArray*)malloc(sizeof(hipArray)); + array[0]->type = flags; + array[0]->width = width; + array[0]->height = height; + array[0]->depth = 1; + array[0]->desc = *desc; + + void ** ptr = &array[0]->data; + + if (ctx) { + const unsigned am_flags = 0; + size_t size = width; + if(height > 0) { + size = size * height; + } + + size_t allocSize = 0; + switch(desc->f) { + case hipChannelFormatKindSigned: + allocSize = size * sizeof(int); + break; + case hipChannelFormatKindUnsigned: + allocSize = size * sizeof(unsigned int); + break; + case hipChannelFormatKindFloat: + allocSize = size * sizeof(float); + break; + case hipChannelFormatKindNone: + allocSize = size * sizeof(size_t); + break; + default: + hip_status = hipErrorUnknown; + break; + } + hc::accelerator acc = ctx->getDevice()->_acc; + hsa_agent_t* agent =static_cast(acc.get_hsa_agent()); + + size_t allocGranularity = 0; + hsa_amd_memory_pool_t *allocRegion = static_cast(acc.get_hsa_am_region()); + 
hsa_amd_memory_pool_get_info(*allocRegion, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &allocGranularity); + + hsa_ext_image_descriptor_t imageDescriptor; + + imageDescriptor.width = width; + imageDescriptor.height = height; + imageDescriptor.depth = 0; + imageDescriptor.array_size = 0; + switch (flags) { + case hipArrayLayered: + case hipArrayCubemap: + case hipArraySurfaceLoadStore: + case hipArrayTextureGather: + assert(0); + break; + case hipArrayDefault: + default: + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D; + break; + } + hsa_ext_image_channel_order_t channelOrder; + hsa_ext_image_channel_type_t channelType; + getChannelOrderAndType(*desc, hipReadModeElementType, channelOrder, channelType); + imageDescriptor.format.channel_order = channelOrder; + imageDescriptor.format.channel_type = channelType; + + hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; + hsa_ext_image_data_info_t imageInfo; + hsa_status_t status = hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo); + size_t alignment = imageInfo.alignment <= allocGranularity ? 
0 : imageInfo.alignment; + + *ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, false/*shareWithAll*/, am_flags, 0, alignment); + if (size && (*ptr == NULL)) { + hip_status = hipErrorMemoryAllocation; + } + + } else { + hip_status = hipErrorMemoryAllocation; + } + } else { + hip_status = hipErrorInvalidValue; + } + + return ihipLogStatus(hip_status); +} + +hipError_t hipMalloc3DArray(hipArray_t *array, + const struct hipChannelFormatDesc* desc, + struct hipExtent extent, + unsigned int flags) +{ + HIP_INIT(); + HIP_SET_DEVICE(); + hipError_t hip_status = hipSuccess; auto ctx = ihipGetTlsDefaultCtx(); *array = (hipArray*)malloc(sizeof(hipArray)); - array[0]->width = width; - array[0]->height = height; - - array[0]->f = desc->f; + array[0]->type = flags; + array[0]->width = extent.width; + array[0]->height = extent.height; + array[0]->depth = extent.depth; + array[0]->desc = *desc; void ** ptr = &array[0]->data; if (ctx) { const unsigned am_flags = 0; - const size_t size = width*height; + const size_t size = extent.width*extent.height*extent.depth; size_t allocSize = 0; switch(desc->f) { @@ -406,7 +542,48 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, hip_status = hipErrorUnknown; break; } - *ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, false/*shareWithAll*/, am_flags, 0); + + hc::accelerator acc = ctx->getDevice()->_acc; + hsa_agent_t* agent =static_cast(acc.get_hsa_agent()); + + size_t allocGranularity = 0; + hsa_amd_memory_pool_t *allocRegion = static_cast(acc.get_hsa_am_region()); + hsa_amd_memory_pool_get_info(*allocRegion, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &allocGranularity); + + hsa_ext_image_descriptor_t imageDescriptor; + imageDescriptor.width = extent.width; + imageDescriptor.height = extent.height; + imageDescriptor.depth = 0; + imageDescriptor.array_size = 0; + switch (flags) { + case hipArrayLayered: + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2DA; + 
imageDescriptor.array_size = extent.depth; + break; + case hipArraySurfaceLoadStore: + case hipArrayTextureGather: + case hipArrayDefault: + assert(0); + break; + case hipArrayCubemap: + default: + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_3D; + imageDescriptor.depth = extent.depth; + break; + } + hsa_ext_image_channel_order_t channelOrder; + hsa_ext_image_channel_type_t channelType; + getChannelOrderAndType(*desc, hipReadModeElementType, channelOrder, channelType); + imageDescriptor.format.channel_order = channelOrder; + imageDescriptor.format.channel_type = channelType; + + hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; + hsa_ext_image_data_info_t imageInfo; + hsa_status_t status = hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo); + size_t alignment = imageInfo.alignment <= allocGranularity ? 0 : imageInfo.alignment; + + *ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, false, am_flags, 0, alignment); + if (size && (*ptr == NULL)) { hip_status = hipErrorMemoryAllocation; } @@ -415,7 +592,7 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, hip_status = hipErrorMemoryAllocation; } - return ihipLogStatus(hip_status); + return hip_status; } hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) @@ -425,7 +602,11 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) hipError_t hip_status = hipSuccess; hc::accelerator acc; +#if (__hcc_workweek__ >= 17332) + hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0); +#else hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); +#endif am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, hostPtr); if(status == AM_SUCCESS){ *flagsPtr = amPointerInfo._appAllocationFlags; @@ -456,7 +637,11 @@ hipError_t hipHostRegister(void *hostPtr, size_t sizeBytes, unsigned int flags) } hc::accelerator acc; +#if (__hcc_workweek__ >= 17332) + hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 
0, acc, 0, 0); +#else hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); +#endif am_status_t am_status = hc::am_memtracker_getinfo(&amPointerInfo, hostPtr); if(am_status == AM_SUCCESS){ @@ -826,7 +1011,7 @@ hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, return ihipLogStatus(e); } -hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, +hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind, stream); if(width > dpitch || width > spitch) @@ -857,7 +1042,7 @@ hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, con size_t byteSize; if(dst) { - switch(dst[0].f) { + switch(dst[0].desc.f) { case hipChannelFormatKindSigned: byteSize = sizeof(int); break; @@ -918,6 +1103,56 @@ hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, return ihipLogStatus(e); } +hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p) +{ + HIP_INIT_SPECIAL_API((TRACE_MCMD), p); + + hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); + + hc::completion_future marker; + + hipError_t e = hipSuccess; + + size_t byteSize; + if(p) { + switch(p->dstArray->desc.f) { + case hipChannelFormatKindSigned: + byteSize = sizeof(int); + break; + case hipChannelFormatKindUnsigned: + byteSize = sizeof(unsigned int); + break; + case hipChannelFormatKindFloat: + byteSize = sizeof(float); + break; + case hipChannelFormatKindNone: + byteSize = sizeof(size_t); + break; + default: + byteSize = 0; + break; + } + } else { + return ihipLogStatus(hipErrorUnknown); + } + + try { + for (int i = 0; i < p->extent.depth; i++) { + for(int j = 0; j < p->extent.height; j++) { + // TODO: p->srcPos or p->dstPos are not 0. 
+ unsigned char* src = (unsigned char*)p->srcPtr.ptr + i*p->srcPtr.ysize*p->srcPtr.pitch + j*p->srcPtr.pitch; + unsigned char* dst = (unsigned char*)p->dstArray->data + i*p->dstArray->height*p->dstArray->width*byteSize + j*p->dstArray->width*byteSize; + stream->locked_copySync(dst, src, p->extent.width*byteSize, p->kind); + } + } + } + catch (ihipException ex) { + e = ex._code; + } + + return ihipLogStatus(e); +} + // TODO - make member function of stream? template void @@ -956,6 +1191,7 @@ ihipMemsetKernel(hipStream_t stream, } + // TODO-sync: function is async unless target is pinned host memory - then these are fully sync. hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream ) { @@ -1006,7 +1242,7 @@ hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t s return ihipLogStatus(e); }; -hipError_t hipMemset(void* dst, int value, size_t sizeBytes ) +hipError_t hipMemset(void* dst, int value, size_t sizeBytes) { HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, value, sizeBytes); @@ -1058,6 +1294,58 @@ hipError_t hipMemset(void* dst, int value, size_t sizeBytes ) return ihipLogStatus(e); } +hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t height) +{ + HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, pitch, value, width, height); + + hipError_t e = hipSuccess; + + hipStream_t stream = hipStreamNull; + // TODO - call an ihip memset so HIP_TRACE is correct. 
+ stream = ihipSyncAndResolveStream(stream); + + if (stream) { + auto crit = stream->lockopen_preKernelCommand(); + + hc::completion_future cf ; + + size_t sizeBytes = pitch * height; + if ((sizeBytes & 0x3) == 0) { + // use a faster dword-per-workitem copy: + try { + value = value & 0xff; + uint32_t value32 = (value << 24) | (value << 16) | (value << 8) | (value) ; + ihipMemsetKernel (stream, crit, static_cast (dst), value32, sizeBytes/sizeof(uint32_t), &cf); + } + catch (std::exception &ex) { + e = hipErrorInvalidValue; + } + } else { + // use a slow byte-per-workitem copy: + try { + ihipMemsetKernel (stream, crit, static_cast (dst), value, sizeBytes, &cf); + } + catch (std::exception &ex) { + e = hipErrorInvalidValue; + } + } + // TODO - is hipMemset supposed to be async? + cf.wait(); + + stream->lockclose_postKernelCommand("hipMemset", &crit->_av); + + if (HIP_LAUNCH_BLOCKING) { + tprintf (DB_SYNC, "'%s' LAUNCH_BLOCKING wait for memset in %s.\n", __func__, ToString(stream).c_str()); + cf.wait(); + tprintf (DB_SYNC, "'%s' LAUNCH_BLOCKING memset completed in %s.\n", __func__, ToString(stream).c_str()); + } + } else { + e = hipErrorInvalidValue; + } + + return ihipLogStatus(e); +} + hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes ) { HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, value, sizeBytes); @@ -1108,7 +1396,7 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeByte return ihipLogStatus(e); } -hipError_t hipMemGetInfo (size_t *free, size_t *total) +hipError_t hipMemGetInfo(size_t *free, size_t *total) { HIP_INIT_API(free, total); @@ -1154,7 +1442,11 @@ hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) if(ptr != nullptr && size != nullptr){ hc::accelerator acc; +#if (__hcc_workweek__ >= 17332) + hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0); +#else hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); +#endif am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr); 
if(status == AM_SUCCESS){ *size = amPointerInfo._sizeBytes; @@ -1179,7 +1471,11 @@ hipError_t hipFree(void* ptr) if (ptr) { hc::accelerator acc; +#if (__hcc_workweek__ >= 17332) + hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0); +#else hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); +#endif am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr); if(status == AM_SUCCESS){ if(amPointerInfo._hostPointer == NULL){ @@ -1207,7 +1503,11 @@ hipError_t hipHostFree(void* ptr) hipError_t hipStatus = hipErrorInvalidValue; if (ptr) { hc::accelerator acc; +#if (__hcc_workweek__ >= 17332) + hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0); +#else hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); +#endif am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, ptr); if(status == AM_SUCCESS){ if(amPointerInfo._hostPointer == ptr){ @@ -1241,7 +1541,11 @@ hipError_t hipFreeArray(hipArray* array) if(array->data) { hc::accelerator acc; +#if (__hcc_workweek__ >= 17332) + hc::AmPointerInfo amPointerInfo(NULL, NULL, NULL, 0, acc, 0, 0); +#else hc::AmPointerInfo amPointerInfo(NULL, NULL, 0, acc, 0, 0); +#endif am_status_t status = hc::am_memtracker_getinfo(&amPointerInfo, array->data); if(status == AM_SUCCESS){ if(amPointerInfo._hostPointer == NULL){ @@ -1259,7 +1563,11 @@ hipError_t hipMemGetAddressRange ( hipDeviceptr_t* pbase, size_t* psize, hipDevi HIP_INIT_API ( pbase , psize , dptr ); hipError_t hipStatus = hipSuccess; hc::accelerator acc; - hc::AmPointerInfo amPointerInfo( NULL , NULL , 0 , acc , 0 , 0 ); +#if (__hcc_workweek__ >= 17332) + hc::AmPointerInfo amPointerInfo( NULL , NULL , NULL, 0 , acc , 0 , 0 ); +#else + hc::AmPointerInfo amPointerInfo( NULL , NULL, 0 , acc , 0 , 0 ); +#endif am_status_t status = hc::am_memtracker_getinfo( &amPointerInfo , dptr ); if (status == AM_SUCCESS) { *pbase = amPointerInfo._devicePointer; @@ -1282,7 +1590,11 @@ hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* 
devPtr){ if((handle == NULL) || (devPtr == NULL)) { hipStatus = hipErrorInvalidResourceHandle; } else { +#if (__hcc_workweek__ >= 17332) + hc::AmPointerInfo amPointerInfo( NULL , NULL , NULL, 0 , acc , 0 , 0 ); +#else hc::AmPointerInfo amPointerInfo( NULL , NULL , 0 , acc , 0 , 0 ); +#endif am_status_t status = hc::am_memtracker_getinfo( &amPointerInfo , devPtr ); if (status == AM_SUCCESS) { psize = (size_t)amPointerInfo._sizeBytes; diff --git a/src/hip_module.cpp b/src/hip_module.cpp index b8c032da27..74b0092b84 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -452,8 +452,7 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, lp.av->dispatch_hsa_kernel(&aql, config[1] /* kernarg*/, kernArgSize, (startEvent || stopEvent) ? &cf : nullptr -#define USE_NAMED_KERNEL 0 -#if USE_NAMED_KERNEL +#if (__hcc_workweek__ > 17312) , f->_name.c_str() #endif ); diff --git a/src/hip_stream.cpp b/src/hip_stream.cpp index 40aade28b9..7dd6efd39c 100644 --- a/src/hip_stream.cpp +++ b/src/hip_stream.cpp @@ -38,21 +38,26 @@ hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags) hipError_t e = hipSuccess; if (ctx) { - hc::accelerator acc = ctx->getWriteableDevice()->_acc; - // TODO - se try-catch loop to detect memory exception? - // - //Note this is an execute_in_order queue, so all kernels submitted will atuomatically wait for prev to complete: - //This matches CUDA stream behavior: + if (HIP_FORCE_NULL_STREAM) { + *stream = 0; + } else { + hc::accelerator acc = ctx->getWriteableDevice()->_acc; - { - // Obtain mutex access to the device critical data, release by destructor - LockedAccessor_CtxCrit_t ctxCrit(ctx->criticalData()); + // TODO - se try-catch loop to detect memory exception? 
+ // + //Note this is an execute_in_order queue, so all kernels submitted will atuomatically wait for prev to complete: + //This matches CUDA stream behavior: - auto istream = new ihipStream_t(ctx, acc.create_view(), flags); + { + // Obtain mutex access to the device critical data, release by destructor + LockedAccessor_CtxCrit_t ctxCrit(ctx->criticalData()); - ctxCrit->addStream(istream); - *stream = istream; + auto istream = new ihipStream_t(ctx, acc.create_view(), flags); + + ctxCrit->addStream(istream); + *stream = istream; + } } tprintf(DB_SYNC, "hipStreamCreate, %s\n", ToString(*stream).c_str()); @@ -84,7 +89,7 @@ hipError_t hipStreamCreate(hipStream_t *stream) hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags) { - HIP_INIT_API(stream, event, flags); + HIP_INIT_SPECIAL_API(TRACE_SYNC, stream, event, flags); hipError_t e = hipSuccess; @@ -93,18 +98,15 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int } else if (event->_state != hipEventStatusUnitialized) { - if (stream != hipStreamNull) { - - // This will user create_blocking_marker to wait on the specified queue. - stream->locked_waitEvent(event); - + if (HIP_SYNC_STREAM_WAIT || (HIP_SYNC_NULL_STREAM && (stream == 0))) { + // conservative wait on host for the specified event to complete: + event->locked_waitComplete((event->_flags & hipEventBlockingSync) ? hc::hcWaitModeBlocked : hc::hcWaitModeActive); } else { - // TODO-hcc Convert to use create_blocking_marker(...) functionality. - // Currently we have a super-conservative version of this - block on host, and drain the queue. - // This should create a barrier packet in the target queue. - // TODO-HIP_SYNC_NULL_STREAM - stream->locked_wait(); + stream = ihipSyncAndResolveStream(stream); + // This will user create_blocking_marker to wait on the specified queue. + stream->locked_streamWaitEvent(event); } + } // else event not recorded, return immediately and don't create marker. 
return ihipLogStatus(e); @@ -114,7 +116,7 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int //--- hipError_t hipStreamQuery(hipStream_t stream) { - HIP_INIT_API(stream); + HIP_INIT_SPECIAL_API(TRACE_QUERY, stream); // Use default stream if 0 specified: if (stream == hipStreamNull) { @@ -122,15 +124,14 @@ hipError_t hipStreamQuery(hipStream_t stream) stream = device->_defaultStream; } - int pendingOps = 0; + bool isEmpty = 0; { LockedAccessor_StreamCrit_t crit(stream->_criticalData); - pendingOps = crit->_av.get_pending_async_ops(); + isEmpty = crit->_av.get_is_empty(); } - - hipError_t e = (pendingOps > 0) ? hipErrorNotReady : hipSuccess; + hipError_t e = isEmpty ? hipSuccess : hipErrorNotReady ; return ihipLogStatus(e); } @@ -140,6 +141,7 @@ hipError_t hipStreamQuery(hipStream_t stream) hipError_t hipStreamSynchronize(hipStream_t stream) { HIP_INIT_API(stream); + HIP_INIT_SPECIAL_API(TRACE_SYNC, stream); hipError_t e = hipSuccess; @@ -169,7 +171,9 @@ hipError_t hipStreamDestroy(hipStream_t stream) //--- Drain the stream: if (stream == NULL) { - e = hipErrorInvalidResourceHandle; // TODO - review - what happens if try to destroy null stream + if (!HIP_FORCE_NULL_STREAM) { + e = hipErrorInvalidResourceHandle; + } } else { stream->locked_wait(); diff --git a/src/hip_texture.cpp b/src/hip_texture.cpp new file mode 100644 index 0000000000..656c160f00 --- /dev/null +++ b/src/hip_texture.cpp @@ -0,0 +1,668 @@ + +#include + +#include + +#include "hsa/hsa.h" +#include "hsa/hsa_ext_amd.h" + +#include "hip/hip_runtime.h" +#include "hip_hcc_internal.h" +#include "trace_helper.h" + +#include "hip_texture.h" + +static std::map textureHash; + +void saveTextureInfo(const hipTexture* pTexture, + const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const hipResourceViewDesc* pResViewDesc) +{ + if (pResDesc != nullptr) { + memcpy((void*)&(pTexture->resDesc), (void*)pResDesc, sizeof(hipResourceDesc)); + } + + if (pTexDesc != nullptr) 
{ + memcpy((void*)&(pTexture->texDesc), (void*)pTexDesc, sizeof(hipTextureDesc)); + } + + if (pResViewDesc != nullptr) { + memcpy((void*)&(pTexture->resViewDesc), (void*)pResViewDesc, sizeof(hipResourceViewDesc)); + } +} + +void getChannelOrderAndType(const hipChannelFormatDesc& desc, + enum hipTextureReadMode readMode, + hsa_ext_image_channel_order_t& channelOrder, + hsa_ext_image_channel_type_t& channelType) +{ + if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w != 0) { + channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA; + } else if (desc.x != 0 && desc.y != 0 && desc.z != 0 && desc.w == 0) { + channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RGB; + } else if (desc.x != 0 && desc.y != 0 && desc.z == 0 && desc.w == 0) { + channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RG; + } else if (desc.x != 0 && desc.y == 0 && desc.z == 0 && desc.w == 0) { + channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_R; + } else { + } + + switch (desc.f) { + case hipChannelFormatKindUnsigned: + switch(desc.x) { + case 32: + channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32; + break; + case 16: + channelType = readMode == hipReadModeNormalizedFloat ? HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16 : + HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16; + break; + case 8: + channelType = readMode == hipReadModeNormalizedFloat ? HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8 : + HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8; + break; + default: + channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32; + } + break; + case hipChannelFormatKindSigned: + switch(desc.x) { + case 32: + channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32; + break; + case 16: + channelType = readMode == hipReadModeNormalizedFloat ? HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16 : + HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16; + break; + case 8: + channelType = readMode == hipReadModeNormalizedFloat ? 
HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8 : + HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8; + break; + default: + channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32; + } + break; + case hipChannelFormatKindFloat: + switch(desc.x) { + case 32: + channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT; + break; + case 16: + channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT; + break; + case 8: + break; + default: + channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT; + } + break; + case hipChannelFormatKindNone: + default: + break; + } +} + +void fillSamplerDescriptor(hsa_ext_sampler_descriptor_t& samplerDescriptor, + enum hipTextureAddressMode addressMode, + enum hipTextureFilterMode filterMode, + int normalizedCoords) +{ + if (normalizedCoords) { + samplerDescriptor.coordinate_mode = HSA_EXT_SAMPLER_COORDINATE_MODE_NORMALIZED; + } else { + samplerDescriptor.coordinate_mode = HSA_EXT_SAMPLER_COORDINATE_MODE_UNNORMALIZED; + } + + switch (filterMode) { + case hipFilterModePoint: + samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_NEAREST; + break; + case hipFilterModeLinear: + samplerDescriptor.filter_mode = HSA_EXT_SAMPLER_FILTER_MODE_LINEAR; + break; + } + + switch (addressMode) { + case hipAddressModeWrap: + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_REPEAT; + break; + case hipAddressModeClamp: + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE; + break; + case hipAddressModeMirror: + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_MIRRORED_REPEAT; + break; + case hipAddressModeBorder: + samplerDescriptor.address_mode = HSA_EXT_SAMPLER_ADDRESSING_MODE_CLAMP_TO_BORDER; + break; + } +} + +bool getHipTextureObject(hipTextureObject_t* pTexObject, + hsa_ext_image_t& image, + hsa_ext_sampler_t sampler) +{ + unsigned int* texSRD; + hipMalloc((void **) &texSRD, HIP_TEXTURE_OBJECT_SIZE_DWORD * 4); + hipMemcpy(texSRD, (void *)image.handle, HIP_IMAGE_OBJECT_SIZE_DWORD * 4, hipMemcpyDeviceToDevice); + hipMemcpy(texSRD 
+ HIP_SAMPLER_OBJECT_OFFSET_DWORD, (void *)sampler.handle, HIP_SAMPLER_OBJECT_SIZE_DWORD * 4, hipMemcpyDeviceToDevice); + *pTexObject = (hipTextureObject_t) texSRD; + +#ifdef DEBUG + unsigned int* srd = (unsigned int*) malloc(HIP_TEXTURE_OBJECT_SIZE_DWORD * 4); + hipMemcpy(srd, texSRD, HIP_TEXTURE_OBJECT_SIZE_DWORD * 4, hipMemcpyDeviceToHost); + printf("New SRD: \n"); + for (int i = 0; i < HIP_TEXTURE_OBJECT_SIZE_DWORD; i++) { + printf("SRD[%d]: %x\n", i, srd[i]); + } + printf("\n"); +#endif + return true; +} + +// Texture Object APIs +hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, + const hipResourceDesc* pResDesc, + const hipTextureDesc* pTexDesc, + const hipResourceViewDesc* pResViewDesc) +{ + HIP_INIT_API(pTexObject, pResDesc, pTexDesc, pResViewDesc); + HIP_SET_DEVICE(); + + hipError_t hip_status = hipSuccess; + + auto ctx = ihipGetTlsDefaultCtx(); + if (ctx) { + hc::accelerator acc = ctx->getDevice()->_acc; + auto device = ctx->getWriteableDevice(); + + hsa_agent_t* agent =static_cast(acc.get_hsa_agent()); + + hipTexture* pTexture = (hipTexture*) malloc(sizeof(hipTexture)); + if (pTexture != nullptr) { + memset(pTexture, 0, sizeof(hipTexture)); + saveTextureInfo(pTexture, pResDesc, pTexDesc, pResViewDesc); + } + + hsa_ext_image_descriptor_t imageDescriptor; + hsa_ext_image_channel_order_t channelOrder; + hsa_ext_image_channel_type_t channelType; + void* devPtr = nullptr; + + switch (pResDesc->resType) { + case hipResourceTypeArray: + devPtr = pResDesc->res.array.array->data; + imageDescriptor.width = pResDesc->res.array.array->width; + imageDescriptor.height = pResDesc->res.array.array->height; + switch (pResDesc->res.array.array->type) { + case hipArrayLayered: + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2DA; + imageDescriptor.depth = 0; + imageDescriptor.array_size = pResDesc->res.array.array->depth; + break; + case hipArrayCubemap: + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_3D; + imageDescriptor.depth = 
pResDesc->res.array.array->depth; + imageDescriptor.array_size = 0; + break; + case hipArraySurfaceLoadStore: + case hipArrayTextureGather: + case hipArrayDefault: + default: + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D; + imageDescriptor.depth = 0; + imageDescriptor.array_size = 0; + break; + } + getChannelOrderAndType(pResDesc->res.array.array->desc, pTexDesc->readMode, channelOrder, channelType); + break; + case hipResourceTypeMipmappedArray: + devPtr = pResDesc->res.mipmap.mipmap->data; + imageDescriptor.width = pResDesc->res.mipmap.mipmap->width; + imageDescriptor.height = pResDesc->res.mipmap.mipmap->height; + imageDescriptor.depth = pResDesc->res.mipmap.mipmap->depth; + imageDescriptor.array_size = 0; + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D; + getChannelOrderAndType(pResDesc->res.mipmap.mipmap->desc, pTexDesc->readMode, channelOrder, channelType); + break; + case hipResourceTypeLinear: + devPtr = pResDesc->res.linear.devPtr; + imageDescriptor.width = pResDesc->res.linear.sizeInBytes; + imageDescriptor.height = 1; + imageDescriptor.depth = 0; + imageDescriptor.array_size = 0; + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_1D; // ? 
HSA_EXT_IMAGE_DATA_LAYOUT_LINEAR + getChannelOrderAndType(pResDesc->res.linear.desc, pTexDesc->readMode, channelOrder, channelType); + break; + case hipResourceTypePitch2D: + devPtr = pResDesc->res.pitch2D.devPtr; + imageDescriptor.width = pResDesc->res.pitch2D.width; + imageDescriptor.height = pResDesc->res.pitch2D.height; + imageDescriptor.depth = 0; + imageDescriptor.array_size = 0; + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D; + getChannelOrderAndType(pResDesc->res.pitch2D.desc, pTexDesc->readMode, channelOrder, channelType); + break; + default: + break; + } + + imageDescriptor.format.channel_order = channelOrder; + imageDescriptor.format.channel_type = channelType; + + hsa_ext_sampler_descriptor_t samplerDescriptor; + fillSamplerDescriptor(samplerDescriptor, pTexDesc->addressMode[0], pTexDesc->filterMode, pTexDesc->normalizedCoords); + + hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; + if (HSA_STATUS_SUCCESS != hsa_ext_image_create(*agent, &imageDescriptor, devPtr, permission, &(pTexture->image)) || + HSA_STATUS_SUCCESS != hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) { + return ihipLogStatus(hipErrorRuntimeOther); + } + + getHipTextureObject(pTexObject, pTexture->image, pTexture->sampler); + + textureHash[*pTexObject] = pTexture; + } + + return ihipLogStatus(hip_status); +} + +hipError_t hipDestroyTextureObject(hipTextureObject_t textureObject) +{ + HIP_INIT_API(textureObject); + HIP_SET_DEVICE(); + + hipError_t hip_status = hipSuccess; + + auto ctx = ihipGetTlsDefaultCtx(); + if (ctx) { + hc::accelerator acc = ctx->getDevice()->_acc; + auto device = ctx->getWriteableDevice(); + + hsa_agent_t* agent =static_cast(acc.get_hsa_agent()); + + hipTexture* pTexture = textureHash[textureObject]; + if (pTexture != nullptr) { + hsa_ext_image_destroy(*agent, pTexture->image); + hsa_ext_sampler_destroy(*agent, pTexture->sampler); + free(pTexture); + textureHash.erase(textureObject); + } + } + return 
ihipLogStatus(hip_status); +} + +hipError_t hipGetTextureObjectResourceDesc(hipResourceDesc* pResDesc, hipTextureObject_t textureObject) +{ + HIP_INIT_API(pResDesc, textureObject); + HIP_SET_DEVICE(); + + hipError_t hip_status = hipSuccess; + + auto ctx = ihipGetTlsDefaultCtx(); + if (ctx) { + hipTexture* pTexture = textureHash[textureObject]; + if (pTexture != nullptr && pResDesc != nullptr) { + memcpy((void*)pResDesc, (void*)&(pTexture->resDesc), sizeof(hipResourceDesc)); + } + } + return ihipLogStatus(hip_status); +} + +hipError_t hipGetTextureObjectResourceViewDesc(hipResourceViewDesc* pResViewDesc, hipTextureObject_t textureObject) +{ + HIP_INIT_API(pResViewDesc, textureObject); + HIP_SET_DEVICE(); + + hipError_t hip_status = hipSuccess; + + auto ctx = ihipGetTlsDefaultCtx(); + if (ctx) { + hipTexture* pTexture = textureHash[textureObject]; + if (pTexture != nullptr && pResViewDesc != nullptr) { + memcpy((void*)pResViewDesc, (void*)&(pTexture->resViewDesc), sizeof(hipResourceViewDesc)); + } + } + return ihipLogStatus(hip_status); +} + +hipError_t hipGetTextureObjectTextureDesc(hipTextureDesc* pTexDesc, hipTextureObject_t textureObject) +{ + HIP_INIT_API(pTexDesc, textureObject); + HIP_SET_DEVICE(); + + hipError_t hip_status = hipSuccess; + + auto ctx = ihipGetTlsDefaultCtx(); + if (ctx) { + hipTexture* pTexture = textureHash[textureObject]; + if (pTexture != nullptr && pTexDesc != nullptr) { + memcpy((void*)pTexDesc, (void*)&(pTexture->texDesc), sizeof(hipTextureDesc)); + } + } + return ihipLogStatus(hip_status); +} + +// Texture Reference APIs +hipError_t ihipBindTextureImpl(int dim, + enum hipTextureReadMode readMode, + size_t *offset, + const void *devPtr, + const struct hipChannelFormatDesc& desc, + size_t size, + enum hipTextureAddressMode addressMode, + enum hipTextureFilterMode filterMode, + int normalizedCoords, + hipTextureObject_t& textureObject) +{ + HIP_INIT_API(); + HIP_SET_DEVICE(); + + hipError_t hip_status = hipSuccess; + + auto ctx = 
ihipGetTlsDefaultCtx(); + if (ctx) { + hc::accelerator acc = ctx->getDevice()->_acc; + auto device = ctx->getWriteableDevice(); + + hsa_agent_t* agent =static_cast(acc.get_hsa_agent()); + + hipTexture* pTexture = (hipTexture*) malloc(sizeof(hipTexture)); + if (pTexture != nullptr) { + memset(pTexture, 0, sizeof(hipTexture)); + } + + hsa_ext_image_descriptor_t imageDescriptor; + + assert(dim == hipTextureType1D); + + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_1D; + imageDescriptor.width = size; + imageDescriptor.height = 1; + imageDescriptor.depth = 1; + imageDescriptor.array_size = 0; + + hsa_ext_image_channel_order_t channelOrder; + hsa_ext_image_channel_type_t channelType; + getChannelOrderAndType(desc, readMode, channelOrder, channelType); + imageDescriptor.format.channel_order = channelOrder; + imageDescriptor.format.channel_type = channelType; + + hsa_ext_sampler_descriptor_t samplerDescriptor; + fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords); + + hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; + + if (HSA_STATUS_SUCCESS != hsa_ext_image_create(*agent, &imageDescriptor, devPtr, permission, &(pTexture->image)) || + HSA_STATUS_SUCCESS != hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) { + return ihipLogStatus(hipErrorRuntimeOther); + } + getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler); + textureHash[textureObject] = pTexture; + } + + return ihipLogStatus(hip_status); +} + +hipError_t hipBindTexture(size_t* offset, + textureReference* tex, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t size) +{ + // TODO: hipReadModeElementType is default. 
+ return ihipBindTextureImpl(hipTextureType1D, hipReadModeElementType, + offset, devPtr, *desc, size, + tex->addressMode[0], tex->filterMode, tex->normalized, + tex->textureObject); +} + +hipError_t ihipBindTexture2DImpl(int dim, + enum hipTextureReadMode readMode, + size_t *offset, + const void *devPtr, + const struct hipChannelFormatDesc& desc, + size_t width, + size_t height, + enum hipTextureAddressMode addressMode, + enum hipTextureFilterMode filterMode, + int normalizedCoords, + hipTextureObject_t& textureObject) +{ + HIP_INIT_API(); + HIP_SET_DEVICE(); + + hipError_t hip_status = hipSuccess; + + auto ctx = ihipGetTlsDefaultCtx(); + if (ctx) { + hc::accelerator acc = ctx->getDevice()->_acc; + auto device = ctx->getWriteableDevice(); + + hsa_agent_t* agent =static_cast(acc.get_hsa_agent()); + + hipTexture* pTexture = (hipTexture*) malloc(sizeof(hipTexture)); + if (pTexture != nullptr) { + memset(pTexture, 0, sizeof(hipTexture)); + } + + hsa_ext_image_descriptor_t imageDescriptor; + + assert(dim == hipTextureType2D); + + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D; + imageDescriptor.width = width; + imageDescriptor.height = height; + imageDescriptor.depth = 1; + imageDescriptor.array_size = 0; + + hsa_ext_image_channel_order_t channelOrder; + hsa_ext_image_channel_type_t channelType; + getChannelOrderAndType(desc, readMode, channelOrder, channelType); + imageDescriptor.format.channel_order = channelOrder; + imageDescriptor.format.channel_type = channelType; + + hsa_ext_sampler_descriptor_t samplerDescriptor; + fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords); + + hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; + + if (HSA_STATUS_SUCCESS != hsa_ext_image_create(*agent, &imageDescriptor, devPtr, permission, &(pTexture->image)) || + HSA_STATUS_SUCCESS != hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) { + return ihipLogStatus(hipErrorRuntimeOther); + } + 
getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler); + textureHash[textureObject] = pTexture; + } + + return ihipLogStatus(hip_status); +} + +hipError_t hipBindTexture2D(size_t* offset, + textureReference* tex, + const void* devPtr, + const hipChannelFormatDesc* desc, + size_t width, + size_t height, + size_t pitch) +{ + // TODO: hipReadModeElementType is default. + return ihipBindTexture2DImpl(hipTextureType2D, hipReadModeElementType, + offset, devPtr, *desc, width, height, + tex->addressMode[0], tex->filterMode, tex->normalized, + tex->textureObject); +} + +hipError_t ihipBindTextureToArrayImpl(int dim, + enum hipTextureReadMode readMode, + hipArray_const_t array, + const struct hipChannelFormatDesc& desc, + enum hipTextureAddressMode addressMode, + enum hipTextureFilterMode filterMode, + int normalizedCoords, + hipTextureObject_t& textureObject) +{ + HIP_INIT_API(); + HIP_SET_DEVICE(); + + hipError_t hip_status = hipSuccess; + + auto ctx = ihipGetTlsDefaultCtx(); + if (ctx) { + hc::accelerator acc = ctx->getDevice()->_acc; + auto device = ctx->getWriteableDevice(); + + hsa_agent_t* agent =static_cast(acc.get_hsa_agent()); + + hipTexture* pTexture = (hipTexture*) malloc(sizeof(hipTexture)); + if (pTexture != nullptr) { + memset(pTexture, 0, sizeof(hipTexture)); + } + + hsa_ext_image_descriptor_t imageDescriptor; + + imageDescriptor.width = array->width; + imageDescriptor.height = array->height; + imageDescriptor.depth = array->depth; + imageDescriptor.array_size = 0; + + switch (dim) { + case hipTextureType1D: + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_1D; + imageDescriptor.height = 1; + imageDescriptor.depth = 1; + break; + case hipTextureType2D: + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D; + imageDescriptor.depth = 1; + break; + case hipTextureType3D: + case hipTextureTypeCubemap: + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_3D; + break; + case hipTextureType1DLayered: + imageDescriptor.geometry = 
HSA_EXT_IMAGE_GEOMETRY_1DA; + imageDescriptor.height = 1; + imageDescriptor.array_size = array->height; + break; + case hipTextureType2DLayered: + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2DA; + imageDescriptor.depth = 1; + imageDescriptor.array_size = array->depth; + break; + case hipTextureTypeCubemapLayered: + default: + break; + } + + hsa_ext_image_channel_order_t channelOrder; + hsa_ext_image_channel_type_t channelType; + getChannelOrderAndType(desc, readMode, channelOrder, channelType); + imageDescriptor.format.channel_order = channelOrder; + imageDescriptor.format.channel_type = channelType; + + hsa_ext_sampler_descriptor_t samplerDescriptor; + fillSamplerDescriptor(samplerDescriptor, addressMode, filterMode, normalizedCoords); + + hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; + + if (HSA_STATUS_SUCCESS != hsa_ext_image_create(*agent, &imageDescriptor, array->data, permission, &(pTexture->image)) || + HSA_STATUS_SUCCESS != hsa_ext_sampler_create(*agent, &samplerDescriptor, &(pTexture->sampler))) { + return ihipLogStatus(hipErrorRuntimeOther); + } + getHipTextureObject(&textureObject, pTexture->image, pTexture->sampler); + textureHash[textureObject] = pTexture; + } + + return ihipLogStatus(hip_status); +} + +hipError_t hipBindTextureToArray(textureReference* tex, + hipArray_const_t array, + const hipChannelFormatDesc* desc) +{ + // TODO: hipReadModeElementType is default. 
+ return ihipBindTextureToArrayImpl(hipTextureType2D, hipReadModeElementType, + array, *desc, + tex->addressMode[0], tex->filterMode, tex->normalized, + tex->textureObject); +} + +hipError_t hipBindTextureToMipmappedArray(textureReference* tex, + hipMipmappedArray_const_t mipmappedArray, + const hipChannelFormatDesc* desc) +{ + return hipSuccess; +} + +hipError_t ihipUnbindTextureImpl(const hipTextureObject_t& textureObject) +{ + HIP_INIT_API(); + HIP_SET_DEVICE(); + + hipError_t hip_status = hipSuccess; + + auto ctx = ihipGetTlsDefaultCtx(); + if (ctx) { + hc::accelerator acc = ctx->getDevice()->_acc; + auto device = ctx->getWriteableDevice(); + + hsa_agent_t* agent =static_cast(acc.get_hsa_agent()); + + hipTexture* pTexture = textureHash[textureObject]; + if (pTexture != nullptr) { + hsa_ext_image_destroy(*agent, pTexture->image); + hsa_ext_sampler_destroy(*agent, pTexture->sampler); + free(pTexture); + textureHash.erase(textureObject); + } + } + + return ihipLogStatus(hip_status); +} + +hipError_t hipUnbindTexture(const textureReference* tex) +{ + return ihipUnbindTextureImpl(tex->textureObject); +} + +hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, hipArray_const_t array) +{ + HIP_INIT_API(desc, array); + HIP_SET_DEVICE(); + + hipError_t hip_status = hipSuccess; + + auto ctx = ihipGetTlsDefaultCtx(); + if (ctx) { + *desc = array->desc; + } + return ihipLogStatus(hip_status); +} + +hipError_t hipGetTextureAlignmentOffset(size_t* offset, const textureReference* tex) +{ + HIP_INIT_API(offset, tex); + HIP_SET_DEVICE(); + + hipError_t hip_status = hipSuccess; + + auto ctx = ihipGetTlsDefaultCtx(); + if (ctx) { + } + return ihipLogStatus(hip_status); +} + +hipError_t hipGetTextureReference(const textureReference** tex, const void* symbol) +{ + HIP_INIT_API(tex, symbol); + HIP_SET_DEVICE(); + + hipError_t hip_status = hipSuccess; + + auto ctx = ihipGetTlsDefaultCtx(); + if (ctx) { + } + return ihipLogStatus(hip_status); +} diff --git 
a/include/hip/nvcc_detail/hip_texture.h b/src/hip_texture.h similarity index 78% rename from include/hip/nvcc_detail/hip_texture.h rename to src/hip_texture.h index c669d62192..365a02e103 100644 --- a/include/hip/nvcc_detail/hip_texture.h +++ b/src/hip_texture.h @@ -20,9 +20,17 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_H -#define HIP_INCLUDE_HIP_NVCC_DETAIL_HIP_TEXTURE_H +#ifndef HIP_INCLUDE_HCC_DETAIL_HIP_TEXTURE_H +#define HIP_INCLUDE_HCC_DETAIL_HIP_TEXTURE_H -#include +#include + +struct hipTexture { + hipResourceDesc resDesc; + hipTextureDesc texDesc; + hipResourceViewDesc resViewDesc; + hsa_ext_image_t image; + hsa_ext_sampler_t sampler; +}; #endif diff --git a/src/math_functions.cpp b/src/math_functions.cpp index f66f0a4312..80ccece1a3 100644 --- a/src/math_functions.cpp +++ b/src/math_functions.cpp @@ -820,16 +820,6 @@ __host__ float modff(float x, float *iptr) return std::modf(x, iptr); } -__host__ float erfcinvf(float y) -{ - return __hip_host_erfcinvf(y); -} - -__host__ double erfcinv(double y) -{ - return __hip_host_erfcinv(y); -} - __host__ double fdivide(double x, double y) { return x/y; @@ -937,16 +927,6 @@ __host__ void sincospi(double x, double *sptr, double *cptr) *cptr = std::cos(HIP_PI*x); } -//__host__ float normcdfinvf(float x) -//{ -// return std::sqrt(2) * erfinvf(2*x-1); -//} - -//__host__ double normcdfinv(double x) -//{ -// return std::sqrt(2) * erfinv(2*x-1); -//} - __host__ float nextafterf(float x, float y) { return std::nextafter(x, y); diff --git a/tests/src/deviceLib/hipTestDeviceSymbol.cpp b/tests/src/deviceLib/hipTestDeviceSymbol.cpp index c2ffb5ce7d..140f3d97dc 100644 --- a/tests/src/deviceLib/hipTestDeviceSymbol.cpp +++ b/tests/src/deviceLib/hipTestDeviceSymbol.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM all + * BUILD: %t %s * RUN: %t * HIT_END */ @@ -31,16 +31,8 @@ THE SOFTWARE. 
#define NUM 1024 #define SIZE 1024*4 -// TODO - collapse: -#ifdef __HIP_PLATFORM_HCC__ -__device__ ADDRESS_SPACE_1 int globalIn[NUM]; -__device__ ADDRESS_SPACE_1 int globalOut[NUM]; -#endif - -#ifdef __HIP_PLATFORM_NVCC__ __device__ int globalIn[NUM]; __device__ int globalOut[NUM]; -#endif __global__ void Assign(hipLaunchParm lp, int* Out) { @@ -55,7 +47,7 @@ int main() A = new int[NUM]; B = new int[NUM]; C = new int[NUM]; - for(unsigned i=0;i +#include +#include +#include +#include +#include "hip/hip_runtime.h" +#include "hip/device_functions.h" +#include "test_common.h" + +#define HIP_ASSERT(x) (assert((x)==hipSuccess)) + +__host__ __device__ void fence_system() { +#ifdef __HIP_DEVICE_COMPILE__ + __threadfence_system(); +#else + std::atomic_thread_fence(std::memory_order_seq_cst); +#endif +} + +__host__ __device__ void round_robin(const int id, const int num_dev, const int num_iter, volatile int* data, volatile int* flag) { + for (int i = 0; i < num_iter; i++) { + while(*flag%num_dev != id) + fence_system(); // invalid the cache for read + + (*data)++; + fence_system(); // make sure the store to data is sequenced before the store to flag + (*flag)++; + fence_system(); // invalid the cache to flush out flag + } +} + +__global__ void gpu_round_robin(const int id, const int num_dev, const int num_iter, volatile int* data, volatile int* flag) { + round_robin(id, num_dev, num_iter, data, flag); +} + +int main() { + + int num_gpus = 0; + HIP_ASSERT(hipGetDeviceCount(&num_gpus)); + if (num_gpus == 0) { + passed(); + return 0; + } + + volatile int* data; + HIP_ASSERT(hipHostMalloc(&data, sizeof(int), hipHostMallocCoherent)); + constexpr int init_data = 1000; + *data = init_data; + + volatile int* flag; + HIP_ASSERT(hipHostMalloc(&flag, sizeof(int), hipHostMallocCoherent)); + *flag = 0; + + // number of rounds per device + constexpr int num_iter = 1000; + + // one CPU thread + 1 kernel/GPU + const int num_dev = num_gpus + 1; + + int next_id = 0; + std::vector threads; + + 
// create a CPU thread for the round_robin + threads.push_back(std::thread(round_robin, next_id++, num_dev, num_iter, data, flag)); + + // run one thread per GPU + dim3 dim_block(1,1,1); + dim3 dim_grid(1,1,1); + + // launch one kernel per device for the round robin + for (; next_id < num_dev; ++next_id) { + threads.push_back(std::thread([=]() { + HIP_ASSERT(hipSetDevice(next_id-1)); + hipLaunchKernelGGL(gpu_round_robin, dim_grid, dim_block, 0, 0x0 + , next_id, num_dev, num_iter, data, flag); + HIP_ASSERT(hipDeviceSynchronize()); + })); + } + + for (auto& t : threads) { + t.join(); + } + + int expected_data = init_data + num_dev * num_iter; + int expected_flag = num_dev * num_iter; + + bool passed = *data == expected_data + && *flag == expected_flag; + + HIP_ASSERT(hipHostFree((void*)data)); + HIP_ASSERT(hipHostFree((void*)flag)); + + if (passed) { + passed(); + } + else { + failed("Failed Verification!\n"); + } + + return 0; +} diff --git a/tests/src/hipEnvVar.cpp b/tests/src/hipEnvVar.cpp index d5942518e0..ff68103816 100644 --- a/tests/src/hipEnvVar.cpp +++ b/tests/src/hipEnvVar.cpp @@ -109,15 +109,14 @@ int main(int argc, char **argv) std::cout << devCount << std::endl; } if (retDevInfo) { - hipSetDevice(device); - hipDeviceProp_t devProp; + hipDevice_t deviceT; + hipDeviceGet(&deviceT, device); - hipGetDeviceProperties(&devProp, device); - if (devProp.major < 1) { - printf("%d does not support HIP\n", device); - return -1; - } - std::cout << devProp.pciBusID << std::endl; + char pciBusId[100]; + memset(pciBusId,0,100); + hipDeviceGetPCIBusId(pciBusId,100,deviceT); + + cout< #include #include "hip/hip_runtime.h" +#include +#include using namespace std; int getDeviceNumber(){ - FILE *in; - char buff[512]; - string str; - if(!(in = popen("./hipEnvVar -c", "r"))){ - return 1; + FILE *in; + char buff[512]; + string str; + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + if(!(in = popen("./directed_tests/hipEnvVar -c", "r"))){ + return 1; + } + 
while(fgets(buff, 512, in)!=NULL){ + cout << buff; } - fgets(buff, sizeof(buff), in); pclose(in); return atoi(buff); } // Query the current device ID remotely to hipEnvVar -int getDevicePCIBusNumRemote(int deviceID){ +void getDevicePCIBusNumRemote(int deviceID, char* pciBusID){ FILE *in; - char buff[512]; - string str = "./hipEnvVar -d "; + string str = "./directed_tests/hipEnvVar -d "; str += std::to_string(deviceID); + std::this_thread::sleep_for(std::chrono::milliseconds(10)); if(!(in = popen(str.c_str(), "r"))){ - return 1; + exit(1); + } + while(fgets(pciBusID, 100, in)!=NULL){ + cout << pciBusID; } - fgets(buff, sizeof(buff), in); pclose(in); - return atoi(buff); } -// Query the current device ID locally -int getDevicePCIBusNum(int deviceID){ - hipSetDevice(deviceID); - hipDeviceProp_t devProp; +// Query the current device ID locally on AMD path +void getDevicePCIBusNum(int deviceID, char* pciBusID){ + hipDevice_t deviceT; + hipDeviceGet(&deviceT, deviceID); - hipGetDeviceProperties(&devProp, deviceID); - if (devProp.major < 1) { - printf("%d does not support HIP\n", deviceID); - return -1; - } - return devProp.pciBusID; + memset(pciBusID,0,100); + hipDeviceGetPCIBusId(pciBusID,100,deviceT); } int main() { unsetenv("HIP_VISIBLE_DEVICES"); unsetenv("CUDA_VISIBLE_DEVICES"); + + std::vector devPCINum; + char pciBusID[100]; //collect the device pci bus ID for all devices int totalDeviceNum = getDeviceNumber(); std::cout << "The total number of available devices is " << totalDeviceNum<< std::endl <<"Valid index range is 0 - "< devPCINum; for (int i = 0; i < totalDeviceNum ; i++) { - devPCINum.push_back(getDevicePCIBusNum(i)); + getDevicePCIBusNum(i, pciBusID); + devPCINum.push_back(pciBusID); std::cout <<"The collected device PCI Bus ID of Device "< +#include "hip/hip_runtime.h" +#include "test_common.h" + +int main( void ) { + char pciBusId[13]; + int deviceCount = 0; + HIPCHECK(hipGetDeviceCount(&deviceCount)); + HIPASSERT(deviceCount != 0); + for(int i=0; i< 
deviceCount;i++) { + int pciBusID = -1; + int pciDeviceID = -1; + int pciDomainID = -1; + int tempPciBusId = -1; + int tempDeviceId = -1; + HIPCHECK(hipDeviceGetPCIBusId ( &pciBusId[0], 13, i )); + sscanf (pciBusId,"%04x:%02x:%02x",&pciDomainID,&pciBusID,&pciDeviceID); + HIPCHECK(hipDeviceGetAttribute(&tempPciBusId,hipDeviceAttributePciBusId , i)); + if(pciBusID != tempPciBusId) { + exit(EXIT_FAILURE); + } + HIPCHECK(hipDeviceGetByPCIBusId ( &tempDeviceId, pciBusId )); + if(tempDeviceId != i) { + exit(EXIT_FAILURE); + } + } + passed(); +} diff --git a/tests/src/runtimeApi/device/hipDeviceGetPCIBusId.cpp b/tests/src/runtimeApi/device/hipDeviceGetPCIBusId.cpp new file mode 100644 index 0000000000..7fb0340a48 --- /dev/null +++ b/tests/src/runtimeApi/device/hipDeviceGetPCIBusId.cpp @@ -0,0 +1,51 @@ +/* +Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * RUN: %t + * HIT_END + */ + +#include +#include "hip/hip_runtime.h" +#include "test_common.h" + +int main( void ) { + char pciBusId[13]; + int deviceCount = 0; + HIPCHECK(hipGetDeviceCount(&deviceCount)); + HIPASSERT(deviceCount != 0); + for(int i=0; i< deviceCount;i++) { + int pciBusID = -1; + int pciDeviceID = -1; + int pciDomainID = -1; + int tempPciBusId = -1; + HIPCHECK(hipDeviceGetPCIBusId ( &pciBusId[0], 13, i )); + sscanf (pciBusId,"%04x:%02x:%02x",&pciDomainID,&pciBusID,&pciDeviceID); + HIPCHECK(hipDeviceGetAttribute(&tempPciBusId,hipDeviceAttributePciBusId , i)); + if(pciBusID != tempPciBusId) { + exit(EXIT_FAILURE); + } + } + passed(); +} diff --git a/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp b/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp index 9fadebea1e..0c2c387c2f 100644 --- a/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp +++ b/tests/src/runtimeApi/memory/p2p_copy_coherency.cpp @@ -24,7 +24,7 @@ THE SOFTWARE. 
/* HIT_START * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 - * RUN: %t + * RUN: %t EXCLUDE_HIP_PLATFORM all * HIT_END */ diff --git a/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp b/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp index 9bbd43828c..f5b1b79550 100644 --- a/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp +++ b/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp @@ -446,9 +446,41 @@ int main(int argc, char *argv[]) if (p_tests & 0x1000) { - printf ("==> Test 0x1000 try null stream\n"); - hipStreamQuery(0/* try null stream*/); + printf ("==> Test 0x1000 simple null stream tests\n"); + // try some null stream: + hipStreamQuery(0); + + + hipStream_t s1; + hipEvent_t e1; + + { + // stream null waits on event in s1 stream: + HIPCHECK(hipStreamCreate(&s1)); + HIPCHECK(hipEventCreate(&e1)); + + HIPCHECK(hipEventRecord(e1, s1)) + + HIPCHECK(hipStreamWaitEvent(hipStream_t(0), e1, 0/*flags*/)); + + HIPCHECK(hipStreamDestroy(s1)); + HIPCHECK(hipEventDestroy(e1)); + } + + { + // stream s1 waits on event in null stream: + HIPCHECK(hipStreamCreate(&s1)); + HIPCHECK(hipEventCreate(&e1)); + + HIPCHECK(hipEventRecord(e1, hipStream_t(0))) + + HIPCHECK(hipStreamWaitEvent(s1, e1, 0/*flags*/)); + + HIPCHECK(hipStreamDestroy(s1)); + HIPCHECK(hipEventDestroy(e1)); + } + } @@ -471,8 +503,8 @@ int main(int argc, char *argv[]) } - { - printf ("test: alternating memcpy/count-reverse followed by event\n"); + if (p_tests & 0x4000 ) { + printf ("test: %x alternating memcpy/count-reverse followed by event\n", p_tests); RUN_SYNC_TEST(0x4000, streamersDev0, sync_queryAllUntilComplete(streamersDev0), true); RUN_SYNC_TEST(0x8000, streamersDev0, sync_streamWaitEvent(streamersDev0.back()->event(), 0, sideStreams[0], false), true); } diff --git a/tests/src/test_common.h b/tests/src/test_common.h index f585fb8bca..e3cc664d56 100644 --- a/tests/src/test_common.h +++ b/tests/src/test_common.h @@ -23,7 +23,7 @@ THE SOFTWARE. 
#include #include "hip/hip_runtime.h" -#include "hip/hip_texture.h" +#include "hip/hip_texture_types.h" #include "hip/hip_runtime_api.h" #define HC __attribute__((hc))