diff --git a/CMakeLists.txt b/CMakeLists.txt
index 46355b5a85..0a8c22ff54 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -200,7 +200,7 @@ if(HIP_PLATFORM STREQUAL "hcc")
 
     execute_process(COMMAND ${HCC_HOME}/bin/hcc-config --ldflags OUTPUT_VARIABLE HCC_LD_FLAGS)
     set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${HCC_LD_FLAGS} -Wl,-Bsymbolic")
-    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --amdgpu-target=gfx701 --amdgpu-target=gfx803 --amdgpu-target=gfx900")
+    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --amdgpu-target=gfx701 --amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906")
     if(COMPILE_HIP_ATP_MARKER)
         set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -L/opt/rocm/profiler/CXLActivityLogger/bin/x86_64 -lCXLActivityLogger")
     endif()
@@ -214,7 +214,10 @@ if(HIP_PLATFORM STREQUAL "hcc")
     foreach(TARGET hip_hcc hip_hcc_static hip_device)
         target_include_directories(${TARGET} SYSTEM INTERFACE $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>;${HSA_PATH}/include)
     endforeach()
-    target_link_libraries(hip_hcc INTERFACE hcc::hccrt;hcc::hc_am)
+    add_library(host INTERFACE)
+    target_link_libraries(host INTERFACE hip_hcc)
+    add_library(device INTERFACE)
+    target_link_libraries(device INTERFACE host hip_device hcc::hccrt hcc::hc_am)
 
     # Generate .hipInfo
     file(WRITE "${PROJECT_BINARY_DIR}/.hipInfo" ${_buildInfo})
@@ -264,7 +267,7 @@ set(BIN_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/bin)
 set(CONFIG_PACKAGE_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/hip)
 
 if(HIP_PLATFORM STREQUAL "hcc")
-    install(TARGETS hip_hcc_static hip_hcc hip_device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR})
+    install(TARGETS hip_hcc_static hip_hcc hip_device host device EXPORT hip-targets DESTINATION ${LIB_INSTALL_DIR})
     install(EXPORT hip-targets DESTINATION ${CONFIG_PACKAGE_INSTALL_DIR} NAMESPACE hip::)
     include(CMakePackageConfigHelpers)
 
diff --git a/bin/hipcc b/bin/hipcc
index f796b0bf95..7fafc30053 100755
--- a/bin/hipcc
+++ b/bin/hipcc
@@ -50,6 +50,8 @@ $verbose = $ENV{'HIPCC_VERBOSE'} // 0;
 # Verbose: 0x1=commands, 0x2=paths, 0x4=hipcc args
 
 $HIP_PATH=$ENV{'HIP_PATH'} // dirname (dirname $0); # use parent directory of hipcc
+$HIP_CLANG_PATH=$ENV{'HIP_CLANG_PATH'};
+$DEVICE_LIB_PATH=$ENV{'DEVICE_LIB_PATH'};
 
 #---
 # Read .hipInfo
@@ -62,6 +64,10 @@ $HIP_PLATFORM= `$HIP_PATH/bin/hipconfig --platform` // "hcc";
 $HIP_VERSION= `$HIP_PATH/bin/hipconfig --version`;
 ($HIP_VERSION_MAJOR, $HIP_VERSION_MINOR, $HIP_VERSION_PATCH) = split(/\./, $HIP_VERSION);
 
+if (defined $HIP_CLANG_PATH) {
+  $HIP_PLATFORM = "clang"
+}
+
 if ($verbose & 0x2) {
     print ("HIP_PATH=$HIP_PATH\n");
     print ("HIP_PLATFORM=$HIP_PLATFORM\n");
@@ -75,9 +81,19 @@ $target_gfx801 = 0;
 $target_gfx802 = 0;
 $target_gfx803 = 0;
 $target_gfx900 = 0;
+$target_gfx906 = 0;
 $default_amdgpu_target = 1;
 
-if ($HIP_PLATFORM eq "hcc") {
+if ($HIP_PLATFORM eq "clang") {
+    if ($verbose & 0x2) {
+        print ("HIP_CLANG_PATH=$HIP_CLANG_PATH\n");
+    }
+    $ROCM_PATH=$ENV{'ROCM_PATH'} // "/opt/rocm";
+    $HIPCC="$HIP_CLANG_PATH/clang++";
+    $HIPCXXFLAGS .= "-std=c++11 -I$HIP_PATH/include";
+
+    $HIPLDFLAGS = "--hip-link --hip-device-lib-path=$DEVICE_LIB_PATH -L$HIP_PATH/lib -lhip_hcc";
+} elsif ($HIP_PLATFORM eq "hcc") {
     $HSA_PATH=$ENV{'HSA_PATH'} // "/opt/rocm/hsa";
 
     $HCC_HOME=$ENV{'HCC_HOME'} // $hipConfig{'HCC_HOME'} // "/opt/rocm/hcc";
@@ -281,6 +297,16 @@ foreach $arg (@ARGV)
         $target_gfx900 = 1;
         $default_amdgpu_target = 0;
     }
+    if($arg eq '--amdgpu-target=gfx906')
+    {
+        $target_gfx906 = 1;
+        $default_amdgpu_target = 0;
+    }
+
+    # hip-clang does not accept --amdgpu-target= options.
+    if (($arg =~ /--amdgpu-target=/) and $HIP_PLATFORM eq 'clang' ) {
+        $swallowArg = 1;
+    }
 
     if(($trimarg eq '-stdlib=libstdc++') and ($setStdLib eq 0))
     {
@@ -330,10 +356,16 @@ foreach $arg (@ARGV)
         if (($arg =~ /\.cpp$/) or ($arg =~ /\.c$/) or ($arg =~ /\.cc$/) ) {
             $hasC = 1;
             $needCXXFLAGS = 1;
+            if ($HIP_PLATFORM eq 'clang') {
+                $toolArgs .= " -x hip"
+            }
         }
         if (($arg =~ /\.cu$/) or ($arg =~ /\.cuh$/)) {
             $hasCU = 1;
             $needCXXFLAGS = 1;
+            if ($HIP_PLATFORM eq 'clang') {
+                $toolArgs .= " -x hip"
+            }
         }
 
         push (@inputs, $arg);
@@ -342,7 +374,7 @@ foreach $arg (@ARGV)
     $toolArgs .= " $arg" unless $swallowArg;
 }
 
-if($HIP_PLATFORM eq "hcc"){
+if($HIP_PLATFORM eq "hcc" or $HIP_PLATFORM eq "clang"){
     # No AMDGPU target specified at commandline. So look for HCC_AMDGPU_TARGET
     if($default_amdgpu_target eq 1 and defined $ENV{HCC_AMDGPU_TARGET})
     {
@@ -373,6 +405,11 @@ if($HIP_PLATFORM eq "hcc"){
                 $target_gfx900 = 1;
                 $default_amdgpu_target = 0;
             }
+            if($target eq 'gfx906')
+            {
+                $target_gfx906 = 1;
+                $default_amdgpu_target = 0;
+            }
         }
     }
     # Else try using rocm_agent_enumerator
@@ -404,6 +441,10 @@ if($HIP_PLATFORM eq "hcc"){
                 $target_gfx900 = 1;
                 $default_amdgpu_target = 0;
             }
+            if($val eq "gfx906") {
+                $target_gfx906 = 1;
+                $default_amdgpu_target = 0;
+            }
         }
     }
     # rocm_agent_enumerator failed! Throw an error and die if linking is required
@@ -414,29 +455,59 @@ if($HIP_PLATFORM eq "hcc"){
 
     $ENV{HCC_EXTRA_LIBRARIES}="$HIP_PATH/lib/hip_hc.ll\n";
 
+    if($HIP_PLATFORM eq "hcc") {
+        $GPU_ARCH_OPT = " --amdgpu-target=";
+    } else {
+        $GPU_ARCH_OPT = " --cuda-gpu-arch=";
+    }
     # Handle ROCm target platform
     if ($target_gfx701 eq 1) {
-        $HIPLDFLAGS .= " --amdgpu-target=gfx701";
+        $GPU_ARCH_ARG = $GPU_ARCH_OPT . "gfx701";
+        $HIPLDFLAGS .= $GPU_ARCH_ARG;
+        if ($HIP_PLATFORM eq 'clang') {
+            $HIPCXXFLAGS .= $GPU_ARCH_ARG;;
+        }
         $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX701__=1 ";
     }
     if ($target_gfx801 eq 1) {
-        $HIPLDFLAGS .= " --amdgpu-target=gfx801";
+        $GPU_ARCH_ARG = $GPU_ARCH_OPT . "gfx801";
+        $HIPLDFLAGS .= $GPU_ARCH_ARG;
+        if ($HIP_PLATFORM eq 'clang') {
+            $HIPCXXFLAGS .= $GPU_ARCH_ARG;;
+        }
         $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX801__=1 ";
     }
     if ($target_gfx802 eq 1) {
-        $HIPLDFLAGS .= " --amdgpu-target=gfx802";
+        $GPU_ARCH_ARG = $GPU_ARCH_OPT . "gfx802";
+        $HIPLDFLAGS .= $GPU_ARCH_ARG;
+        if ($HIP_PLATFORM eq 'clang') {
+            $HIPCXXFLAGS .= $GPU_ARCH_ARG;;
+        }
         $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX802__=1 ";
     }
     if ($target_gfx803 eq 1) {
-        $HIPLDFLAGS .= " --amdgpu-target=gfx803";
+        $GPU_ARCH_ARG = $GPU_ARCH_OPT . "gfx803";
+        $HIPLDFLAGS .= $GPU_ARCH_ARG;
+        if ($HIP_PLATFORM eq 'clang') {
+            $HIPCXXFLAGS .= $GPU_ARCH_ARG;;
+        }
         $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX803__=1 ";
         $ENV{HCC_EXTRA_LIBRARIES_GFX803}="$HIP_PATH/lib/hip_hc_gfx803.ll\n";
     }
     if ($target_gfx900 eq 1) {
-        $HIPLDFLAGS .= " --amdgpu-target=gfx900";
+        $GPU_ARCH_ARG = $GPU_ARCH_OPT . "gfx900";
+        $HIPLDFLAGS .= $GPU_ARCH_ARG;
+        if ($HIP_PLATFORM eq 'clang') {
+            $HIPCXXFLAGS .= $GPU_ARCH_ARG;;
+        }
         $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX900__=1 ";
         $ENV{HCC_EXTRA_LIBRARIES_GFX900}="$HIP_PATH/lib/hip_hc_gfx803.ll\n";
     }
+    if ($target_gfx906 eq 1) {
+        $HIPLDFLAGS .= " --amdgpu-target=gfx906";
+        $HIPCXXFLAGS .= " -D__HIP_ARCH_GFX906__=1 ";
+        $ENV{HCC_EXTRA_LIBRARIES_GFX906}="$HIP_PATH/lib/hip_hc_gfx803.ll\n";
+    }
 }
 
 if ($hasC and $HIP_PLATFORM eq 'nvcc') {
@@ -445,6 +516,7 @@ if ($hasC and $HIP_PLATFORM eq 'nvcc') {
 if ($hasCU and $HIP_PLATFORM eq 'hcc') {
     $HIPCXXFLAGS .= " -x c++";
 }
+
 if ($buildDeps and $HIP_PLATFORM eq 'nvcc') {
     $HIPCXXFLAGS .= " -M -D__CUDACC__";
 }
diff --git a/hip-config.cmake.in b/hip-config.cmake.in
index 7e4468b94a..efcdf708bb 100644
--- a/hip-config.cmake.in
+++ b/hip-config.cmake.in
@@ -51,7 +51,7 @@ set_and_check(hip_HIPCONFIG_EXECUTABLE "${hip_BIN_INSTALL_DIR}/hipconfig")
 find_dependency(hcc)
 include( "${CMAKE_CURRENT_LIST_DIR}/hip-targets.cmake" )
 
-set( hip_LIBRARIES hip::hip_hcc)
+set( hip_LIBRARIES hip::host hip::device)
 set( hip_LIBRARY ${hip_LIBRARIES})
 
 set(HIP_INCLUDE_DIR ${hip_INCLUDE_DIR})
diff --git a/hipify-clang/src/CUDA2HipMap.cpp b/hipify-clang/src/CUDA2HipMap.cpp
index 6aa9e733f2..7d10b35e48 100644
--- a/hipify-clang/src/CUDA2HipMap.cpp
+++ b/hipify-clang/src/CUDA2HipMap.cpp
@@ -2920,6 +2920,18 @@ const std::map<llvm::StringRef, hipCounter> CUDA_IDENTIFIER_MAP{
     // unchanged function names: skipahead, skipahead_sequence, skipahead_subsequence
 
     ///////////////////////////// cuDNN /////////////////////////////
+    // defines
+    {"CUDNN_VERSION",                                 {"HIPDNN_VERSION",                                 CONV_NUMERIC_LITERAL, API_DNN}},    //  7000
+    {"CUDNN_DIM_MAX",                                 {"HIPDNN_DIM_MAX",                                 CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    //  8
+    {"CUDNN_LRN_MIN_N",                               {"HIPDNN_LRN_MIN_N",                               CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    //  1
+    {"CUDNN_LRN_MAX_N",                               {"HIPDNN_LRN_MAX_N",                               CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 16
+    {"CUDNN_LRN_MIN_K",                               {"HIPDNN_LRN_MIN_K",                               CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 1e-5
+    {"CUDNN_LRN_MIN_BETA",                            {"HIPDNN_LRN_MIN_BETA",                            CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 0.01
+    {"CUDNN_BN_MIN_EPSILON",                          {"HIPDNN_BN_MIN_EPSILON",                          CONV_NUMERIC_LITERAL, API_DNN}},    // 1e-5
+    {"CUDNN_SEV_ERROR_EN",                            {"HIPDNN_SEV_ERROR_EN",                            CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},
+    {"CUDNN_SEV_WARNING_EN",                          {"HIPDNN_SEV_WARNING_EN",                          CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},
+    {"CUDNN_SEV_INFO_EN",                             {"HIPDNN_SEV_INFO_EN",                             CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},
+
     {"cudnnContext",                                  {"hipdnnContext",                                  CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
     {"cudnnHandle_t",                                 {"hipdnnHandle_t",                                 CONV_TYPE, API_DNN}},
     {"cudnnStatus_t",                                 {"hipdnnStatus_t",                                 CONV_TYPE, API_DNN}},
@@ -2956,6 +2968,15 @@ const std::map<llvm::StringRef, hipCounter> CUDA_IDENTIFIER_MAP{
     {"CUDNN_DATA_INT8x4",                             {"HIPDNN_DATA_INT8x4",                             CONV_NUMERIC_LITERAL, API_DNN}},    // 5
     {"CUDNN_DATA_UINT8",                              {"HIPDNN_DATA_UINT8",                              CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},  // 6
     {"CUDNN_DATA_UINT8x4",                            {"HIPDNN_DATA_UINT8x4",                            CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},  // 7
+    {"cudnnErrQueryMode_t",                           {"hipdnnErrQueryMode_t",                           CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"CUDNN_ERRQUERY_RAWCODE",                        {"HIPDNN_ERRQUERY_RAWCODE",                        CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 0
+    {"CUDNN_ERRQUERY_NONBLOCKING",                    {"HIPDNN_ERRQUERY_NONBLOCKING",                    CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 1
+    {"CUDNN_ERRQUERY_BLOCKING",                       {"HIPDNN_ERRQUERY_BLOCKING",                       CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 2
+    {"cudnnSeverity_t",                               {"hipdnnSeverity_t",                               CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"CUDNN_SEV_FATAL",                               {"HIPDNN_SEV_FATAL",                               CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 0
+    {"CUDNN_SEV_ERROR",                               {"HIPDNN_SEV_ERROR",                               CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 1
+    {"CUDNN_SEV_WARNING",                             {"HIPDNN_SEV_WARNING",                             CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 2
+    {"CUDNN_SEV_INFO",                                {"HIPDNN_SEV_INFO",                                CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 3
 
     {"cudnnConvolutionFwdAlgo_t",                           {"hipdnnConvolutionFwdAlgo_t",                           CONV_TYPE, API_DNN}},
     {"CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM",            {"HIPDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM",            CONV_NUMERIC_LITERAL, API_DNN}},    // 0
@@ -2967,34 +2988,328 @@ const std::map<llvm::StringRef, hipCounter> CUDA_IDENTIFIER_MAP{
     {"CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD",                 {"HIPDNN_CONVOLUTION_FWD_ALGO_WINOGRAD",                 CONV_NUMERIC_LITERAL, API_DNN}},    // 6
     {"CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED",        {"HIPDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED",        CONV_NUMERIC_LITERAL, API_DNN}},    // 7
     {"CUDNN_CONVOLUTION_FWD_ALGO_COUNT",                    {"HIPDNN_CONVOLUTION_FWD_ALGO_COUNT",                    CONV_NUMERIC_LITERAL, API_DNN}},    // 8
-
     {"cudnnConvolutionFwdPreference_t",                     {"hipdnnConvolutionFwdPreference_t",                     CONV_TYPE, API_DNN}},
     {"CUDNN_CONVOLUTION_FWD_NO_WORKSPACE",                  {"HIPDNN_CONVOLUTION_FWD_NO_WORKSPACE",                  CONV_NUMERIC_LITERAL, API_DNN}},    // 0
     {"CUDNN_CONVOLUTION_FWD_PREFER_FASTEST",                {"HIPDNN_CONVOLUTION_FWD_PREFER_FASTEST",                CONV_NUMERIC_LITERAL, API_DNN}},    // 1
     {"CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT",       {"HIPDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT",       CONV_NUMERIC_LITERAL, API_DNN}},    // 2
+    {"cudnnDeterminism_t",                                  {"hipdnnDeterminism_t",                                  CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"CUDNN_NON_DETERMINISTIC",                             {"HIPDNN_NON_DETERMINISTIC",                             CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 0
+    {"CUDNN_DETERMINISTIC",                                 {"HIPDNN_DETERMINISTIC",                                 CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 1
+    {"cudnnDivNormMode_t",                                  {"hipdnnDivNormMode_t",                                  CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"CUDNN_DIVNORM_PRECOMPUTED_MEANS",                     {"HIPDNN_DIVNORM_PRECOMPUTED_MEANS",                     CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 0
+    {"cudnnCTCLossAlgo_t",                                  {"hipdnnCTCLossAlgo_t",                                  CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"CUDNN_CTC_LOSS_ALGO_DETERMINISTIC",                   {"HIPDNN_CTC_LOSS_ALGO_DETERMINISTIC",                   CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 0
+    {"CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC",               {"HIPDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC",               CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 1
 
     {"cudnnFilterDescriptor_t",                             {"hipdnnFilterDescriptor_t",                             CONV_TYPE, API_DNN}},
+    {"cudnnDropoutDescriptor_t",                            {"hipdnnDropoutDescriptor_t",                            CONV_TYPE, API_DNN}},
+    {"cudnnConvolutionFwdAlgoPerf_t",                       {"hipdnnConvolutionFwdAlgoPerf_t",                       CONV_TYPE, API_DNN}},
+    {"cudnnConvolutionBwdFilterAlgoPerf_t",                 {"hipdnnConvolutionBwdFilterAlgoPerf_t",                 CONV_TYPE, API_DNN}},
+    {"cudnnRNNDescriptor_t",                                {"hipdnnRNNDescriptor_t",                                CONV_TYPE, API_DNN}},
+    {"cudnnPersistentRNNPlan",                              {"hipdnnPersistentRNNPlan",                              CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnPersistentRNNPlan_t",                            {"hipdnnPersistentRNNPlan_t",                            CONV_TYPE, API_DNN}},
+    {"cudnnTensorStruct",                                   {"hipdnnTensorStruct",                                   CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnConvolutionStruct",                              {"hipdnnConvolutionStruct",                              CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnPoolingStruct",                                  {"hipdnnPoolingStruct",                                  CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnPoolingDescriptor_t",                            {"hipdnnPoolingDescriptor_t",                            CONV_TYPE, API_DNN}},
+    {"cudnnFilterStruct",                                   {"hipdnnFilterStruct",                                   CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnLRNDescriptor_t",                                {"hipdnnLRNDescriptor_t",                                CONV_TYPE, API_DNN}},
+    {"cudnnLRNStruct",                                      {"hipdnnLRNStruct",                                      CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnActivationDescriptor_t",                         {"hipdnnActivationDescriptor_t",                         CONV_TYPE, API_DNN}},
+    {"cudnnActivationStruct",                               {"hipdnnActivationStruct",                               CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnSpatialTransformerDescriptor_t",                 {"hipdnnSpatialTransformerDescriptor_t",                 CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnSpatialTransformerStruct",                       {"hipdnnSpatialTransformerStruct",                       CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnOpTensorDescriptor_t",                           {"hipdnnOpTensorDescriptor_t",                           CONV_TYPE, API_DNN}},
+    {"cudnnOpTensorStruct",                                 {"hipdnnOpTensorStruct",                                 CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnReduceTensorDescriptor_t",                       {"hipdnnReduceTensorDescriptor_t",                       CONV_TYPE, API_DNN}},
+    {"cudnnReduceTensorStruct",                             {"hipdnnReduceTensorStruct",                             CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnCTCLossDescriptor_t",                            {"hipdnnCTCLossDescriptor_t",                            CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnCTCLossStruct",                                  {"hipdnnCTCLossStruct",                                  CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnConvolutionBwdDataAlgoPerf_t",                   {"hipdnnConvolutionBwdDataAlgoPerf_t",                   CONV_TYPE, API_DNN}},
+    {"cudnnAlgorithmDescriptor_t",                          {"hipdnnAlgorithmDescriptor_t",                          CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnAlgorithmStruct",                                {"hipdnnAlgorithmStruct",                                CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnAlgorithmPerformance_t",                         {"hipdnnAlgorithmPerformance_t",                         CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnAlgorithmPerformanceStruct",                     {"hipdnnAlgorithmPerformanceStruct",                     CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnRNNStruct",                                      {"hipdnnRNNStruct",                                      CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnRNNDescriptor_t",                                {"hipdnnRNNDescriptor_t",                                CONV_TYPE, API_DNN}},
+    {"cudnnAlgorithm_t",                                    {"hipdnnAlgorithm_t",                                    CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnCallback_t",                                     {"hipdnnCallback_t",                                     CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnDebug_t",                                        {"hipdnnDebug_t",                                        CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+
+    {"cudnnLRNMode_t",                                      {"hipdnnLRNMode_t",                                      CONV_TYPE, API_DNN}},
+    {"CUDNN_LRN_CROSS_CHANNEL_DIM1",                        {"HIPDNN_LRN_CROSS_CHANNEL",                             CONV_NUMERIC_LITERAL, API_DNN}},    // 0 vs 1
+    {"cudnnRNNInputMode_t",                                 {"hipdnnRNNInputMode_t",                                 CONV_TYPE, API_DNN}},
+    {"CUDNN_LINEAR_INPUT",                                  {"HIPDNN_LINEAR_INPUT",                                  CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_SKIP_INPUT",                                    {"HIPDNN_SKIP_INPUT",                                    CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"cudnnDirectionMode_t",                                {"hipdnnDirectionMode_t",                                CONV_TYPE, API_DNN}},
+    {"CUDNN_UNIDIRECTIONAL",                                {"HIPDNN_UNIDIRECTIONAL",                                CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_BIDIRECTIONAL",                                 {"HIPDNN_BIDIRECTIONAL",                                 CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"cudnnMathType_t",                                     {"hipdnnMathType_t",                                     CONV_TYPE, API_DNN}},
+    {"CUDNN_DEFAULT_MATH",                                  {"HIPDNN_DEFAULT_MATH",                                  CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_TENSOR_OP_MATH",                                {"HIPDNN_TENSOR_OP_MATH",                                CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"cudnnNanPropagation_t",                               {"hipdnnNanPropagation_t",                               CONV_TYPE, API_DNN}},
+    {"CUDNN_NOT_PROPAGATE_NAN",                             {"HIPDNN_NOT_PROPAGATE_NAN",                             CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_PROPAGATE_NAN",                                 {"HIPDNN_PROPAGATE_NAN",                                 CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"cudnnConvolutionBwdDataAlgo_t",                       {"hipdnnConvolutionBwdDataAlgo_t",                       CONV_TYPE, API_DNN}},
+    {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_0",                   {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_0",                   CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_1",                   {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_1",                   CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT",                 {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_FFT",                 CONV_NUMERIC_LITERAL, API_DNN}},    // 2
+    {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING",          {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING",          CONV_NUMERIC_LITERAL, API_DNN}},    // 3
+    {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD",            {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD",            CONV_NUMERIC_LITERAL, API_DNN}},    // 4
+    {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED",   {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED",   CONV_NUMERIC_LITERAL, API_DNN}},    // 5
+    {"CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT",               {"HIPDNN_CONVOLUTION_BWD_DATA_ALGO_TRANSPOSE_GEMM",      CONV_NUMERIC_LITERAL, API_DNN}},    // 6
+    {"cudnnConvolutionBwdFilterAlgo_t",                     {"hipdnnConvolutionBwdFilterAlgo_t",                     CONV_TYPE, API_DNN}},
+    {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0",                 {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_0",                 CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1",                 {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_1",                 CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT",               {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT",               CONV_NUMERIC_LITERAL, API_DNN}},    // 2
+    {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3",                 {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_3",                 CONV_NUMERIC_LITERAL, API_DNN}},    // 3
+    {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD",          {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD",          CONV_NUMERIC_LITERAL, API_DNN}},    // 4
+    {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED", {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED", CONV_NUMERIC_LITERAL, API_DNN}},    // 5
+    {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING",        {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING",        CONV_NUMERIC_LITERAL, API_DNN}},    // 6
+    {"CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT",             {"HIPDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT",             CONV_NUMERIC_LITERAL, API_DNN}},    // 7
+    {"cudnnConvolutionBwdFilterPreference_t",               {"hipdnnConvolutionBwdFilterPreference_t",               CONV_TYPE, API_DNN}},
+    {"CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE",           {"HIPDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE",           CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST",         {"HIPDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST",         CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT",{"HIPDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT",CONV_NUMERIC_LITERAL, API_DNN}},    // 2
+    {"cudnnRNNAlgo_t",                                      {"hipdnnRNNAlgo_t",                                      CONV_TYPE, API_DNN}},
+    {"CUDNN_RNN_ALGO_STANDARD",                             {"HIPDNN_RNN_ALGO_STANDARD",                             CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_RNN_ALGO_PERSIST_STATIC",                       {"HIPDNN_RNN_ALGO_PERSIST_STATIC",                       CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"CUDNN_RNN_ALGO_PERSIST_DYNAMIC",                      {"HIPDNN_RNN_ALGO_PERSIST_DYNAMIC",                      CONV_NUMERIC_LITERAL, API_DNN}},    // 2
+    {"CUDNN_RNN_ALGO_COUNT",                                {"HIPDNN_RNN_ALGO_COUNT",                                CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 3
+    {"cudnnRNNMode_t",                                      {"hipdnnRNNMode_t",                                      CONV_TYPE, API_DNN}},
+    {"CUDNN_RNN_RELU",                                      {"HIPDNN_RNN_RELU",                                      CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_RNN_TANH",                                      {"HIPDNN_RNN_TANH",                                      CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"CUDNN_LSTM",                                          {"HIPDNN_LSTM",                                          CONV_NUMERIC_LITERAL, API_DNN}},    // 2
+    {"CUDNN_GRU",                                           {"HIPDNN_GRU",                                           CONV_NUMERIC_LITERAL, API_DNN}},    // 3
+    {"cudnnOpTensorOp_t",                                   {"hipdnnOpTensorOp_t",                                   CONV_TYPE, API_DNN}},
+    {"CUDNN_OP_TENSOR_ADD",                                 {"HIPDNN_OP_TENSOR_ADD",                                 CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_OP_TENSOR_MUL",                                 {"HIPDNN_OP_TENSOR_MUL",                                 CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"CUDNN_OP_TENSOR_MIN",                                 {"HIPDNN_OP_TENSOR_MIN",                                 CONV_NUMERIC_LITERAL, API_DNN}},    // 2
+    {"CUDNN_OP_TENSOR_MAX",                                 {"HIPDNN_OP_TENSOR_MAX",                                 CONV_NUMERIC_LITERAL, API_DNN}},    // 3
+    {"CUDNN_OP_TENSOR_SQRT",                                {"HIPDNN_OP_TENSOR_SQRT",                                CONV_NUMERIC_LITERAL, API_DNN}},    // 4
+    {"CUDNN_OP_TENSOR_NOT",                                 {"HIPDNN_OP_TENSOR_NOT",                                 CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 5
+    {"cudnnReduceTensorOp_t",                               {"hipdnnReduceTensorOp_t",                               CONV_TYPE, API_DNN}},
+    {"CUDNN_REDUCE_TENSOR_ADD",                             {"HIPDNN_REDUCE_TENSOR_ADD",                             CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_REDUCE_TENSOR_MUL",                             {"HIPDNN_REDUCE_TENSOR_MUL",                             CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"CUDNN_REDUCE_TENSOR_MIN",                             {"HIPDNN_REDUCE_TENSOR_MIN",                             CONV_NUMERIC_LITERAL, API_DNN}},    // 2
+    {"CUDNN_REDUCE_TENSOR_MAX",                             {"HIPDNN_REDUCE_TENSOR_MAX",                             CONV_NUMERIC_LITERAL, API_DNN}},    // 3
+    {"CUDNN_REDUCE_TENSOR_AMAX",                            {"HIPDNN_REDUCE_TENSOR_AMAX",                            CONV_NUMERIC_LITERAL, API_DNN}},    // 4
+    {"CUDNN_REDUCE_TENSOR_AVG",                             {"HIPDNN_REDUCE_TENSOR_AVG",                             CONV_NUMERIC_LITERAL, API_DNN}},    // 5
+    {"CUDNN_REDUCE_TENSOR_NORM1",                           {"HIPDNN_REDUCE_TENSOR_NORM1",                           CONV_NUMERIC_LITERAL, API_DNN}},    // 6
+    {"CUDNN_REDUCE_TENSOR_NORM2",                           {"HIPDNN_REDUCE_TENSOR_NORM2",                           CONV_NUMERIC_LITERAL, API_DNN}},    // 7
+    {"CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS",                    {"HIPDNN_REDUCE_TENSOR_MUL_NO_ZEROS",                    CONV_NUMERIC_LITERAL, API_DNN}},    // 8
+    {"cudnnReduceTensorIndices_t",                          {"hipdnnReduceTensorIndices_t",                          CONV_TYPE, API_DNN}},
+    {"CUDNN_REDUCE_TENSOR_NO_INDICES",                      {"HIPDNN_REDUCE_TENSOR_NO_INDICES",                      CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_REDUCE_TENSOR_FLATTENED_INDICES",               {"HIPDNN_REDUCE_TENSOR_FLATTENED_INDICES",               CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"cudnnConvolutionBwdDataPreference_t",                 {"hipdnnConvolutionBwdDataPreference_t",                 CONV_TYPE, API_DNN}},
+    {"CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE",             {"HIPDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE",             CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST",           {"HIPDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST",           CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT",  {"HIPDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT",  CONV_NUMERIC_LITERAL, API_DNN}},    // 2
+    {"cudnnIndicesType_t",                                  {"hipdnnIndicesType_t",                                  CONV_TYPE, API_DNN}},
+    {"CUDNN_32BIT_INDICES",                                 {"HIPDNN_32BIT_INDICES",                                 CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_64BIT_INDICES",                                 {"HIPDNN_64BIT_INDICES",                                 CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"CUDNN_16BIT_INDICES",                                 {"HIPDNN_16BIT_INDICES",                                 CONV_NUMERIC_LITERAL, API_DNN}},    // 2
+    {"CUDNN_8BIT_INDICES",                                  {"HIPDNN_8BIT_INDICES",                                  CONV_NUMERIC_LITERAL, API_DNN}},    // 3
+    {"cudnnSoftmaxAlgorithm_t",                             {"hipdnnSoftmaxAlgorithm_t",                             CONV_TYPE, API_DNN}},
+    {"CUDNN_SOFTMAX_FAST",                                  {"HIPDNN_SOFTMAX_FAST",                                  CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_SOFTMAX_ACCURATE",                              {"HIPDNN_SOFTMAX_ACCURATE",                              CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"CUDNN_SOFTMAX_LOG",                                   {"HIPDNN_SOFTMAX_LOG",                                   CONV_NUMERIC_LITERAL, API_DNN}},    // 2
+    {"cudnnSoftmaxMode_t",                                  {"hipdnnSoftmaxMode_t",                                  CONV_TYPE, API_DNN}},
+    {"CUDNN_SOFTMAX_MODE_INSTANCE",                         {"HIPDNN_SOFTMAX_MODE_INSTANCE",                         CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_SOFTMAX_MODE_CHANNEL",                          {"HIPDNN_SOFTMAX_MODE_CHANNEL",                          CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"cudnnPoolingMode_t",                                  {"hipdnnPoolingMode_t",                                  CONV_TYPE, API_DNN}},
+    {"CUDNN_POOLING_MAX",                                   {"HIPDNN_POOLING_MAX",                                   CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING",         {"HIPDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING",         CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING",         {"HIPDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING",         CONV_NUMERIC_LITERAL, API_DNN}},    // 2
+    {"CUDNN_POOLING_MAX_DETERMINISTIC",                     {"HIPDNN_POOLING_MAX_DETERMINISTIC",                     CONV_NUMERIC_LITERAL, API_DNN}},    // 3
+    {"cudnnActivationMode_t",                               {"hipdnnActivationMode_t",                               CONV_TYPE, API_DNN}},
+    {"CUDNN_ACTIVATION_SIGMOID",                            {"HIPDNN_ACTIVATION_SIGMOID",                            CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_ACTIVATION_RELU",                               {"HIPDNN_ACTIVATION_RELU",                               CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"CUDNN_ACTIVATION_TANH",                               {"HIPDNN_ACTIVATION_TANH",                               CONV_NUMERIC_LITERAL, API_DNN}},    // 2
+    {"CUDNN_ACTIVATION_CLIPPED_RELU",                       {"HIPDNN_ACTIVATION_CLIPPED_RELU",                       CONV_NUMERIC_LITERAL, API_DNN}},    // 3
+    {"CUDNN_ACTIVATION_ELU",                                {"HIPDNN_ACTIVATION_ELU",                                CONV_NUMERIC_LITERAL, API_DNN}},    // 4
+    {"CUDNN_ACTIVATION_IDENTITY",                           {"HIPDNN_ACTIVATION_PATHTRU",                            CONV_NUMERIC_LITERAL, API_DNN}},    // 5
+    {"cudnnBatchNormMode_t",                                {"hipdnnBatchNormMode_t",                                CONV_TYPE, API_DNN}},
+    {"CUDNN_BATCHNORM_PER_ACTIVATION",                      {"HIPDNN_BATCHNORM_PER_ACTIVATION",                      CONV_NUMERIC_LITERAL, API_DNN}},    // 0
+    {"CUDNN_BATCHNORM_SPATIAL",                             {"HIPDNN_BATCHNORM_SPATIAL",                             CONV_NUMERIC_LITERAL, API_DNN}},    // 1
+    {"CUDNN_BATCHNORM_SPATIAL_PERSISTENT",                  {"HIPDNN_BATCHNORM_SPATIAL_PERSISTENT",                  CONV_NUMERIC_LITERAL, API_DNN}},    // 2
+    {"cudnnSamplerType_t",                                  {"hipdnnSamplerType_t",                                  CONV_TYPE, API_DNN, HIP_UNSUPPORTED}},
+    {"CUDNN_SAMPLER_BILINEAR",                              {"HIPDNN_SAMPLER_BILINEAR",                              CONV_NUMERIC_LITERAL, API_DNN, HIP_UNSUPPORTED}},    // 0
 
     {"cudnnGetVersion",                               {"hipdnnGetVersion",                               CONV_VERSION, API_DNN}},
     {"cudnnGetCudartVersion",                         {"hipdnnGetCudartVersion",                         CONV_VERSION, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnQueryRuntimeError",                        {"hipdnnQueryRuntimeError",                        CONV_VERSION, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetProperty",                              {"hipdnnGetProperty",                              CONV_VERSION, API_DNN, HIP_UNSUPPORTED}},
     {"cudnnGetErrorString",                           {"hipdnnGetErrorString",                           CONV_ERROR, API_DNN}},
+
     {"cudnnCreate",                                   {"hipdnnCreate",                                   CONV_MATH_FUNC, API_DNN}},
     {"cudnnCreateTensorDescriptor",                   {"hipdnnCreateTensorDescriptor",                   CONV_MATH_FUNC, API_DNN}},
+    {"cudnnCreateDropoutDescriptor",                  {"hipdnnCreateDropoutDescriptor",                  CONV_MATH_FUNC, API_DNN}},
+    {"cudnnCreateReduceTensorDescriptor",             {"hipdnnCreateReduceTensorDescriptor",             CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetReduceTensorDescriptor",                {"hipdnnSetReduceTensorDescriptor",                CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetReduceTensorDescriptor",                {"hipdnnGetReduceTensorDescriptor",                CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetReductionIndicesSize",                  {"hipdnnGetReductionIndicesSize",                  CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetReductionWorkspaceSize",                {"hipdnnGetReductionWorkspaceSize",                CONV_MATH_FUNC, API_DNN}},
+    {"cudnnCreateOpTensorDescriptor",                 {"hipdnnCreateOpTensorDescriptor",                 CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnSetOpTensorDescriptor",                    {"hipdnnSetOpTensorDescriptor",                    CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetOpTensorDescriptor",                    {"hipdnnGetOpTensorDescriptor",                    CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnCreateRNNDescriptor",                      {"hipdnnCreateRNNDescriptor",                      CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetStream",                                {"hipdnnSetStream",                                CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetStream",                                {"hipdnnGetStream",                                CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetRNNDescriptor_v5",                      {"hipdnnSetRNNDescriptor_v5",                      CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetRNNDescriptor_v6",                      {"hipdnnSetRNNDescriptor_v6",                      CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetRNNDescriptor",                         {"hipdnnSetRNNDescriptor",                         CONV_MATH_FUNC, API_DNN}},
+    {"cudnnDropoutGetStatesSize",                     {"hipdnnDropoutGetStatesSize",                     CONV_MATH_FUNC, API_DNN}},
+    {"cudnnDropoutGetReserveSpaceSize",               {"hipdnnDropoutGetReserveSpaceSize",               CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnTransformTensor",                          {"hipdnnTransformTensor",                          CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
     {"cudnnSetTensor4dDescriptor",                    {"hipdnnSetTensor4dDescriptor",                    CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetTensor4dDescriptor",                    {"hipdnnGetTensor4dDescriptor",                    CONV_MATH_FUNC, API_DNN}},
+    {"cudnnAddTensor",                                {"hipdnnAddTensor",                                CONV_MATH_FUNC, API_DNN}},
+    {"cudnnOpTensor",                                 {"hipdnnOpTensor",                                 CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetTensorSizeInBytes",                     {"hipdnnGetTensorSizeInBytes",                     CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnSetTensor4dDescriptorEx",                  {"hipdnnSetTensor4dDescriptorEx",                  CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnSetTensorNdDescriptor",                    {"hipdnnSetTensorNdDescriptor",                    CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetTensorNdDescriptor",                    {"hipdnnGetTensorNdDescriptor",                    CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetTensorNdDescriptorEx",                  {"hipdnnSetTensorNdDescriptorEx",                  CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnFindConvolutionForwardAlgorithm",          {"hipdnnFindConvolutionForwardAlgorithm",          CONV_MATH_FUNC, API_DNN}},
+    {"cudnnFindConvolutionForwardAlgorithmEx",        {"hipdnnFindConvolutionForwardAlgorithmEx",        CONV_MATH_FUNC, API_DNN}},
+    {"cudnnConvolutionBackwardFilter",                {"hipdnnConvolutionBackwardFilter",                CONV_MATH_FUNC, API_DNN}},
+    {"cudnnConvolutionBackwardData",                  {"hipdnnConvolutionBackwardData",                  CONV_MATH_FUNC, API_DNN}},
+    {"cudnnFindConvolutionBackwardFilterAlgorithm",   {"hipdnnFindConvolutionBackwardFilterAlgorithm",   CONV_MATH_FUNC, API_DNN}},
+    {"cudnnFindConvolutionBackwardFilterAlgorithmEx", {"hipdnnFindConvolutionBackwardFilterAlgorithmEx", CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetConvolutionBackwardFilterAlgorithm",    {"hipdnnGetConvolutionBackwardFilterAlgorithm",    CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetConvolutionBackwardFilterAlgorithm_v7", {"hipdnnGetConvolutionBackwardFilterAlgorithm_v7", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetConvolutionBackwardFilterWorkspaceSize",{"hipdnnGetConvolutionBackwardFilterWorkspaceSize",CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetConvolutionBackwardDataWorkspaceSize",  {"hipdnnGetConvolutionBackwardDataWorkspaceSize",  CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetConvolutionBackwardDataAlgorithm",      {"hipdnnGetConvolutionBackwardDataAlgorithm",      CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetConvolutionBackwardDataAlgorithm_v7",   {"hipdnnGetConvolutionBackwardDataAlgorithm_v7",   CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetConvolutionBackwardDataAlgorithmMaxCount", {"hipdnnGetConvolutionBackwardDataAlgorithmMaxCount", CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetConvolutionForwardAlgorithmMaxCount",   {"hipdnnGetConvolutionForwardAlgorithmMaxCount",   CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetRNNLinLayerMatrixParams",               {"hipdnnGetRNNLinLayerMatrixParams",               CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetRNNLinLayerBiasParams",                 {"hipdnnGetRNNLinLayerBiasParams",                 CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetRNNProjectionLayers",                   {"hipdnnSetRNNProjectionLayers",                   CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetRNNProjectionLayers",                   {"hipdnnGetRNNProjectionLayers",                   CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnSetRNNAlgorithmDescriptor",                {"hipdnnSetRNNAlgorithmDescriptor",                CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetRNNDescriptor",                         {"hipdnnGetRNNDescriptor",                         CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetFilterNdDescriptor",                    {"hipdnnGetFilterNdDescriptor",                    CONV_MATH_FUNC, API_DNN}},
+    {"cudnnFindConvolutionBackwardDataAlgorithm",     {"hipdnnFindConvolutionBackwardDataAlgorithm",     CONV_MATH_FUNC, API_DNN}},
+    {"cudnnFindConvolutionBackwardDataAlgorithmEx",   {"hipdnnFindConvolutionBackwardDataAlgorithmEx",   CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetDropoutDescriptor",                     {"hipdnnSetDropoutDescriptor",                     CONV_MATH_FUNC, API_DNN}},
+    {"cudnnRestoreDropoutDescriptor",                 {"hipdnnRestoreDropoutDescriptor",                 CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetDropoutDescriptor",                     {"hipdnnGetDropoutDescriptor",                     CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetDropoutDescriptor",                     {"hipdnnGetDropoutDescriptor",                     CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
     {"cudnnSetConvolution2dDescriptor",               {"hipdnnSetConvolution2dDescriptor",               CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetConvolution2dDescriptor",               {"hipdnnGetConvolution2dDescriptor",               CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetConvolutionMathType",                   {"hipdnnSetConvolutionMathType",                   CONV_MATH_FUNC, API_DNN}},
+    {"cudnnDropoutForward",                           {"hipdnnDropoutForward",                           CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnDropoutBackward",                          {"hipdnnDropoutBackward",                          CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetConvolutionMathType",                   {"hipdnnGetConvolutionMathType",                   CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnSetConvolutionGroupCount",                 {"hipdnnSetConvolutionGroupCount",                 CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetConvolutionGroupCount",                 {"hipdnnGetConvolutionGroupCount",                 CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
     {"cudnnGetConvolution2dForwardOutputDim",         {"hipdnnGetConvolution2dForwardOutputDim",         CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetConvolutionNdDescriptor",               {"hipdnnSetConvolutionNdDescriptor",               CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetConvolutionNdDescriptor",               {"hipdnnGetConvolutionNdDescriptor",               CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetConvolutionNdForwardOutputDim",         {"hipdnnGetConvolutionNdForwardOutputDim",         CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
     {"cudnnCreateFilterDescriptor",                   {"hipdnnCreateFilterDescriptor",                   CONV_MATH_FUNC, API_DNN}},
+    {"cudnnCreatePersistentRNNPlan",                  {"hipdnnCreatePersistentRNNPlan",                  CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetPersistentRNNPlan",                     {"hipdnnSetPersistentRNNPlan",                     CONV_MATH_FUNC, API_DNN}},
+    {"cudnnRNNForwardInference",                      {"hipdnnRNNForwardInference",                      CONV_MATH_FUNC, API_DNN}},
+    {"cudnnRNNBackwardWeights",                       {"hipdnnRNNBackwardWeights",                       CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetRNNParamsSize",                         {"hipdnnGetRNNParamsSize",                         CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetRNNWorkspaceSize",                      {"hipdnnGetRNNWorkspaceSize",                      CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetRNNTrainingReserveSize",                {"hipdnnGetRNNTrainingReserveSize",                CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetFilterNdDescriptor",                    {"hipdnnSetFilterNdDescriptor",                    CONV_MATH_FUNC, API_DNN}},
+    {"cudnnRNNForwardTraining",                       {"hipdnnRNNForwardTraining",                       CONV_MATH_FUNC, API_DNN}},
+    {"cudnnRNNBackwardData",                          {"hipdnnRNNBackwardData",                          CONV_MATH_FUNC, API_DNN}},
     {"cudnnSetFilter4dDescriptor",                    {"hipdnnSetFilter4dDescriptor",                    CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetRNNMatrixMathType",                     {"hipdnnSetRNNMatrixMathType",                     CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetRNNMatrixMathType",                     {"hipdnnGetRNNMatrixMathType",                     CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetRNNForwardInferenceAlgorithmMaxCount",  {"hipdnnGetRNNForwardInferenceAlgorithmMaxCount",  CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnFindRNNForwardInferenceAlgorithmEx",       {"hipdnnFindRNNForwardInferenceAlgorithmEx",       CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetRNNForwardTrainingAlgorithmMaxCount",   {"hipdnnGetRNNForwardTrainingAlgorithmMaxCount",   CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnFindRNNForwardTrainingAlgorithmEx",        {"hipdnnFindRNNForwardTrainingAlgorithmEx",        CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetRNNBackwardDataAlgorithmMaxCount",      {"hipdnnGetRNNBackwardDataAlgorithmMaxCount",      CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnFindRNNBackwardDataAlgorithmEx",           {"hipdnnFindRNNBackwardDataAlgorithmEx",           CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetRNNBackwardWeightsAlgorithmMaxCount",   {"hipdnnGetRNNBackwardWeightsAlgorithmMaxCount",   CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnFindRNNBackwardWeightsAlgorithmEx",        {"hipdnnFindRNNBackwardWeightsAlgorithmEx",        CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
     {"cudnnCreateConvolutionDescriptor",              {"hipdnnCreateConvolutionDescriptor",              CONV_MATH_FUNC, API_DNN}},
     {"cudnnGetConvolutionForwardAlgorithm",           {"hipdnnGetConvolutionForwardAlgorithm",           CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetConvolutionForwardAlgorithm_v7",        {"hipdnnGetConvolutionForwardAlgorithm_v7",        CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
     {"cudnnConvolutionForward",                       {"hipdnnConvolutionForward",                       CONV_MATH_FUNC, API_DNN}},
     {"cudnnGetConvolutionForwardWorkspaceSize",       {"hipdnnGetConvolutionForwardWorkspaceSize",       CONV_MATH_FUNC, API_DNN}},
+    {"cudnnConvolutionBiasActivationForward",         {"hipdnnConvolutionBiasActivationForward",         CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetConvolutionBackwardFilterAlgorithmMaxCount", {"hipdnnGetConvolutionBackwardFilterAlgorithmMaxCount",       CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnConvolutionBackwardBias",                  {"hipdnnConvolutionBackwardBias",                  CONV_MATH_FUNC, API_DNN}},
+    {"cudnnReduceTensor",                             {"hipdnnReduceTensor",                             CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetTensor",                                {"hipdnnSetTensor",                                CONV_MATH_FUNC, API_DNN}},
+    {"cudnnScaleTensor",                              {"hipdnnScaleTensor",                              CONV_MATH_FUNC, API_DNN}},
+    {"cudnnIm2Col",                                   {"hipdnnIm2Col",                                   CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
     {"cudnnDestroyTensorDescriptor",                  {"hipdnnDestroyTensorDescriptor",                  CONV_MATH_FUNC, API_DNN}},
+    {"cudnnDestroyOpTensorDescriptor",                {"hipdnnDestroyOpTensorDescriptor",                CONV_MATH_FUNC, API_DNN}},
     {"cudnnDestroyConvolutionDescriptor",             {"hipdnnDestroyConvolutionDescriptor",             CONV_MATH_FUNC, API_DNN}},
+    {"cudnnDestroyDropoutDescriptor",                 {"hipdnnDestroyDropoutDescriptor",                 CONV_MATH_FUNC, API_DNN}},
     {"cudnnDestroyFilterDescriptor",                  {"hipdnnDestroyFilterDescriptor",                  CONV_MATH_FUNC, API_DNN}},
-    {"cudnnDestroyFilterDescriptor",                  {"hipdnnDestroyFilterDescriptor",                  CONV_MATH_FUNC, API_DNN}},
+    {"cudnnDestroyRNNDescriptor",                     {"hipdnnDestroyRNNDescriptor",                     CONV_MATH_FUNC, API_DNN}},
+    {"cudnnDestroyReduceTensorDescriptor",            {"hipdnnDestroyReduceTensorDescriptor",            CONV_MATH_FUNC, API_DNN}},
+    {"cudnnDestroyPersistentRNNPlan",                 {"hipdnnDestroyPersistentRNNPlan",                 CONV_MATH_FUNC, API_DNN}},
     {"cudnnDestroy",                                  {"hipdnnDestroy",                                  CONV_MATH_FUNC, API_DNN}},
-
+    {"cudnnSoftmaxForward",                           {"hipdnnSoftmaxForward",                           CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSoftmaxBackward",                          {"hipdnnSoftmaxBackward",                          CONV_MATH_FUNC, API_DNN}},
+    {"cudnnCreatePoolingDescriptor",                  {"hipdnnCreatePoolingDescriptor",                  CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetPooling2dDescriptor",                   {"hipdnnSetPooling2dDescriptor",                   CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetPooling2dDescriptor",                   {"hipdnnGetPooling2dDescriptor",                   CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetPoolingNdDescriptor",                   {"hipdnnSetPoolingNdDescriptor",                   CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetPoolingNdDescriptor",                   {"hipdnnGetPoolingNdDescriptor",                   CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetPoolingNdForwardOutputDim",             {"hipdnnGetPoolingNdForwardOutputDim",             CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetPooling2dForwardOutputDim",             {"hipdnnGetPooling2dForwardOutputDim",             CONV_MATH_FUNC, API_DNN}},
+    {"cudnnDestroyPoolingDescriptor",                 {"hipdnnDestroyPoolingDescriptor",                 CONV_MATH_FUNC, API_DNN}},
+    {"cudnnPoolingForward",                           {"hipdnnPoolingForward",                           CONV_MATH_FUNC, API_DNN}},
+    {"cudnnPoolingBackward",                          {"hipdnnPoolingBackward",                          CONV_MATH_FUNC, API_DNN}},
+    {"cudnnCreateActivationDescriptor",               {"hipdnnCreateActivationDescriptor",               CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetActivationDescriptor",                  {"hipdnnSetActivationDescriptor",                  CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetActivationDescriptor",                  {"hipdnnGetActivationDescriptor",                  CONV_MATH_FUNC, API_DNN}},
+    {"cudnnDestroyActivationDescriptor",              {"hipdnnDestroyActivationDescriptor",              CONV_MATH_FUNC, API_DNN}},
+    {"cudnnActivationForward",                        {"hipdnnActivationForward",                        CONV_MATH_FUNC, API_DNN}},
+    {"cudnnActivationBackward",                       {"hipdnnActivationBackward",                       CONV_MATH_FUNC, API_DNN}},
+    {"cudnnCreateLRNDescriptor",                      {"hipdnnCreateLRNDescriptor",                      CONV_MATH_FUNC, API_DNN}},
+    {"cudnnSetLRNDescriptor",                         {"hipdnnSetLRNDescriptor",                         CONV_MATH_FUNC, API_DNN}},
+    {"cudnnGetLRNDescriptor",                         {"hipdnnGetLRNDescriptor",                         CONV_MATH_FUNC, API_DNN}},
+    {"cudnnDestroyLRNDescriptor",                     {"hipdnnDestroyLRNDescriptor",                     CONV_MATH_FUNC, API_DNN}},
+    {"cudnnLRNCrossChannelForward",                   {"hipdnnLRNCrossChannelForward",                   CONV_MATH_FUNC, API_DNN}},
+    {"cudnnLRNCrossChannelBackward",                  {"hipdnnLRNCrossChannelBackward",                  CONV_MATH_FUNC, API_DNN}},
+    {"cudnnDivisiveNormalizationForward",             {"hipdnnDivisiveNormalizationForward",             CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnDivisiveNormalizationBackward",            {"hipdnnDivisiveNormalizationBackward",            CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnDeriveBNTensorDescriptor",                 {"hipdnnDeriveBNTensorDescriptor",                 CONV_MATH_FUNC, API_DNN}},
+    {"cudnnBatchNormalizationForwardTraining",        {"hipdnnBatchNormalizationForwardTraining",        CONV_MATH_FUNC, API_DNN}},
+    {"cudnnBatchNormalizationForwardInference",       {"hipdnnBatchNormalizationForwardInference",       CONV_MATH_FUNC, API_DNN}},
+    {"cudnnBatchNormalizationBackward",               {"hipdnnBatchNormalizationBackward",               CONV_MATH_FUNC, API_DNN}},
+    {"cudnnCreateSpatialTransformerDescriptor",       {"hipdnnCreateSpatialTransformerDescriptor",       CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnSetSpatialTransformerNdDescriptor",        {"hipdnnSetSpatialTransformerNdDescriptor",        CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnDestroySpatialTransformerDescriptor",      {"hipdnnDestroySpatialTransformerDescriptor",      CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnSpatialTfGridGeneratorForward",            {"hipdnnSpatialTfGridGeneratorForward",            CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnSpatialTfGridGeneratorBackward",           {"hipdnnSpatialTfGridGeneratorBackward",           CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnSpatialTfSamplerForward",                  {"hipdnnSpatialTfSamplerForward",                  CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnSpatialTfSamplerBackward",                 {"hipdnnSpatialTfSamplerBackward",                 CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnCreateCTCLossDescriptor",                  {"hipdnnCreateCTCLossDescriptor",                  CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnSetCTCLossDescriptor",                     {"hipdnnSetCTCLossDescriptor",                     CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetCTCLossDescriptor",                     {"hipdnnGetCTCLossDescriptor",                     CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnDestroyCTCLossDescriptor",                 {"hipdnnDestroyCTCLossDescriptor",                 CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnCTCLoss",                                  {"hipdnnCTCLoss",                                  CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetCTCLossWorkspaceSize",                  {"hipdnnGetCTCLossWorkspaceSize",                  CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnCreateAlgorithmDescriptor",                {"hipdnnCreateAlgorithmDescriptor",                CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnSetAlgorithmDescriptor",                   {"hipdnnSetAlgorithmDescriptor",                   CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetAlgorithmDescriptor",                   {"hipdnnGetAlgorithmDescriptor",                   CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnCopyAlgorithmDescriptor",                  {"hipdnnCopyAlgorithmDescriptor",                  CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnDestroyAlgorithmDescriptor",               {"hipdnnDestroyAlgorithmDescriptor",               CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnCreateAlgorithmPerformance",               {"hipdnnCreateAlgorithmPerformance",               CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnSetAlgorithmPerformance",                  {"hipdnnSetAlgorithmPerformance",                  CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetAlgorithmPerformance",                  {"hipdnnGetAlgorithmPerformance",                  CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnDestroyAlgorithmPerformance",              {"hipdnnDestroyAlgorithmPerformance",              CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetAlgorithmSpaceSize",                    {"hipdnnGetAlgorithmSpaceSize",                    CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnSaveAlgorithm",                            {"hipdnnSaveAlgorithm",                            CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnRestoreAlgorithm",                         {"hipdnnRestoreAlgorithm",                         CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnSetCallback",                              {"hipdnnSetCallback",                              CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
+    {"cudnnGetCallback",                              {"hipdnnGetCallback",                              CONV_MATH_FUNC, API_DNN, HIP_UNSUPPORTED}},
 };
 
 const std::map<llvm::StringRef, hipCounter>& CUDA_RENAMES_MAP() {
diff --git a/include/hip/hcc_detail/device_functions.h b/include/hip/hcc_detail/device_functions.h
index ca236a1125..843e80e8b4 100644
--- a/include/hip/hcc_detail/device_functions.h
+++ b/include/hip/hcc_detail/device_functions.h
@@ -88,6 +88,11 @@ __device__ static inline unsigned int __usad(unsigned int x, unsigned int y, uns
     return __hip_hc_ir_usad_int(x, y, z);
 }
 
+extern __device__ __attribute__((const)) unsigned int __mbcnt_lo(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.lo");
+extern __device__ __attribute__((const)) unsigned int __mbcnt_hi(unsigned int x, unsigned int y) __asm("llvm.amdgcn.mbcnt.hi");
+
+__device__ static inline unsigned int __lane_id() { return  __mbcnt_hi(-1, __mbcnt_lo(-1, 0)); }
+
 /*
 Rounding modes are not yet supported in HIP
 */
diff --git a/include/hip/hcc_detail/hip_runtime.h b/include/hip/hcc_detail/hip_runtime.h
index 92f06e9174..1a6b0f7dda 100644
--- a/include/hip/hcc_detail/hip_runtime.h
+++ b/include/hip/hcc_detail/hip_runtime.h
@@ -29,6 +29,20 @@ THE SOFTWARE.
 #ifndef HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H
 #define HIP_INCLUDE_HIP_HCC_DETAIL_HIP_RUNTIME_H
 
+#if defined(__HCC__)
+#define __HCC_OR_HIP_CLANG__ 1
+#define __HCC_ONLY__ 1
+#define __HIP_CLANG_ONLY__ 0
+#elif defined(__clang__) && defined(__HIP__)
+#define __HCC_OR_HIP_CLANG__ 1
+#define __HCC_ONLY__ 0
+#define __HIP_CLANG_ONLY__ 1
+#else
+#define __HCC_OR_HIP_CLANG__ 0
+#define __HCC_ONLY__ 0
+#define __HIP_CLANG_ONLY__ 0
+#endif
+
 //---
 // Top part of file can be compiled with any compiler
 
@@ -41,15 +55,16 @@ THE SOFTWARE.
 #include <stddef.h>
 #endif  //__cplusplus
 
-#if __HCC__
+#if __HCC_OR_HIP_CLANG__
 
 // Define NVCC_COMPAT for CUDA compatibility
 #define NVCC_COMPAT
 #define CUDA_SUCCESS hipSuccess
 
 #include <hip/hip_runtime_api.h>
+#endif  // __HCC_OR_HIP_CLANG__
 
-
+#if __HCC__
 // define HIP_ENABLE_PRINTF to enable printf
 #ifdef HIP_ENABLE_PRINTF
 #define HCC_ENABLE_ACCELERATOR_PRINTF 1
@@ -164,6 +179,10 @@ extern int HIP_TRACE_API;
 #define __HCC_C__
 #endif
 
+#endif  // defined __HCC__
+
+#if __HCC_OR_HIP_CLANG__
+
 // TODO - hipify-clang - change to use the function call.
 //#define warpSize hc::__wavesize()
 static constexpr int warpSize = 64;
@@ -371,6 +390,10 @@ __device__ void __threadfence_system(void);
  * @}
  */
 
+#endif  // __HCC_OR_HIP_CLANG__
+
+#if defined __HCC__
+
 template <
     typename std::common_type<decltype(hc_get_group_id), decltype(hc_get_group_size),
                               decltype(hc_get_num_groups), decltype(hc_get_workitem_id)>::type f>
@@ -414,6 +437,8 @@ static constexpr Coordinates<hc_get_workitem_id> threadIdx;
 #define hipGridDim_y (hc_get_num_groups(1))
 #define hipGridDim_z (hc_get_num_groups(2))
 
+#endif // defined __HCC__
+#if __HCC_OR_HIP_CLANG__
 extern "C" __device__ void* __hip_hc_memcpy(void* dst, const void* src, size_t size);
 extern "C" __device__ void* __hip_hc_memset(void* ptr, uint8_t val, size_t size);
 extern "C" __device__ void* __hip_hc_malloc(size_t);
@@ -446,7 +471,9 @@ static inline __device__ void printf(const char* format, All... all) {}
 #endif
 
 #endif
+#endif //__HCC_OR_HIP_CLANG__
 
+#ifdef __HCC__
 
 #define __syncthreads() hc_barrier(CLK_LOCAL_MEM_FENCE)
 
@@ -514,7 +541,9 @@ extern void ihipPostLaunchKernel(const char* kernelName, hipStream_t stream, gri
  *   @}
  */
 
-
+//
+// hip-clang functions
+//
 #elif defined(__clang__) && defined(__HIP__)
 
 #define HIP_KERNEL_NAME(...) __VA_ARGS__
@@ -612,6 +641,27 @@ extern const __device__ __attribute__((weak)) __hip_builtin_gridDim_t gridDim;
 #define hipGridDim_y gridDim.y
 #define hipGridDim_z gridDim.z
 
+#pragma push_macro("__DEVICE__")
+#define __DEVICE__ extern "C" __device__ __attribute__((always_inline)) \
+  __attribute__((weak))
+
+__DEVICE__ void __device_trap() __asm("llvm.trap");
+
+__DEVICE__ void inline __assert_fail(const char * __assertion,
+                                     const char *__file,
+                                     unsigned int __line,
+                                     const char *__function)
+{
+    // Ignore all the args for now.
+    __device_trap();
+}
+
+extern "C" __device__ __attribute__((noduplicate)) void __syncthreads();
+
+#pragma push_macro("__DEVICE__")
+
+#include <hip/hcc_detail/math_functions.h>
+
 #endif
 
 #endif  // HIP_HCC_DETAIL_RUNTIME_H
diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h
index 8becab3c9c..0a80a583c7 100644
--- a/include/hip/hcc_detail/hip_runtime_api.h
+++ b/include/hip/hcc_detail/hip_runtime_api.h
@@ -94,6 +94,19 @@ typedef struct ihipModule_t* hipModule_t;
 
 typedef struct ihipModuleSymbol_t* hipFunction_t;
 
+struct hipFuncAttributes {
+    int binaryVersion;
+    int cacheModeCA;
+    size_t constSizeBytes;
+    size_t localSizeBytes;
+    int maxDynamicSharedSizeBytes;
+    int maxThreadsPerBlock;
+    int numRegs;
+    int preferredShmemCarveout;
+    int ptxVersion;
+    size_t sharedSizeBytes;
+};
+
 typedef struct ihipEvent_t* hipEvent_t;
 
 enum hipLimit_t {
@@ -1450,6 +1463,27 @@ hipError_t hipMemset2D(void* dst, size_t pitch, int value, size_t width, size_t
 
 hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, size_t width, size_t height,hipStream_t stream __dparm(0));
 
+/**
+ *  @brief Fills synchronously the memory area pointed to by pitchedDevPtr with the constant value.
+ *
+ *  @param[in] pitchedDevPtr
+ *  @param[in]  value - constant value to be set
+ *  @param[in]  extent
+ *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
+ */
+hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int  value, hipExtent extent );
+
+/**
+ *  @brief Fills asynchronously the memory area pointed to by pitchedDevPtr with the constant value.
+ *
+ *  @param[in] pitchedDevPtr
+ *  @param[in]  value - constant value to be set
+ *  @param[in]  extent
+ *  @param[in]  stream
+ *  @return #hipSuccess, #hipErrorInvalidValue, #hipErrorMemoryFree
+ */
+hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int  value, hipExtent extent ,hipStream_t stream __dparm(0));
+
 /**
  * @brief Query memory info.
  * Return snapshot of free memory, and total allocatable memory on the device.
@@ -2222,6 +2256,17 @@ hipError_t hipModuleUnload(hipModule_t module);
  */
 hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, const char* kname);
 
+/**
+ * @bried Find out attributes for a given function.
+ *
+ * @param [out] attr
+ * @param [in] func
+ *
+ * @returns hipSuccess, hipErrorInvalidDeviceFunction
+ */
+
+hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func);
+
 /**
  * @brief returns device memory pointer and size of the kernel present in the module with symbol @p
  * name
diff --git a/include/hip/hcc_detail/math_functions.h b/include/hip/hcc_detail/math_functions.h
index c11112b1dc..49650987ac 100644
--- a/include/hip/hcc_detail/math_functions.h
+++ b/include/hip/hcc_detail/math_functions.h
@@ -27,6 +27,7 @@ THE SOFTWARE.
 #include <hip/hip_runtime.h>
 
 #include <assert.h>
+#include <limits.h>
 #include <stdint.h>
 
 __device__
@@ -101,6 +102,9 @@ uint64_t __make_mantissa(const char* tagp)
 // BEGIN FLOAT
 __device__
 inline
+float abs(float x) { return __ocml_fabs_f32(x); }
+__device__
+inline
 float acosf(float x) { return __ocml_acos_f32(x); }
 __device__
 inline
@@ -628,6 +632,9 @@ float __tanf(float x) { return __ocml_tan_f32(x); }
 // BEGIN DOUBLE
 __device__
 inline
+double abs(double x) { return __ocml_fabs_f64(x); }
+__device__
+inline
 double acos(double x) { return __ocml_acos_f64(x); }
 __device__
 inline
@@ -1101,4 +1108,37 @@ double __fma_rz(double x, double y, double z)
     return __llvm_fma_rtz_f64(x, y, z);
 }
 // END INTRINSICS
-// END DOUBLE
\ No newline at end of file
+// END DOUBLE
+
+// BEGIN INTEGER
+__device__
+inline
+int abs(int x)
+{
+    int sgn = x >> (sizeof(int) * CHAR_BIT - 1);
+    return (x ^ sgn) - sgn;
+}
+__device__
+inline
+long labs(long x)
+{
+    long sgn = x >> (sizeof(long) * CHAR_BIT - 1);
+    return (x ^ sgn) - sgn;
+}
+__device__
+inline
+long long llabs(long long x)
+{
+    long long sgn = x >> (sizeof(long long) * CHAR_BIT - 1);
+    return (x ^ sgn) - sgn;
+}
+
+#if defined(__cplusplus)
+    __device__
+    inline
+    long abs(long x) { return labs(x); }
+    __device__
+    inline
+    long long abs(long long x) { return llabs(x); }
+#endif
+// END INTEGER
diff --git a/include/hip/hcc_detail/program_state.hpp b/include/hip/hcc_detail/program_state.hpp
index b6ccafb205..ac689fdb89 100644
--- a/include/hip/hcc_detail/program_state.hpp
+++ b/include/hip/hcc_detail/program_state.hpp
@@ -22,8 +22,10 @@ THE SOFTWARE.
 
 #pragma once
 
+#include <hsa/amd_hsa_kernel_code.h>
 #include <hsa/hsa.h>
 #include <hsa/hsa_ext_amd.h>
+#include <hsa/hsa_ven_amd_loader.h>
 
 #include <cstddef>
 #include <istream>
@@ -46,11 +48,45 @@ struct hash<hsa_agent_t> {
 inline constexpr bool operator==(hsa_agent_t x, hsa_agent_t y) { return x.handle == y.handle; }
 
 namespace hip_impl {
-struct Kernel_descriptor {
-    std::uint64_t kernel_object_;
-    std::uint32_t group_size_;
-    std::uint32_t private_size_;
-    std::string name_;
+class Kernel_descriptor {
+    std::uint64_t kernel_object_{};
+    amd_kernel_code_t const* kernel_header_{nullptr};
+    std::string name_{};
+public:
+    Kernel_descriptor() = default;
+    Kernel_descriptor(std::uint64_t kernel_object, const std::string& name)
+        : kernel_object_{kernel_object}, name_{name}
+    {
+        bool supported{false};
+        std::uint16_t min_v{UINT16_MAX};
+        auto r = hsa_system_major_extension_supported(
+            HSA_EXTENSION_AMD_LOADER, 1, &min_v, &supported);
+
+        if (r != HSA_STATUS_SUCCESS || !supported) return;
+
+        hsa_ven_amd_loader_1_01_pfn_t tbl{};
+
+        r = hsa_system_get_major_extension_table(
+            HSA_EXTENSION_AMD_LOADER,
+            1,
+            sizeof(tbl),
+            reinterpret_cast<void*>(&tbl));
+
+        if (r != HSA_STATUS_SUCCESS) return;
+        if (!tbl.hsa_ven_amd_loader_query_host_address) return;
+
+        r = tbl.hsa_ven_amd_loader_query_host_address(
+            reinterpret_cast<void*>(kernel_object_),
+            reinterpret_cast<const void**>(&kernel_header_));
+
+        if (r != HSA_STATUS_SUCCESS) return;
+    }
+    Kernel_descriptor(const Kernel_descriptor&) = default;
+    Kernel_descriptor(Kernel_descriptor&&) = default;
+    ~Kernel_descriptor() = default;
+
+    Kernel_descriptor& operator=(const Kernel_descriptor&) = default;
+    Kernel_descriptor& operator=(Kernel_descriptor&&) = default;
 
     operator hipFunction_t() const {  // TODO: this is awful and only meant for illustration.
         return reinterpret_cast<hipFunction_t>(const_cast<Kernel_descriptor*>(this));
diff --git a/include/hip/hip_runtime_api.h b/include/hip/hip_runtime_api.h
index bfde2e942d..2b36f3e140 100644
--- a/include/hip/hip_runtime_api.h
+++ b/include/hip/hip_runtime_api.h
@@ -243,6 +243,8 @@ typedef enum __HIP_NODISCARD hipError_t {
         1062,  ///< Produced when trying to unlock a non-page-locked memory.
     hipErrorMapBufferObjectFailed =
         1071,    ///< Produced when the IPC memory attach failed from ROCr.
+    hipErrorAssert =
+        1081,    ///< Produced when the kernel calls assert.
     hipErrorTbd  ///< Marker that more error codes are needed.
 } hipError_t;
 
diff --git a/include/hip/nvcc_detail/hip_runtime_api.h b/include/hip/nvcc_detail/hip_runtime_api.h
index 6f222648a2..fbff263295 100644
--- a/include/hip/nvcc_detail/hip_runtime_api.h
+++ b/include/hip/nvcc_detail/hip_runtime_api.h
@@ -150,16 +150,20 @@ typedef CUfunction hipFunction_t;
 typedef CUdeviceptr hipDeviceptr_t;
 typedef struct cudaArray hipArray;
 typedef struct cudaArray* hipArray_const_t;
+typedef cudaFuncAttributes hipFuncAttributes;
 #define hipMemcpy3DParms cudaMemcpy3DParms
 #define hipArrayDefault cudaArrayDefault
 
 typedef cudaTextureObject_t hipTextureObject_t;
 typedef cudaSurfaceObject_t hipSurfaceObject_t;
+#define hipTextureType1D cudaTextureType1D
+#define hipTextureType1DLayered cudaTextureType1DLayered
 #define hipTextureType2D cudaTextureType2D
 #define hipTextureType3D cudaTextureType3D
 #define hipDeviceMapHost cudaDeviceMapHost
 
 #define hipExtent cudaExtent
+#define hipPitchedPtr cudaPitchedPtr
 #define make_hipExtent make_cudaExtent
 #define make_hipPos make_cudaPos
 #define make_hipPitchedPtr make_cudaPitchedPtr
@@ -381,6 +385,10 @@ inline static hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width,
     return hipCUDAErrorTohipError(cudaMallocPitch(ptr, pitch, width, height));
 }
 
+inline static hipError_t hipMalloc3D(hipPitchedPtr* pitchedDevPtr, hipExtent extent) {
+    return hipCUDAErrorTohipError(cudaMalloc3D(pitchedDevPtr, extent));
+}
+
 inline static hipError_t hipFree(void* ptr) { return hipCUDAErrorTohipError(cudaFree(ptr)); }
 
 inline static hipError_t hipMallocHost(void** ptr, size_t size)
@@ -649,6 +657,14 @@ inline static hipError_t hipMemset2DAsync(void* dst, size_t pitch, int value, si
     return hipCUDAErrorTohipError(cudaMemset2DAsync(dst, pitch, value, width, height, stream));
 }
 
+inline static hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int  value, hipExtent extent ){
+    return hipCUDAErrorTohipError(cudaMemset3D(pitchedDevPtr, value, extent));
+}
+
+inline static hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int  value, hipExtent extent, hipStream_t stream __dparm(0) ){
+    return hipCUDAErrorTohipError(cudaMemset3DAsync(pitchedDevPtr, value, extent, stream));
+}
+
 inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int device) {
     struct cudaDeviceProp cdprop;
     cudaError_t cerror;
@@ -1092,6 +1108,10 @@ inline static hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule
     return hipCUResultTohipError(cuModuleGetFunction(function, module, kname));
 }
 
+inline static hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) {
+    return hipCUDAErrorTohipError(cudaFuncGetAttributes(attr, func));
+}
+
 inline static hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
                                             const char* name) {
     return hipCUResultTohipError(cuModuleGetGlobal(dptr, bytes, hmod, name));
@@ -1148,8 +1168,8 @@ inline static hipError_t hipBindTexture(size_t* offset, const struct texture<T,
 }
 
 template <class T, int dim, enum cudaTextureReadMode readMode>
-inline static hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>* tex,
-                                        const void* devPtr, const struct hipChannelFormatDesc* desc,
+inline static hipError_t hipBindTexture(size_t* offset, struct texture<T, dim, readMode>& tex,
+                                        const void* devPtr, const struct hipChannelFormatDesc& desc,
                                         size_t size = UINT_MAX) {
     return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
 }
@@ -1159,6 +1179,11 @@ inline static hipError_t hipUnbindTexture(struct texture<T, dim, readMode>* tex)
     return hipCUDAErrorTohipError(cudaUnbindTexture(tex));
 }
 
+inline static hipError_t hipBindTexture(size_t* offset, textureReference* tex, const void* devPtr,
+                                        const hipChannelFormatDesc* desc, size_t size = UINT_MAX){
+    return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
+}
+
 template <class T, int dim, enum hipTextureReadMode readMode>
 inline static hipError_t hipBindTextureToArray(struct texture<T, dim, readMode>& tex,
                                                hipArray_const_t array,
diff --git a/lpl_ca/ca.hpp b/lpl_ca/ca.hpp
index bb1963bede..0ef8458c20 100644
--- a/lpl_ca/ca.hpp
+++ b/lpl_ca/ca.hpp
@@ -23,7 +23,7 @@ inline clara::Parser cmdline_parser(bool& help, std::vector<std::string>& inputs
                "https://reviews.llvm.org/D13909; "
                "the code object format is documented at: "
                "https://www.llvm.org/docs/AMDGPUUsage.html#code-object.") |
-           clara::Opt{targets, "gfx803,gfx900 etc."}["-t"]["--targets"](
+           clara::Opt{targets, "gfx803,gfx900,gfx906 etc."}["-t"]["--targets"](
                "targets for which code objects are to be extracted from "
                "the fat binary; must be included in the set of processors "
                "with ROCm support from "
@@ -76,4 +76,4 @@ inline void validate_inputs(const std::vector<std::string>& inputs) {
         throw std::runtime_error{"Non existent file " + *it + " passed as input."};
     }
 }
-}  // namespace hip_impl
\ No newline at end of file
+}  // namespace hip_impl
diff --git a/lpl_ca/common.hpp b/lpl_ca/common.hpp
index 74f4db9221..7652f08c46 100644
--- a/lpl_ca/common.hpp
+++ b/lpl_ca/common.hpp
@@ -12,7 +12,7 @@ namespace hip_impl {
 inline const std::unordered_set<std::string>& amdgpu_targets() {  // The evolving list lives at:
     // https://www.llvm.org/docs/AMDGPUUsage.html#processors.
     static const std::unordered_set<std::string> r{"gfx701", "gfx801", "gfx802", "gfx803",
-                                                   "gfx900"};
+                                                   "gfx900", "gfx906"};
 
     return r;
 }
@@ -77,4 +77,4 @@ inline void validate_targets(const std::vector<std::string>& x) {
         }
     }
 }
-}  // Namespace hip_impl.
\ No newline at end of file
+}  // Namespace hip_impl.
diff --git a/lpl_ca/lpl.hpp b/lpl_ca/lpl.hpp
index c9346fd793..cbd7fe8386 100644
--- a/lpl_ca/lpl.hpp
+++ b/lpl_ca/lpl.hpp
@@ -132,9 +132,9 @@ inline clara::Parser cmdline_parser(bool& help, std::vector<std::string>& source
                "file is documented at: https://reviews.llvm.org/D13909.") |
            clara::Arg{sources,
                       "a.cpp b.cpp etc."}("inputs for compilation; must contain valid C++ code.") |
-           clara::Opt{targets, "gfx803,gfx900 etc."}["-t"]["--targets"](
+           clara::Opt{targets, "gfx803,gfx900,gfx906 etc."}["-t"]["--targets"](
                "targets for AMDGPU lowering; must be included in the set "
                "of processors with ROCm support from "
                "https://www.llvm.org/docs/AMDGPUUsage.html#processors.");
 }
-}  // namespace hip_impl
\ No newline at end of file
+}  // namespace hip_impl
diff --git a/samples/1_Utils/hipCommander/Makefile b/samples/1_Utils/hipCommander/Makefile
index a411763b7f..c3a19073f0 100644
--- a/samples/1_Utils/hipCommander/Makefile
+++ b/samples/1_Utils/hipCommander/Makefile
@@ -19,7 +19,7 @@ $(EXE): hipCommander.cpp
 	$(HIPCC) $(CXXFLAGS) $^ -o $@
 
 nullkernel.hsaco : nullkernel.hip.cpp
-	$(HIPCC) --genco nullkernel.hip -o nullkernel.hsaco
+	$(HIPCC) --genco nullkernel.hip.cpp -o nullkernel.hsaco
 
 
 install: $(EXE)
diff --git a/src/device_functions.cpp b/src/device_functions.cpp
index 1af66f97e5..8ef19bab3f 100644
--- a/src/device_functions.cpp
+++ b/src/device_functions.cpp
@@ -278,21 +278,6 @@ struct uchar2Holder {
     };
 } __attribute__((aligned(8)));
 
-struct intHolder {
-    union {
-        signed int si[2];
-        signed int long sl;
-    };
-} __attribute__((aligned(8)));
-
-struct uintHolder {
-    union {
-        signed int ui[2];
-        signed int long ul;
-    };
-} __attribute__((aligned(8)));
-
-
 __device__ unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int s) {
     struct uchar2Holder cHoldVal;
     struct ucharHolder cHoldKey;
@@ -308,21 +293,29 @@ __device__ unsigned int __byte_perm(unsigned int x, unsigned int y, unsigned int
 }
 
 __device__ long long __mul64hi(long long int x, long long int y) {
-    struct intHolder iHold1;
-    struct intHolder iHold2;
-    iHold1.sl = x;
-    iHold2.sl = y;
-    iHold1.sl = iHold1.si[1] * iHold2.si[1];
-    return iHold1.sl;
+    ulong x0 = (ulong)x & 0xffffffffUL;
+    long x1 = x >> 32;
+    ulong y0 = (ulong)y & 0xffffffffUL;
+    long y1 = y >> 32;
+    ulong z0 = x0*y0;
+    long t = x1*y0 + (z0 >> 32);
+    long z1 = t & 0xffffffffL;
+    long z2 = t >> 32;
+    z1 = x0*y1 + z1;
+    return x1*y1 + z2 + (z1 >> 32);
 }
 
 __device__ unsigned long long __umul64hi(unsigned long long int x, unsigned long long int y) {
-    struct uintHolder uHold1;
-    struct uintHolder uHold2;
-    uHold1.ul = x;
-    uHold2.ul = y;
-    uHold1.ul = uHold1.ui[1] * uHold2.ui[1];
-    return uHold1.ul;
+    ulong x0 = x & 0xffffffffUL;
+    ulong x1 = x >> 32;
+    ulong y0 = y & 0xffffffffUL;
+    ulong y1 = y >> 32;
+    ulong z0 = x0*y0;
+    ulong t = x1*y0 + (z0 >> 32);
+    ulong z1 = t & 0xffffffffUL;
+    ulong z2 = t >> 32;
+    z1 = x0*y1 + z1;
+    return x1*y1 + z2 + (z1 >> 32);
 }
 
 /*
diff --git a/src/functional_grid_launch.inl b/src/functional_grid_launch.inl
index b555967ebc..9ecad51476 100644
--- a/src/functional_grid_launch.inl
+++ b/src/functional_grid_launch.inl
@@ -107,8 +107,8 @@ namespace hip_impl
             it0->second.cbegin(),
             it0->second.cend(),
             [=](const pair<hsa_agent_t, Kernel_descriptor>& x) {
-                return x.first.handle == agent.handle;
-            });
+            return x.first == agent;
+        });
 
         if (it1 == it0->second.cend()) {
             throw runtime_error{
diff --git a/src/hip_clang.cpp b/src/hip_clang.cpp
index 80b6111fc2..f7573e2819 100644
--- a/src/hip_clang.cpp
+++ b/src/hip_clang.cpp
@@ -27,10 +27,10 @@ THE SOFTWARE.
 #include "hip_hcc_internal.h"
 #include "trace_helper.h"
 
-constexpr unsigned __cudaFatMAGIC2 = 0x466243b1;
+constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"
 
 #define CLANG_OFFLOAD_BUNDLER_MAGIC "__CLANG_OFFLOAD_BUNDLE__"
-#define AMDGCN_AMDHSA_TRIPLE "openmp-amdgcn--amdhsa"
+#define AMDGCN_AMDHSA_TRIPLE "hip-amdgcn-amd-amdhsa"
 
 struct __ClangOffloadBundleDesc {
   uint64_t offset;
@@ -59,7 +59,7 @@ __hipRegisterFatBinary(const void* data)
   HIP_INIT();
 
   const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast<const __CudaFatBinaryWrapper*>(data);
-  if (fbwrapper->magic != __cudaFatMAGIC2 || fbwrapper->version != 1) {
+  if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) {
     return nullptr;
   }
 
diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp
index 48a8da81ca..9b6758ddf4 100644
--- a/src/hip_memory.cpp
+++ b/src/hip_memory.cpp
@@ -1132,15 +1132,19 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t co
 hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) {
     HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, src, sizeBytes, kind);
 
+    hipError_t e = hipSuccess;
+
+    // Return success if number of bytes to copy is 0
+    if (sizeBytes == 0) return ihipLogStatus(e);
+
     hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
 
     hc::completion_future marker;
 
-    hipError_t e = hipSuccess;
     if(dst==NULL || src==NULL)
 	{
 	e=hipErrorInvalidValue;
-	return e;
+	return ihipLogStatus(e);
 	}
     try {
         stream->locked_copySync(dst, src, sizeBytes, kind);
@@ -1208,7 +1212,6 @@ hipError_t hipMemcpyDtoD(hipDeviceptr_t dst, hipDeviceptr_t src, size_t sizeByte
     return ihipLogStatus(e);
 }
 
-
 hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) {
     HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, src, sizeBytes);
 
@@ -1227,7 +1230,6 @@ hipError_t hipMemcpyHtoH(void* dst, void* src, size_t sizeBytes) {
     return ihipLogStatus(e);
 }
 
-
 hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
                           hipStream_t stream) {
     HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, src, sizeBytes, kind, stream);
@@ -1235,7 +1237,6 @@ hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, hipMemcp
     return ihipLogStatus(hip_internal::memcpyAsync(dst, src, sizeBytes, kind, stream));
 }
 
-
 hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dst, void* src, size_t sizeBytes, hipStream_t stream) {
     HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, src, sizeBytes, stream);
 
@@ -1258,65 +1259,6 @@ hipError_t hipMemcpyDtoHAsync(void* dst, hipDeviceptr_t src, size_t sizeBytes, h
         hip_internal::memcpyAsync(dst, src, sizeBytes, hipMemcpyDeviceToHost, stream));
 }
 
-// TODO - review and optimize
-hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
-                        size_t height, hipMemcpyKind kind) {
-    if (width > dpitch || width > spitch) return hipErrorUnknown;
-
-    hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
-
-    hc::completion_future marker;
-
-    hipError_t e = hipSuccess;
-
-    try {
-        for (int i = 0; i < height; ++i) {
-            stream->locked_copySync((unsigned char*)dst + i * dpitch,
-                                    (unsigned char*)src + i * spitch, width, kind);
-        }
-    } catch (ihipException& ex) {
-        e = ex._code;
-    }
-
-    return e;
-}
-
-hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
-                       size_t height, hipMemcpyKind kind) {
-    HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind);
-    hipError_t e = hipSuccess;
-    e = ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind);
-    return ihipLogStatus(e);
-}
-
-hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
-    HIP_INIT_SPECIAL_API((TRACE_MCMD), pCopy);
-    hipError_t e = hipSuccess;
-    if (pCopy == nullptr) {
-        e = hipErrorInvalidValue;
-    }
-    e = ihipMemcpy2D(pCopy->dstArray->data, pCopy->widthInBytes, pCopy->srcHost, pCopy->srcPitch,
-                     pCopy->widthInBytes, pCopy->height, hipMemcpyDefault);
-    return ihipLogStatus(e);
-}
-
-hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
-                            size_t height, hipMemcpyKind kind, hipStream_t stream) {
-    HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind, stream);
-    if (width > dpitch || width > spitch) return ihipLogStatus(hipErrorUnknown);
-    hipError_t e = hipSuccess;
-    try {
-        for (int i = 0; i < height; ++i) {
-            e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch,
-                                          (unsigned char*)src + i * spitch, width, kind, stream);
-        }
-    } catch (ihipException& ex) {
-        e = ex._code;
-    }
-
-    return ihipLogStatus(e);
-}
-
 hipError_t hipMemcpy2DToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src,
                               size_t spitch, size_t width, size_t height, hipMemcpyKind kind) {
     HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, wOffset, hOffset, src, spitch, width, height, kind);
@@ -1547,6 +1489,24 @@ inline const T& clamp_integer(const T& x, const T& lower, const T& upper) {
 
     return std::min(upper, std::max(x, lower));
 }
+
+template <typename T>
+__global__ void hip_copy2d_n(T* dst, const T* src, size_t width, size_t height, size_t destPitch, size_t srcPitch) {
+
+    size_t idx = blockIdx.x * blockDim.x + threadIdx.x;
+    size_t idy = blockIdx.y * blockDim.y + threadIdx.y;
+    size_t floorWidth = (width/sizeof(T));
+    T *dstPtr = (T *)((uint8_t*) dst + idy * destPitch);
+    T *srcPtr = (T *)((uint8_t*) src + idy * srcPitch);
+    if((idx < floorWidth) && (idy < height)){
+        dstPtr[idx] = srcPtr[idx];
+    } else if((idx < width) && (idy < height)){
+        size_t bytesToCopy = width - (floorWidth * sizeof(T));
+        dstPtr += floorWidth;
+        srcPtr += floorWidth;
+        __builtin_memcpy(reinterpret_cast<uint8_t*>(dstPtr), reinterpret_cast<const uint8_t*>(srcPtr),bytesToCopy);
+    }
+}
 }  // namespace
 
 template <typename T>
@@ -1559,6 +1519,16 @@ void ihipMemsetKernel(hipStream_t stream, T* ptr, T val, size_t sizeBytes) {
                        sizeBytes, std::move(val));
 }
 
+template <typename T>
+void ihipMemcpy2dKernel(hipStream_t stream, T* dst, const T* src, size_t width, size_t height, size_t destPitch, size_t srcPitch) {
+    size_t threadsPerBlock_x = 64;
+    size_t threadsPerBlock_y = 4;
+    uint32_t grid_dim_x = clamp_integer<size_t>( (width+(threadsPerBlock_x*sizeof(T)-1)) / (threadsPerBlock_x*sizeof(T)), 1, UINT32_MAX);
+    uint32_t grid_dim_y = clamp_integer<size_t>( (height+(threadsPerBlock_y-1)) / threadsPerBlock_y, 1, UINT32_MAX);
+    hipLaunchKernelGGL(hip_copy2d_n, dim3(grid_dim_x,grid_dim_y), dim3(threadsPerBlock_x,threadsPerBlock_y), 0u, stream, dst, src,
+                       width, height, destPitch, srcPitch);
+}
+
 typedef enum ihipMemsetDataType {
     ihipMemsetDataTypeChar   = 0,
     ihipMemsetDataTypeShort  = 1,
@@ -1616,6 +1586,108 @@ hipError_t ihipMemset(void* dst, int  value, size_t sizeBytes, hipStream_t strea
     return e;
 };
 
+int isLockedPointer(const void *ptr)
+{
+    hsa_amd_pointer_info_t info;
+    int isLocked = 0;
+
+    info.size = sizeof(info);
+    hsa_status_t hsa_status = hsa_amd_pointer_info(const_cast<void*>(ptr), &info, nullptr, nullptr, nullptr);
+    if(hsa_status != HSA_STATUS_SUCCESS) {
+        return -1;
+    }
+
+    if((info.type == HSA_EXT_POINTER_TYPE_HSA) || (info.type == HSA_EXT_POINTER_TYPE_LOCKED)) {
+        isLocked = 1;
+    }
+
+    return isLocked;
+}
+
+// TODO - review and optimize
+hipError_t ihipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
+                        size_t height, hipMemcpyKind kind) {
+    if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return hipErrorInvalidValue;
+
+    hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
+    int isLocked = 0;
+    if(kind == hipMemcpyHostToDevice) {
+        isLocked = isLockedPointer(src);
+    } else if(kind == hipMemcpyDeviceToHost) {
+        isLocked = isLockedPointer(dst);
+    }
+
+    hc::completion_future marker;
+
+    hipError_t e = hipSuccess;
+    if((width == dpitch) && (width == spitch)) {
+        stream->locked_copySync((void*)dst, (void*)src, width*height, kind, false);
+    } else {
+        try {
+            if(isLocked) {
+                for (int i = 0; i < height; ++i)
+                    stream->locked_copySync((unsigned char*)dst + i * dpitch,
+                                    (unsigned char*)src + i * spitch, width, kind);
+            } else {
+                ihipMemcpy2dKernel<uint32_t> (stream, static_cast<uint32_t*> (dst), static_cast<const uint32_t*> (src), width, height, dpitch, spitch);
+                stream->locked_wait();
+            }
+        } catch (ihipException& ex) {
+            e = ex._code;
+        }
+    }
+
+    return e;
+}
+
+hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
+                       size_t height, hipMemcpyKind kind) {
+    HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind);
+    hipError_t e = hipSuccess;
+    e = ihipMemcpy2D(dst, dpitch, src, spitch, width, height, kind);
+    return ihipLogStatus(e);
+}
+
+hipError_t hipMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width,
+                            size_t height, hipMemcpyKind kind, hipStream_t stream) {
+    HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, dpitch, src, spitch, width, height, kind, stream);
+    if (dst == nullptr || src == nullptr || width > dpitch || width > spitch) return ihipLogStatus(hipErrorInvalidValue);
+    hipError_t e = hipSuccess;
+    int isLocked = 0;
+    if(kind == hipMemcpyHostToDevice) {
+        isLocked = isLockedPointer(src);
+    } else if(kind == hipMemcpyDeviceToHost) {
+        isLocked = isLockedPointer(dst);
+    }
+    if((width == dpitch) && (width == spitch)) {
+            hip_internal::memcpyAsync(dst, src, width*height, kind, stream);
+    } else {
+        try {
+            if(!isLocked){
+                for (int i = 0; i < height; ++i) 
+                    e = hip_internal::memcpyAsync((unsigned char*)dst + i * dpitch,
+                                          (unsigned char*)src + i * spitch, width, kind, stream);
+            } else{
+                ihipMemcpy2dKernel<uint32_t> (stream, static_cast<uint32_t*> (dst), static_cast<const uint32_t*> (src), width, height, dpitch, spitch);
+            }
+        } catch (ihipException& ex) {
+            e = ex._code;
+        }
+    }
+
+    return ihipLogStatus(e);
+}
+
+hipError_t hipMemcpyParam2D(const hip_Memcpy2D* pCopy) {
+    HIP_INIT_SPECIAL_API((TRACE_MCMD), pCopy);
+    hipError_t e = hipSuccess;
+    if (pCopy == nullptr) {
+        e = hipErrorInvalidValue;
+    }
+    e = ihipMemcpy2D(pCopy->dstArray->data, pCopy->widthInBytes, pCopy->srcHost, pCopy->srcPitch,
+                     pCopy->widthInBytes, pCopy->height, hipMemcpyDefault);
+    return ihipLogStatus(e);
+}
 
 // TODO-sync: function is async unless target is pinned host memory - then these are fully sync.
 hipError_t hipMemsetAsync(void* dst, int value, size_t sizeBytes, hipStream_t stream) {
@@ -1698,6 +1770,42 @@ hipError_t hipMemsetD8(hipDeviceptr_t dst, unsigned char value, size_t sizeBytes
     return ihipLogStatus(e);
 }
 
+hipError_t hipMemset3D(hipPitchedPtr pitchedDevPtr, int  value, hipExtent extent )
+{
+    HIP_INIT_SPECIAL_API((TRACE_MCMD), &pitchedDevPtr, value, &extent);
+    hipError_t e = hipSuccess;
+
+    hipStream_t stream = hipStreamNull;
+    // TODO - call an ihip memset so HIP_TRACE is correct.
+    stream =  ihipSyncAndResolveStream(stream);
+    if (stream) {
+        size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth;
+        e = ihipMemset(pitchedDevPtr.ptr, value, sizeBytes, stream, ihipMemsetDataTypeChar);
+        stream->locked_wait();
+    } else {
+        e = hipErrorInvalidValue;
+    }
+
+    return ihipLogStatus(e);
+}
+
+hipError_t hipMemset3DAsync(hipPitchedPtr pitchedDevPtr, int  value, hipExtent extent ,hipStream_t stream )
+{
+    HIP_INIT_SPECIAL_API((TRACE_MCMD), &pitchedDevPtr, value, &extent);
+    hipError_t e = hipSuccess;
+
+    // TODO - call an ihip memset so HIP_TRACE is correct.
+    stream =  ihipSyncAndResolveStream(stream);
+    if (stream) {
+        size_t sizeBytes = pitchedDevPtr.pitch * extent.height * extent.depth;
+        e = ihipMemset(pitchedDevPtr.ptr, value, sizeBytes, stream, ihipMemsetDataTypeChar);
+    } else {
+        e = hipErrorInvalidValue;
+    }
+
+    return ihipLogStatus(e);
+}
+
 hipError_t hipMemGetInfo(size_t* free, size_t* total) {
     HIP_INIT_API(free, total);
 
diff --git a/src/hip_module.cpp b/src/hip_module.cpp
index 84451a17a0..a6d486b6de 100644
--- a/src/hip_module.cpp
+++ b/src/hip_module.cpp
@@ -27,10 +27,11 @@ THE SOFTWARE.
 #include "hsa_helpers.hpp"
 #include "trace_helper.h"
 
+#include <hsa/amd_hsa_kernel_code.h>
 #include <hsa/hsa.h>
 #include <hsa/hsa_ext_amd.h>
-#include <hsa/amd_hsa_kernel_code.h>
 
+#include <algorithm>
 #include <cassert>
 #include <cstdint>
 #include <cstdio>
@@ -71,9 +72,8 @@ struct ihipKernArgInfo {
 map<string, ihipKernArgInfo> kernelArguments;
 
 struct ihipModuleSymbol_t {
-    uint64_t _object;  // The kernel object.
-    uint32_t _groupSegmentSize;
-    uint32_t _privateSegmentSize;
+    uint64_t _object{};  // The kernel object.
+    amd_kernel_code_t const* _header{};
     string _name;  // TODO - review for performance cost.  Name is just used for debug.
 };
 
@@ -179,8 +179,10 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
         aql.grid_size_x = globalWorkSizeX;
         aql.grid_size_y = globalWorkSizeY;
         aql.grid_size_z = globalWorkSizeZ;
-        aql.group_segment_size = f->_groupSegmentSize + sharedMemBytes;
-        aql.private_segment_size = f->_privateSegmentSize;
+        aql.group_segment_size =
+            f->_header->workgroup_group_segment_byte_size + sharedMemBytes;
+        aql.private_segment_size =
+            f->_header->workitem_private_segment_byte_size;
         aql.kernel_object = f->_object;
         aql.setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
         aql.header =
@@ -444,10 +446,10 @@ hipError_t ihipModuleGetFunction(hipFunction_t* func, hipModule_t hmod, const ch
 
     if (kernel.handle == 0u) return hipErrorNotFound;
 
-    (*func)->_object = kernel_object(kernel);
-    (*func)->_groupSegmentSize = group_size(kernel);
-    (*func)->_privateSegmentSize = private_size(kernel);
-    (*func)->_name = name;
+    // TODO: refactor the whole ihipThisThat, which is a mess and yields the
+    //       below, due to hipFunction_t being a pointer to ihipModuleSymbol_t.
+    func[0][0] = *static_cast<hipFunction_t>(
+        Kernel_descriptor{kernel_object(kernel), name});
 
     return hipSuccess;
 }
@@ -471,6 +473,65 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t h
     return ihipLogStatus(r);
 }
 
+namespace
+{
+    inline
+    hipFuncAttributes make_function_attributes(const amd_kernel_code_t& header)
+    {
+        hipFuncAttributes r{};
+
+        hipDeviceProp_t prop{};
+        hipGetDeviceProperties(
+            &prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId);
+        // TODO: at the moment there is no way to query the count of registers
+        //       available per CU, therefore we hardcode it to 64 KiRegisters.
+        prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024;
+        
+        r.localSizeBytes = header.workitem_private_segment_byte_size;
+        r.sharedSizeBytes = header.workgroup_group_segment_byte_size;
+        r.maxDynamicSharedSizeBytes =
+            prop.sharedMemPerBlock - r.sharedSizeBytes;
+        r.numRegs = header.workitem_vgpr_count;
+        r.maxThreadsPerBlock = r.numRegs ?
+            std::min(prop.maxThreadsPerBlock, prop.regsPerBlock / r.numRegs) :
+            prop.maxThreadsPerBlock;
+        r.binaryVersion =
+            header.amd_machine_version_major * 10 +
+            header.amd_machine_version_minor;
+        r.ptxVersion = prop.major * 10 + prop.minor; // HIP currently presents itself as PTX 3.0.
+
+        return r;
+    }
+}
+
+hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func)
+{
+    if (!attr) return hipErrorInvalidValue;
+    if (!func) return hipErrorInvalidDeviceFunction;
+
+    const auto it0 = functions().find(reinterpret_cast<uintptr_t>(func));
+
+    if (it0 == functions().cend()) return hipErrorInvalidDeviceFunction;
+
+    auto agent = this_agent();
+    const auto it1 = find_if(
+        it0->second.cbegin(),
+        it0->second.cend(),
+        [=](const pair<hsa_agent_t, Kernel_descriptor>& x) {
+        return x.first == agent;
+    });
+
+    if (it1 == it0->second.cend()) return hipErrorInvalidDeviceFunction;
+
+    const auto header = static_cast<hipFunction_t>(it1->second)->_header;
+
+    if (!header) throw runtime_error{"Ill-formed Kernel_descriptor."};
+
+    *attr = make_function_attributes(*header);
+
+    return hipSuccess;
+}
+
 hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) {
 
     if (!module) return hipErrorInvalidValue;
@@ -487,7 +548,7 @@ hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) {
 
     (*module)->executable = hip_impl::load_executable(
         tmp.empty() ? read_elf_file_as_string(image) : tmp, (*module)->executable, this_agent());
-    
+
     return (*module)->executable.handle ? hipSuccess : hipErrorUnknown;
 }
 
diff --git a/src/program_state.cpp b/src/program_state.cpp
index 0c51fe4694..c4478bec2f 100644
--- a/src/program_state.cpp
+++ b/src/program_state.cpp
@@ -382,8 +382,7 @@ const unordered_map<uintptr_t, vector<pair<hsa_agent_t, Kernel_descriptor>>>& fu
                 for (auto&& kernel_symbol : it->second) {
                     r[function.first].emplace_back(
                         agent(kernel_symbol),
-                        Kernel_descriptor{kernel_object(kernel_symbol), group_size(kernel_symbol),
-                                          private_size(kernel_symbol), it->first});
+                        Kernel_descriptor{kernel_object(kernel_symbol), it->first});
                 }
             }
         }
diff --git a/tests/src/deviceLib/hipMathFunctions.cpp b/tests/src/deviceLib/hipMathFunctions.cpp
index 7fe0003672..f89bdae149 100644
--- a/tests/src/deviceLib/hipMathFunctions.cpp
+++ b/tests/src/deviceLib/hipMathFunctions.cpp
@@ -21,7 +21,7 @@ THE SOFTWARE.
 */
 
 /* HIT_START
- * BUILD: %t %s ../test_common.cpp
+ * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
  * RUN: %t
  * HIT_END
  */
@@ -29,7 +29,7 @@ THE SOFTWARE.
 #include "hip/hip_runtime.h"
 #include "test_common.h"
 
-#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__
+#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__
 
 __global__ void kernel_abs_int64(hipLaunchParm lp, long long *input, long long *output) {
     int tx = threadIdx.x;
diff --git a/tests/src/deviceLib/hipTestHalf.cpp b/tests/src/deviceLib/hipTestHalf.cpp
index 4a63260bf7..5a2aac2b29 100644
--- a/tests/src/deviceLib/hipTestHalf.cpp
+++ b/tests/src/deviceLib/hipTestHalf.cpp
@@ -18,7 +18,7 @@ THE SOFTWARE.
 */
 
 /* HIT_START
- * BUILD: %t %s ../test_common.cpp
+ * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
  * RUN: %t
  * HIT_END
  */
@@ -32,7 +32,7 @@ THE SOFTWARE.
 #define HALF_SIZE 64 * sizeof(__half)
 #define HALF2_SIZE 64 * sizeof(__half2)
 
-#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__
+#if __HIP_ARCH_GFX803__ || __HIP_ARCH_GFX900__ || __HIP_ARCH_GFX906__
 
 __global__ void __halfMath(hipLaunchParm lp, __half* A, __half* B, __half* C) {
     int tx = threadIdx.x;
diff --git a/tests/src/deviceLib/hip_mbcnt.cpp b/tests/src/deviceLib/hip_mbcnt.cpp
new file mode 100644
index 0000000000..0dd7169f51
--- /dev/null
+++ b/tests/src/deviceLib/hip_mbcnt.cpp
@@ -0,0 +1,128 @@
+/*
+Copyright (c) 2015-2018 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/* HIT_START
+ * BUILD: %t %s ../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc
+ * RUN: %t
+ * HIT_END
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <algorithm>
+#include <stdlib.h>
+#include <iostream>
+#include <hip/hip_runtime.h>
+#include <hip/device_functions.h>
+
+#define HIP_ASSERT(x) (assert((x) == hipSuccess))
+
+__global__ void HIP_kernel(hipLaunchParm lp, unsigned int* mbcnt_lo, unsigned int* mbcnt_hi, unsigned int* lane_id) {
+    int x = blockDim.x * blockIdx.x + threadIdx.x;
+    mbcnt_lo[x] = __mbcnt_lo(0xFFFFFFFF, 0);
+    mbcnt_hi[x] = __mbcnt_hi(0xFFFFFFFF, 0);
+    lane_id[x] = __lane_id();
+}
+ 
+using namespace std;
+
+int main() {
+
+    unsigned int* device_mbcnt_lo;
+    unsigned int* device_mbcnt_hi;
+    unsigned int* device_lane_id;
+
+    hipDeviceProp_t devProp;
+    hipGetDeviceProperties(&devProp, 0);
+    cout << " System minor " << devProp.minor << endl;
+    cout << " System major " << devProp.major << endl;
+    cout << " agent prop name " << devProp.name << endl;
+
+    cout << "hip Device prop succeeded " << endl;
+
+    constexpr unsigned int wave_size = 64;
+    constexpr unsigned int num_waves_per_block = 2;
+    constexpr unsigned int num_threads_per_block = wave_size * num_waves_per_block;
+    constexpr unsigned int num_blocks = 2;
+    constexpr unsigned int num_threads = num_threads_per_block * num_blocks;
+    constexpr size_t buffer_size = num_threads * sizeof(unsigned int);
+
+    HIP_ASSERT(hipMalloc((void**)&device_mbcnt_lo, buffer_size));
+    HIP_ASSERT(hipMalloc((void**)&device_mbcnt_hi, buffer_size));
+    HIP_ASSERT(hipMalloc((void**)&device_lane_id, buffer_size));
+
+    hipLaunchKernel(HIP_kernel, dim3(num_blocks),
+                    dim3(num_threads_per_block), 0, 0, device_mbcnt_lo, device_mbcnt_hi, device_lane_id);
+
+    unsigned int* host_mbcnt_lo = (unsigned int*) malloc(buffer_size);
+    unsigned int* host_mbcnt_hi = (unsigned int*) malloc(buffer_size);
+    unsigned int* host_lane_id = (unsigned int*) malloc(buffer_size);
+
+    HIP_ASSERT(hipMemcpy(host_mbcnt_lo, device_mbcnt_lo, buffer_size, hipMemcpyDeviceToHost));
+    HIP_ASSERT(hipMemcpy(host_mbcnt_hi, device_mbcnt_hi, buffer_size, hipMemcpyDeviceToHost));
+    HIP_ASSERT(hipMemcpy(host_lane_id, device_lane_id, buffer_size, hipMemcpyDeviceToHost));
+
+    // verify the results
+    int mbcnt_lo_errors = 0;
+    int mbcnt_hi_errors = 0;
+    int lane_id_errors = 0;
+    for (unsigned int i = 0; i < num_threads; i++) {
+        unsigned int this_lane_id = i % wave_size;
+        unsigned int this_mbcnt_lo = this_lane_id >= 32 ? 32 : this_lane_id;
+        unsigned int this_mbcnt_hi = this_lane_id < 32 ? 0 : (this_lane_id - 22);
+
+        if (host_mbcnt_lo[i] != this_mbcnt_lo)
+            mbcnt_lo_errors++;
+
+        if (host_mbcnt_hi[i] != this_mbcnt_hi)
+            mbcnt_hi_errors++;
+
+        if (host_lane_id[i] != this_lane_id)
+            lane_id_errors++;
+    }
+
+    if (mbcnt_lo_errors == 0)
+      cout << "__mbcnt_lo() PASSED!" << endl;
+    else
+      cout << "__mbcnt_lo() FAILED!" << endl;
+
+
+    if (mbcnt_hi_errors == 0)
+      cout << "__mbcnt_hi() PASSED!" << endl;
+    else
+      cout << "__mbcnt_hi() FAILED!" << endl;
+
+    if (lane_id_errors == 0)
+      cout << "__lane_id() PASSED!" << endl;
+    else
+      cout << "__lane_id() FAILED!" << endl;
+
+    HIP_ASSERT(hipFree(device_mbcnt_lo));
+    HIP_ASSERT(hipFree(device_mbcnt_hi));
+    HIP_ASSERT(hipFree(device_lane_id));
+
+    free(host_mbcnt_lo);
+    free(host_mbcnt_hi);
+    free(host_lane_id);
+
+    return mbcnt_lo_errors + mbcnt_hi_errors + lane_id_errors;
+}
diff --git a/tests/src/runtimeApi/memory/hipMemset3D.cpp b/tests/src/runtimeApi/memory/hipMemset3D.cpp
new file mode 100644
index 0000000000..40f2f3e67f
--- /dev/null
+++ b/tests/src/runtimeApi/memory/hipMemset3D.cpp
@@ -0,0 +1,98 @@
+/*
+Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+// Simple test for memset.
+// Also serves as a template for other tests.
+
+/* HIT_START
+ * BUILD: %t %s ../../test_common.cpp
+ * //Small copy
+ * RUN: %t -N 10    --memsetval 0x42
+ * HIT_END
+ */
+
+#include "hip/hip_runtime.h"
+#include "test_common.h"
+
+bool testhipMemset3D(int memsetval,int p_gpuDevice)
+{
+    size_t numH = 256;
+    size_t numW = 256;
+    size_t depth = 1;
+    size_t pitch_A;
+    size_t width = numW * sizeof(char);
+    size_t sizeElements = width * numH * depth;
+    size_t elements = numW* numH* depth;
+
+
+	printf ("testhipMemset3D memsetval=%2x device=%d\n", memsetval, p_gpuDevice);
+    char *A_d;
+    char *A_h;
+    bool testResult = true;
+    hipExtent extent = make_hipExtent(width, numH, depth);
+    hipPitchedPtr devPitchedPtr;
+
+    HIPCHECK(hipMalloc3D(&devPitchedPtr, extent));
+	A_h = (char*)malloc(sizeElements);
+	HIPASSERT(A_h != NULL);
+	for (size_t i=0; i<elements; i++) {
+        A_h[i] = 1;
+    }
+    hipStream_t stream;
+    //HIPCHECK(hipStreamCreate(&stream));
+	HIPCHECK ( hipMemset3D( devPitchedPtr, memsetval, extent) );
+	//HIPCHECK ( hipMemcpy3D(A_h, width, A_d, pitch_A, numW, numH, hipMemcpyDeviceToHost));
+	hipMemcpy3DParms myparms = {0};
+	myparms.srcPos = make_hipPos(0,0,0);
+	myparms.dstPos = make_hipPos(0,0,0);
+	myparms.dstPtr = make_hipPitchedPtr(A_h, width , numW, numH);
+	myparms.srcPtr = devPitchedPtr;
+    //myparms.dstArray = cuArray;
+	myparms.extent = extent;
+#ifdef __HIP_PLATFORM_NVCC__
+	myparms.kind = hipMemcpyKindToCudaMemcpyKind(hipMemcpyDeviceToHost);
+#else
+    myparms.kind = hipMemcpyDeviceToHost;
+#endif
+    hipMemcpy3D(&myparms);
+
+    for (int i=0; i<elements; i++) {
+        if (A_h[i] != memsetval) {
+			testResult = false;
+            printf("mismatch at index:%d computed:%02x, memsetval:%02x\n", i, (int)A_h[i], (int)memsetval);
+            break;
+        }
+    }
+    hipFree(A_d);
+    free(A_h);
+    return testResult;
+}
+
+int main(int argc, char *argv[])
+{
+    HipTest::parseStandardArguments(argc, argv, true);
+    bool testResult = false;
+    HIPCHECK(hipSetDevice(p_gpuDevice));
+    testResult = testhipMemset3D(memsetval, p_gpuDevice);
+
+    passed();
+
+}
diff --git a/tests/src/runtimeApi/module/hipFuncGetAttributes.cpp b/tests/src/runtimeApi/module/hipFuncGetAttributes.cpp
new file mode 100644
index 0000000000..6af87edb25
--- /dev/null
+++ b/tests/src/runtimeApi/module/hipFuncGetAttributes.cpp
@@ -0,0 +1,53 @@
+/*
+Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/* HIT_START
+ * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11
+ * RUN: %t
+ * HIT_END
+ */
+
+#include <hip/hip_runtime_api.h>
+#include <iostream>
+#include "test_common.h"
+
+__global__
+void fn(float* px, float* py)
+{
+    bool a[42];
+    __shared__ double b[69];
+
+    for (auto&& x : b) x = *py++;
+    for (auto&& x : a) x = *px++ > 0.0;
+    for (auto&& x : a) if (x) *--py = *--px;
+}
+
+int main() {
+    hipInit(0);
+
+    hipFuncAttributes attr{};
+
+    auto r = hipFuncGetAttributes(&attr, reinterpret_cast<const void*>(&fn));
+
+    if (r != hipSuccess || attr.maxThreadsPerBlock == 0) {
+        failed("Failed to read attributes.");
+    }
+
+    passed();
+}
diff --git a/tests/src/texture/hipBindTexRef1DFetch.cpp b/tests/src/texture/hipBindTexRef1DFetch.cpp
new file mode 100644
index 0000000000..5d9a1b103b
--- /dev/null
+++ b/tests/src/texture/hipBindTexRef1DFetch.cpp
@@ -0,0 +1,95 @@
+/*
+Copyright (c) 2015-present Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+
+/* HIT_START
+ * BUILD: %t %s ../test_common.cpp
+ * RUN: %t
+ * HIT_END
+ */
+
+#include "hip/hip_runtime.h"
+#include "test_common.h"
+
+#define N 512
+
+texture<float, 1, hipReadModeElementType> tex;
+
+__global__ void kernel(float *out) {
+  int x = blockIdx.x * blockDim.x + threadIdx.x;
+  if(x<N){
+      out[x] = tex1Dfetch(tex, x);
+  }
+}
+
+int runTest(void);
+
+int main(int argc, char **argv) {
+    int testResult = runTest();
+    if (testResult) {
+        passed();
+    } else {
+        exit(EXIT_FAILURE);
+    }
+}
+
+int runTest() {
+    int testResult = 1;
+    float *texBuf;
+    float val[N], output[N];
+    size_t size = 0;
+    float *devBuf;
+    for (int i = 0; i < N; i++) {
+        val[i] = (float)i;
+        output[i] = 0.0;
+    }
+    hipChannelFormatDesc chanDesc =
+        hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat);
+
+    HIPCHECK(hipMalloc(&texBuf, N * sizeof(float)));
+    HIPCHECK(hipMalloc(&devBuf, N * sizeof(float)));
+    HIPCHECK(hipMemcpy(texBuf, val, N * sizeof(float), hipMemcpyHostToDevice));
+  
+    tex.addressMode[0] = hipAddressModeClamp;
+    tex.addressMode[1] = hipAddressModeClamp;
+    tex.filterMode = hipFilterModePoint;
+    tex.normalized = 0;
+
+    HIPCHECK(hipBindTexture(&size, tex, (void *)texBuf, chanDesc, N * sizeof(float)));
+
+    dim3 dimBlock(64, 1, 1);
+    dim3 dimGrid(N / dimBlock.x, 1, 1);
+
+    hipLaunchKernelGGL(kernel, dim3(dimGrid), dim3(dimBlock), 0, 0, devBuf);
+    HIPCHECK(hipDeviceSynchronize());
+    HIPCHECK(hipMemcpy(output, devBuf, N * sizeof(float), hipMemcpyDeviceToHost));
+    for (int i = 0; i < N; i++) {
+        if (output[i] != val[i]) {
+            testResult = 0;
+            break;
+        }
+    }
+    HIPCHECK(hipUnbindTexture(&tex));
+    HIPCHECK(hipFree(texBuf));
+    HIPCHECK(hipFree(devBuf));
+    return testResult;
+}
diff --git a/tests/src/texture/hipTextureObj1DFetch.cpp b/tests/src/texture/hipTextureObj1DFetch.cpp
new file mode 100644
index 0000000000..faf6541ce1
--- /dev/null
+++ b/tests/src/texture/hipTextureObj1DFetch.cpp
@@ -0,0 +1,99 @@
+/*
+Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+/*HIT_START
+ * BUILD: %t %s ../test_common.cpp
+ * RUN: %t
+ * HIT_END
+ */
+
+#include "hip/hip_runtime.h"
+#include "test_common.h"
+
+#define N 512
+
+__global__ void tex1dKernel(float *val, hipTextureObject_t obj) {
+    int k = blockIdx.x * blockDim.x + threadIdx.x;
+    if (k < N)
+        val[k] = tex1Dfetch<float>(obj, k);
+}
+
+int runTest(void);
+
+int main(int argc, char **argv) {
+    int testResult = runTest();
+    if(testResult) {
+        passed();
+    } else {
+        exit(EXIT_FAILURE);
+    }
+}
+
+int runTest() {
+    int testResult = 1;
+    // Allocating the required buffer on gpu device
+    float *texBuf, *texBufOut;
+    float val[N], output[N];
+    for (int i = 0; i < N; i++) {
+        val[i] = (i + 1) * (i + 1);
+        output[i] = 0.0;
+    }
+    HIPCHECK(hipMalloc(&texBuf, N * sizeof(float)));
+    HIPCHECK(hipMalloc(&texBufOut, N * sizeof(float)));
+    HIPCHECK(hipMemcpy(texBuf, val, N * sizeof(float), hipMemcpyHostToDevice));
+    HIPCHECK(hipMemset(texBufOut, 0, N * sizeof(float)));
+    hipResourceDesc resDescLinear;
+
+    memset(&resDescLinear, 0, sizeof(resDescLinear));
+    resDescLinear.resType = hipResourceTypeLinear;
+    resDescLinear.res.linear.devPtr = texBuf;
+    resDescLinear.res.linear.desc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat);
+    resDescLinear.res.linear.sizeInBytes = N * sizeof(float);
+
+    hipTextureDesc texDesc;
+    memset(&texDesc, 0, sizeof(texDesc));
+    texDesc.readMode = hipReadModeElementType;
+
+    // Creating texture object
+    hipTextureObject_t texObj = 0;
+    HIPCHECK(hipCreateTextureObject(&texObj, &resDescLinear, &texDesc, NULL));
+
+    dim3 dimBlock(64, 1, 1);
+    dim3 dimGrid(N / dimBlock.x, 1, 1);
+
+    hipLaunchKernelGGL(tex1dKernel, dim3(dimGrid), dim3(dimBlock), 0, 0,
+                       texBufOut, texObj);
+    HIPCHECK(hipDeviceSynchronize());
+
+    HIPCHECK(hipMemcpy(output, texBufOut, N * sizeof(float), hipMemcpyDeviceToHost));
+
+    for(int i = 0; i < N; i++)
+        if (output[i] != val[i]) {
+            testResult = 0;
+            break;
+        }
+
+    HIPCHECK(hipDestroyTextureObject(texObj));
+    HIPCHECK(hipFree(texBuf));
+    HIPCHECK(hipFree(texBufOut));
+    return testResult;
+}
diff --git a/tests/src/texture/hipTextureObj2D.cpp b/tests/src/texture/hipTextureObj2D.cpp
index 1bf51bc2cb..e214295989 100644
--- a/tests/src/texture/hipTextureObj2D.cpp
+++ b/tests/src/texture/hipTextureObj2D.cpp
@@ -1,108 +1,108 @@
-/* HIT_START
- * BUILD: %t %s ../test_common.cpp
- * RUN: %t
- * HIT_END
- */
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <hip/hip_runtime.h>
-#include "test_common.h"
-
-bool testResult = true;
-
-__global__ void tex2DKernel(float* outputData, hipTextureObject_t textureObject, int width,
-                            int height) {
-    int x = blockIdx.x * blockDim.x + threadIdx.x;
-    int y = blockIdx.y * blockDim.y + threadIdx.y;
-    outputData[y * width + x] = tex2D<float>(textureObject, x, y);
-}
-
-void runTest(int argc, char** argv);
-
-int main(int argc, char** argv) {
-    runTest(argc, argv);
-
-    if (testResult) {
-        passed();
-    } else {
-        exit(EXIT_FAILURE);
-    }
-}
-
-void runTest(int argc, char** argv) {
-    unsigned int width = 256;
-    unsigned int height = 256;
-    unsigned int size = width * height * sizeof(float);
-    float* hData = (float*)malloc(size);
-    memset(hData, 0, size);
-    for (int i = 0; i < height; i++) {
-        for (int j = 0; j < width; j++) {
-            hData[i * width + j] = i * width + j;
-        }
-    }
-    printf("hData: ");
-    for (int i = 0; i < 64; i++) {
-        printf("%f  ", hData[i]);
-    }
-    printf("\n");
-
-    hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat);
-    hipArray* hipArray;
-    hipMallocArray(&hipArray, &channelDesc, width, height);
-
-    hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice);
-
-    struct hipResourceDesc resDesc;
-    memset(&resDesc, 0, sizeof(resDesc));
-    resDesc.resType = hipResourceTypeArray;
-    resDesc.res.array.array = hipArray;
-
-    // Specify texture object parameters
-    struct hipTextureDesc texDesc;
-    memset(&texDesc, 0, sizeof(texDesc));
-    texDesc.addressMode[0] = hipAddressModeWrap;
-    texDesc.addressMode[1] = hipAddressModeWrap;
-    texDesc.filterMode = hipFilterModePoint;
-    texDesc.readMode = hipReadModeElementType;
-    texDesc.normalizedCoords = 0;
-
-    // Create texture object
-    hipTextureObject_t textureObject = 0;
-    hipCreateTextureObject(&textureObject, &resDesc, &texDesc, NULL);
-
-    float* dData = NULL;
-    hipMalloc((void**)&dData, size);
-
-    dim3 dimBlock(16, 16, 1);
-    dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
-
-    hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, textureObject,
-                       width, height);
-
-    hipDeviceSynchronize();
-
-    float* hOutputData = (float*)malloc(size);
-    memset(hOutputData, 0, size);
-    hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost);
-
-    printf("dData: ");
-    for (int i = 0; i < 64; i++) {
-        printf("%f  ", hOutputData[i]);
-    }
-    printf("\n");
-    for (int i = 0; i < height; i++) {
-        for (int j = 0; j < width; j++) {
-            if (hData[i * width + j] != hOutputData[i * width + j]) {
-                printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j],
-                       hOutputData[i * width + j]);
-                testResult = false;
-                break;
-            }
-        }
-    }
-    hipDestroyTextureObject(textureObject);
-    hipFree(dData);
-    hipFreeArray(hipArray);
-}
+/* HIT_START
+ * BUILD: %t %s ../test_common.cpp
+ * RUN: %t
+ * HIT_END
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <hip/hip_runtime.h>
+#include "test_common.h"
+
+__global__ void tex2DKernel(float* outputData, hipTextureObject_t textureObject, int width,
+                            int height) {
+    int x = blockIdx.x * blockDim.x + threadIdx.x;
+    int y = blockIdx.y * blockDim.y + threadIdx.y;
+    outputData[y * width + x] = tex2D<float>(textureObject, x, y);
+}
+
+int runTest(int argc, char** argv);
+
+int main(int argc, char** argv) {
+    int testResult = runTest(argc, argv);
+
+    if (testResult) {
+        passed();
+    } else {
+        exit(EXIT_FAILURE);
+    }
+}
+
+int runTest(int argc, char** argv) {
+    int testResult = 1;
+    unsigned int width = 256;
+    unsigned int height = 256;
+    unsigned int size = width * height * sizeof(float);
+    float* hData = (float*)malloc(size);
+    memset(hData, 0, size);
+    for (int i = 0; i < height; i++) {
+        for (int j = 0; j < width; j++) {
+            hData[i * width + j] = i * width + j;
+        }
+    }
+    printf("hData: ");
+    for (int i = 0; i < 64; i++) {
+        printf("%f  ", hData[i]);
+    }
+    printf("\n");
+
+    hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat);
+    hipArray* hipArray;
+    hipMallocArray(&hipArray, &channelDesc, width, height);
+
+    hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice);
+
+    struct hipResourceDesc resDesc;
+    memset(&resDesc, 0, sizeof(resDesc));
+    resDesc.resType = hipResourceTypeArray;
+    resDesc.res.array.array = hipArray;
+
+    // Specify texture object parameters
+    struct hipTextureDesc texDesc;
+    memset(&texDesc, 0, sizeof(texDesc));
+    texDesc.addressMode[0] = hipAddressModeWrap;
+    texDesc.addressMode[1] = hipAddressModeWrap;
+    texDesc.filterMode = hipFilterModePoint;
+    texDesc.readMode = hipReadModeElementType;
+    texDesc.normalizedCoords = 0;
+
+    // Create texture object
+    hipTextureObject_t textureObject = 0;
+    hipCreateTextureObject(&textureObject, &resDesc, &texDesc, NULL);
+
+    float* dData = NULL;
+    hipMalloc((void**)&dData, size);
+
+    dim3 dimBlock(16, 16, 1);
+    dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
+
+    hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, textureObject,
+                       width, height);
+
+    hipDeviceSynchronize();
+
+    float* hOutputData = (float*)malloc(size);
+    memset(hOutputData, 0, size);
+    hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost);
+
+    printf("dData: ");
+    for (int i = 0; i < 64; i++) {
+        printf("%f  ", hOutputData[i]);
+    }
+    printf("\n");
+    for (int i = 0; i < height; i++) {
+        for (int j = 0; j < width; j++) {
+            if (hData[i * width + j] != hOutputData[i * width + j]) {
+                printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j],
+                       hOutputData[i * width + j]);
+                testResult = 0;
+                break;
+            }
+        }
+    }
+    hipDestroyTextureObject(textureObject);
+    hipFree(dData);
+    hipFreeArray(hipArray);
+    return testResult;
+}
diff --git a/tests/src/texture/hipTextureRef2D.cpp b/tests/src/texture/hipTextureRef2D.cpp
index c4c0b9e2fe..b912f789a7 100644
--- a/tests/src/texture/hipTextureRef2D.cpp
+++ b/tests/src/texture/hipTextureRef2D.cpp
@@ -1,93 +1,93 @@
-/* HIT_START
- * BUILD: %t %s ../test_common.cpp
- * RUN: %t
- * HIT_END
- */
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <hip/hip_runtime.h>
-#include "test_common.h"
-texture<float, 2, hipReadModeElementType> tex;
-
-bool testResult = true;
-
-__global__ void tex2DKernel(float* outputData,
-                            int width, int height) {
-    int x = blockIdx.x * blockDim.x + threadIdx.x;
-    int y = blockIdx.y * blockDim.y + threadIdx.y;
-    outputData[y * width + x] = tex2D(tex, x, y);
-}
-
-void runTest(int argc, char** argv);
-
-int main(int argc, char** argv) {
-    runTest(argc, argv);
-    if (testResult) {
-        passed();
-    } else {
-        exit(EXIT_FAILURE);
-    }
-}
-
-void runTest(int argc, char** argv) {
-    unsigned int width = 256;
-    unsigned int height = 256;
-    unsigned int size = width * height * sizeof(float);
-    float* hData = (float*)malloc(size);
-    memset(hData, 0, size);
-    for (int i = 0; i < height; i++) {
-        for (int j = 0; j < width; j++) {
-            hData[i * width + j] = i * width + j;
-        }
-    }
-    printf("hData: ");
-    for (int i = 0; i < 64; i++) {
-        printf("%f  ", hData[i]);
-    }
-    printf("\n");
-
-    hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat);
-    hipArray* hipArray;
-    hipMallocArray(&hipArray, &channelDesc, width, height);
-
-    hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice);
-
-    tex.addressMode[0] = hipAddressModeWrap;
-    tex.addressMode[1] = hipAddressModeWrap;
-    tex.filterMode = hipFilterModePoint;
-    tex.normalized = 0;
-
-    hipBindTextureToArray(tex, hipArray, channelDesc);
-
-    float* dData = NULL;
-    hipMalloc((void**)&dData, size);
-
-    dim3 dimBlock(16, 16, 1);
-    dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
-    hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, width, height);
-    hipDeviceSynchronize();
-
-    float* hOutputData = (float*)malloc(size);
-    memset(hOutputData, 0, size);
-    hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost);
-
-    printf("dData: ");
-    for (int i = 0; i < 64; i++) {
-        printf("%f  ", hOutputData[i]);
-    }
-    printf("\n");
-    for (int i = 0; i < height; i++) {
-        for (int j = 0; j < width; j++) {
-            if (hData[i * width + j] != hOutputData[i * width + j]) {
-                printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j],
-                       hOutputData[i * width + j]);
-                testResult = false;
-                break;
-            }
-        }
-    }
-    hipFree(dData);
-    hipFreeArray(hipArray);
-}
+/* HIT_START
+ * BUILD: %t %s ../test_common.cpp
+ * RUN: %t
+ * HIT_END
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <hip/hip_runtime.h>
+#include "test_common.h"
+texture<float, 2, hipReadModeElementType> tex;
+
+__global__ void tex2DKernel(float* outputData,
+                            int width, int height) {
+    int x = blockIdx.x * blockDim.x + threadIdx.x;
+    int y = blockIdx.y * blockDim.y + threadIdx.y;
+    outputData[y * width + x] = tex2D(tex, x, y);
+}
+
+int runTest(int argc, char** argv);
+
+int main(int argc, char** argv) {
+    int testResult = runTest(argc, argv);
+    if (testResult) {
+        passed();
+    } else {
+        exit(EXIT_FAILURE);
+    }
+}
+
+int runTest(int argc, char** argv) {
+    int testResult = 1;
+    unsigned int width = 256;
+    unsigned int height = 256;
+    unsigned int size = width * height * sizeof(float);
+    float* hData = (float*)malloc(size);
+    memset(hData, 0, size);
+    for (int i = 0; i < height; i++) {
+        for (int j = 0; j < width; j++) {
+            hData[i * width + j] = i * width + j;
+        }
+    }
+    printf("hData: ");
+    for (int i = 0; i < 64; i++) {
+        printf("%f  ", hData[i]);
+    }
+    printf("\n");
+
+    hipChannelFormatDesc channelDesc = hipCreateChannelDesc(32, 0, 0, 0, hipChannelFormatKindFloat);
+    hipArray* hipArray;
+    hipMallocArray(&hipArray, &channelDesc, width, height);
+
+    hipMemcpyToArray(hipArray, 0, 0, hData, size, hipMemcpyHostToDevice);
+
+    tex.addressMode[0] = hipAddressModeWrap;
+    tex.addressMode[1] = hipAddressModeWrap;
+    tex.filterMode = hipFilterModePoint;
+    tex.normalized = 0;
+
+    hipBindTextureToArray(tex, hipArray, channelDesc);
+
+    float* dData = NULL;
+    hipMalloc((void**)&dData, size);
+
+    dim3 dimBlock(16, 16, 1);
+    dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
+    hipLaunchKernelGGL(tex2DKernel, dim3(dimGrid), dim3(dimBlock), 0, 0, dData, width, height);
+    hipDeviceSynchronize();
+
+    float* hOutputData = (float*)malloc(size);
+    memset(hOutputData, 0, size);
+    hipMemcpy(hOutputData, dData, size, hipMemcpyDeviceToHost);
+
+    printf("dData: ");
+    for (int i = 0; i < 64; i++) {
+        printf("%f  ", hOutputData[i]);
+    }
+    printf("\n");
+    for (int i = 0; i < height; i++) {
+        for (int j = 0; j < width; j++) {
+            if (hData[i * width + j] != hOutputData[i * width + j]) {
+                printf("Difference [ %d %d ]:%f ----%f\n", i, j, hData[i * width + j],
+                       hOutputData[i * width + j]);
+                testResult = 0;
+                break;
+            }
+        }
+    }
+    hipFree(dData);
+    hipFreeArray(hipArray);
+    return testResult;
+}