diff --git a/projects/hip/clang-hipify/src/Cuda2Hip.cpp b/projects/hip/clang-hipify/src/Cuda2Hip.cpp index 0a486a6596..a258825091 100644 --- a/projects/hip/clang-hipify/src/Cuda2Hip.cpp +++ b/projects/hip/clang-hipify/src/Cuda2Hip.cpp @@ -46,6 +46,7 @@ THE SOFTWARE. #include #include +#include using namespace clang; using namespace clang::ast_matchers; @@ -69,226 +70,885 @@ enum ConvTypes { CONV_OTHER, CONV_INCLUDE, CONV_LITERAL, + CONV_BLAS, CONV_LAST }; const char *counterNames[ConvTypes::CONV_LAST] = { "dev", "mem", "kern", "coord_func", "math_func", "special_func", "stream", "event", "err", "def", - "tex", "other", "include", "literal"}; + "tex", "other", "include", "literal", "blas"}; namespace { struct cuda2hipMap { cuda2hipMap() { - // defines + + // Replacement Excludes + cudaExcludes = {"CHECK_CUDA_ERROR", "CUDA_SAFE_CALL"}; + + // Defines cuda2hipRename["__CUDACC__"] = {"__HIPCC__", CONV_DEF}; - // includes - cuda2hipRename["cuda_runtime.h"] = {"hip_runtime.h", CONV_INCLUDE}; + // CUDA includes + cuda2hipRename["cuda_runtime.h"] = {"hip_runtime.h", CONV_INCLUDE}; cuda2hipRename["cuda_runtime_api.h"] = {"hip_runtime_api.h", CONV_INCLUDE}; - // Error codes and return types: - cuda2hipRename["cudaError_t"] = {"hipError_t", CONV_ERR}; - cuda2hipRename["cudaError"] = {"hipError", CONV_ERR}; - cuda2hipRename["cudaSuccess"] = {"hipSuccess", CONV_ERR}; + // HIP includes + cuda2hipRename["cudacommon.h.prehip"] = {"cudacommon.h", CONV_INCLUDE}; - cuda2hipRename["cudaErrorUnknown"] = {"hipErrorUnknown", CONV_ERR}; - cuda2hipRename["cudaErrorMemoryAllocation"] = {"hipErrorMemoryAllocation", - CONV_ERR}; - cuda2hipRename["cudaErrorMemoryFree"] = {"hipErrorMemoryFree", CONV_ERR}; - cuda2hipRename["cudaErrorUnknownSymbol"] = {"hipErrorUnknownSymbol", - CONV_ERR}; - cuda2hipRename["cudaErrorOutOfResources"] = {"hipErrorOutOfResources", - CONV_ERR}; - cuda2hipRename["cudaErrorInvalidValue"] = {"hipErrorInvalidValue", - CONV_ERR}; - cuda2hipRename["cudaErrorInvalidResourceHandle"] = { - "hipErrorInvalidResourceHandle", CONV_ERR}; - cuda2hipRename["cudaErrorInvalidDevice"] = {"hipErrorInvalidDevice", - CONV_ERR}; - cuda2hipRename["cudaErrorNoDevice"] = {"hipErrorNoDevice", CONV_ERR}; - cuda2hipRename["cudaErrorNotReady"] = {"hipErrorNotReady", CONV_ERR}; - cuda2hipRename["cudaErrorUnknown"] = {"hipErrorUnknown", CONV_ERR}; + // CUBLAS includes + cuda2hipRename["cublas.h"] = {"hipblas.h", CONV_INCLUDE}; + cuda2hipRename["cublas_v2.h"] = {"hipblas.h", CONV_INCLUDE}; - // error APIs: - cuda2hipRename["cudaGetLastError"] = {"hipGetLastError", CONV_ERR}; - cuda2hipRename["cudaPeekAtLastError"] = {"hipPeekAtLastError", CONV_ERR}; - cuda2hipRename["cudaGetErrorName"] = {"hipGetErrorName", CONV_ERR}; - cuda2hipRename["cudaGetErrorString"] = {"hipGetErrorString", CONV_ERR}; + // Error codes and return types + cuda2hipRename["cudaError_t"] = {"hipError_t", CONV_ERR}; + cuda2hipRename["cudaError"] = {"hipError", CONV_ERR}; + cuda2hipRename["cudaSuccess"] = {"hipSuccess", CONV_ERR}; + cuda2hipRename["cudaErrorUnknown"] = {"hipErrorUnknown", CONV_ERR}; + cuda2hipRename["cudaErrorMemoryAllocation"] = {"hipErrorMemoryAllocation", CONV_ERR}; + cuda2hipRename["cudaErrorMemoryFree"] = {"hipErrorMemoryFree", CONV_ERR}; + cuda2hipRename["cudaErrorUnknownSymbol"] = {"hipErrorUnknownSymbol", CONV_ERR}; + cuda2hipRename["cudaErrorOutOfResources"] = {"hipErrorOutOfResources", CONV_ERR}; + cuda2hipRename["cudaErrorInvalidValue"] = {"hipErrorInvalidValue", CONV_ERR}; + cuda2hipRename["cudaErrorInvalidResourceHandle"] = {"hipErrorInvalidResourceHandle", CONV_ERR}; + cuda2hipRename["cudaErrorInvalidDevice"] = {"hipErrorInvalidDevice", CONV_ERR}; + cuda2hipRename["cudaErrorInvalidMemcpyDirection"] = {"hipErrorInvalidMemcpyDirection", CONV_ERR}; + cuda2hipRename["cudaErrorInvalidDevicePointer"] = {"hipErrorInvalidDevicePointer", CONV_ERR}; + cuda2hipRename["cudaErrorInitializationError"] = {"hipErrorInvalidDevicePointer", CONV_ERR}; + cuda2hipRename["cudaErrorNoDevice"] = {"hipErrorNoDevice", CONV_ERR}; + cuda2hipRename["cudaErrorNotReady"] = {"hipErrorNotReady", CONV_ERR}; + cuda2hipRename["cudaErrorPeerAccessNotEnabled"] = {"hipErrorPeerAccessNotEnabled", CONV_ERR}; + cuda2hipRename["cudaErrorPeerAccessAlreadyEnabled"] = {"hipErrorPeerAccessAlreadyEnabled", CONV_ERR}; + // NOTE: no corresponding error type in CUDA + //cuda2hipRename["cudaErrorRuntimeMemory"] = {"hipErrorRuntimeMemory", CONV_ERR}; + //cuda2hipRename["cudaErrorRuntimeOther"] = {"hipErrorRuntimeOther", CONV_ERR}; + cuda2hipRename["cudaErrorHostMemoryAlreadyRegistered"] = {"hipErrorHostMemoryAlreadyRegistered", CONV_ERR}; + cuda2hipRename["cudaErrorHostMemoryNotRegistered"] = {"hipErrorHostMemoryNotRegistered", CONV_ERR}; + + // Error API + cuda2hipRename["cudaGetLastError"] = {"hipGetLastError", CONV_ERR}; + cuda2hipRename["cudaPeekAtLastError"] = {"hipPeekAtLastError", CONV_ERR}; + cuda2hipRename["cudaGetErrorName"] = {"hipGetErrorName", CONV_ERR}; + cuda2hipRename["cudaGetErrorString"] = {"hipGetErrorString", CONV_ERR}; // Memcpy - cuda2hipRename["cudaMemcpy"] = {"hipMemcpy", CONV_MEM}; - cuda2hipRename["cudaMemcpyHostToHost"] = {"hipMemcpyHostToHost", CONV_MEM}; - cuda2hipRename["cudaMemcpyHostToDevice"] = {"hipMemcpyHostToDevice", - CONV_MEM}; - cuda2hipRename["cudaMemcpyDeviceToHost"] = {"hipMemcpyDeviceToHost", - CONV_MEM}; - cuda2hipRename["cudaMemcpyDeviceToDevice"] = {"hipMemcpyDeviceToDevice", - CONV_MEM}; - cuda2hipRename["cudaMemcpyDefault"] = {"hipMemcpyDefault", CONV_MEM}; - cuda2hipRename["cudaMemcpyToSymbol"] = {"hipMemcpyToSymbol", CONV_MEM}; - cuda2hipRename["cudaMemset"] = {"hipMemset", CONV_MEM}; - cuda2hipRename["cudaMemsetAsync"] = {"hipMemsetAsync", CONV_MEM}; - cuda2hipRename["cudaMemcpyAsync"] = {"hipMemcpyAsync", CONV_MEM}; - cuda2hipRename["cudaMemGetInfo"] = {"hipMemGetInfo", CONV_MEM}; - cuda2hipRename["cudaMemcpyKind"] = {"hipMemcpyKind", CONV_MEM}; + cuda2hipRename["cudaMemcpy"] = {"hipMemcpy", CONV_MEM}; + cuda2hipRename["cudaMemcpyHostToHost"] = {"hipMemcpyHostToHost", CONV_MEM}; + cuda2hipRename["cudaMemcpyHostToDevice"] = {"hipMemcpyHostToDevice", CONV_MEM}; + cuda2hipRename["cudaMemcpyDeviceToHost"] = {"hipMemcpyDeviceToHost", CONV_MEM}; + cuda2hipRename["cudaMemcpyDeviceToDevice"] = {"hipMemcpyDeviceToDevice", CONV_MEM}; + cuda2hipRename["cudaMemcpyDefault"] = {"hipMemcpyDefault", CONV_MEM}; + cuda2hipRename["cudaMemcpyToSymbol"] = {"hipMemcpyToSymbol", CONV_MEM}; + cuda2hipRename["cudaMemset"] = {"hipMemset", CONV_MEM}; + cuda2hipRename["cudaMemsetAsync"] = {"hipMemsetAsync", CONV_MEM}; + cuda2hipRename["cudaMemcpyAsync"] = {"hipMemcpyAsync", CONV_MEM}; + cuda2hipRename["cudaMemGetInfo"] = {"hipMemGetInfo", CONV_MEM}; + cuda2hipRename["cudaMemcpyKind"] = {"hipMemcpyKind", CONV_MEM}; - // Memory management : - cuda2hipRename["cudaMalloc"] = {"hipMalloc", CONV_MEM}; + // Memory management + cuda2hipRename["cudaMalloc"] = {"hipMalloc", CONV_MEM}; cuda2hipRename["cudaMallocHost"] = {"hipHostAlloc", CONV_MEM}; - cuda2hipRename["cudaFree"] = {"hipFree", CONV_MEM}; - cuda2hipRename["cudaFreeHost"] = {"hipHostFree", CONV_MEM}; + cuda2hipRename["cudaFree"] = {"hipFree", CONV_MEM}; + cuda2hipRename["cudaFreeHost"] = {"hipHostFree", CONV_MEM}; - // Coordinate Indexing and Dimensions: + // Coordinate Indexing and Dimensions cuda2hipRename["threadIdx.x"] = {"hipThreadIdx_x", CONV_COORD_FUNC}; cuda2hipRename["threadIdx.y"] = {"hipThreadIdx_y", CONV_COORD_FUNC}; cuda2hipRename["threadIdx.z"] = {"hipThreadIdx_z", CONV_COORD_FUNC}; - cuda2hipRename["blockIdx.x"] = {"hipBlockIdx_x", CONV_COORD_FUNC}; - cuda2hipRename["blockIdx.y"] = {"hipBlockIdx_y", CONV_COORD_FUNC}; - cuda2hipRename["blockIdx.z"] = {"hipBlockIdx_z", CONV_COORD_FUNC}; + cuda2hipRename["blockIdx.x"] = {"hipBlockIdx_x", CONV_COORD_FUNC}; + cuda2hipRename["blockIdx.y"] = {"hipBlockIdx_y", CONV_COORD_FUNC}; + cuda2hipRename["blockIdx.z"] = {"hipBlockIdx_z", CONV_COORD_FUNC}; - cuda2hipRename["blockDim.x"] = {"hipBlockDim_x", CONV_COORD_FUNC}; - cuda2hipRename["blockDim.y"] = {"hipBlockDim_y", CONV_COORD_FUNC}; - cuda2hipRename["blockDim.z"] = {"hipBlockDim_z", CONV_COORD_FUNC}; + cuda2hipRename["blockDim.x"] = {"hipBlockDim_x", CONV_COORD_FUNC}; + cuda2hipRename["blockDim.y"] = {"hipBlockDim_y", CONV_COORD_FUNC}; + cuda2hipRename["blockDim.z"] = {"hipBlockDim_z", CONV_COORD_FUNC}; - cuda2hipRename["gridDim.x"] = {"hipGridDim_x", CONV_COORD_FUNC}; - cuda2hipRename["gridDim.y"] = {"hipGridDim_y", CONV_COORD_FUNC}; - cuda2hipRename["gridDim.z"] = {"hipGridDim_z", CONV_COORD_FUNC}; + cuda2hipRename["gridDim.x"] = {"hipGridDim_x", CONV_COORD_FUNC}; + cuda2hipRename["gridDim.y"] = {"hipGridDim_y", CONV_COORD_FUNC}; + cuda2hipRename["gridDim.z"] = {"hipGridDim_z", CONV_COORD_FUNC}; - cuda2hipRename["blockIdx.x"] = {"hipBlockIdx_x", CONV_COORD_FUNC}; - cuda2hipRename["blockIdx.y"] = {"hipBlockIdx_y", CONV_COORD_FUNC}; - cuda2hipRename["blockIdx.z"] = {"hipBlockIdx_z", CONV_COORD_FUNC}; + cuda2hipRename["blockIdx.x"] = {"hipBlockIdx_x", CONV_COORD_FUNC}; + cuda2hipRename["blockIdx.y"] = {"hipBlockIdx_y", CONV_COORD_FUNC}; + cuda2hipRename["blockIdx.z"] = {"hipBlockIdx_z", CONV_COORD_FUNC}; - cuda2hipRename["blockDim.x"] = {"hipBlockDim_x", CONV_COORD_FUNC}; - cuda2hipRename["blockDim.y"] = {"hipBlockDim_y", CONV_COORD_FUNC}; - cuda2hipRename["blockDim.z"] = {"hipBlockDim_z", CONV_COORD_FUNC}; + cuda2hipRename["blockDim.x"] = {"hipBlockDim_x", CONV_COORD_FUNC}; + cuda2hipRename["blockDim.y"] = {"hipBlockDim_y", CONV_COORD_FUNC}; + cuda2hipRename["blockDim.z"] = {"hipBlockDim_z", CONV_COORD_FUNC}; - cuda2hipRename["gridDim.x"] = {"hipGridDim_x", CONV_COORD_FUNC}; - cuda2hipRename["gridDim.y"] = {"hipGridDim_y", CONV_COORD_FUNC}; - cuda2hipRename["gridDim.z"] = {"hipGridDim_z", CONV_COORD_FUNC}; + cuda2hipRename["gridDim.x"] = {"hipGridDim_x", CONV_COORD_FUNC}; + cuda2hipRename["gridDim.y"] = {"hipGridDim_y", CONV_COORD_FUNC}; + cuda2hipRename["gridDim.z"] = {"hipGridDim_z", CONV_COORD_FUNC}; - cuda2hipRename["warpSize"] = {"hipWarpSize", CONV_SPECIAL_FUNC}; + cuda2hipRename["warpSize"] = {"hipWarpSize", CONV_SPECIAL_FUNC}; // Events - cuda2hipRename["cudaEvent_t"] = {"hipEvent_t", CONV_EVENT}; - cuda2hipRename["cudaEventCreate"] = {"hipEventCreate", CONV_EVENT}; - cuda2hipRename["cudaEventCreateWithFlags"] = {"hipEventCreateWithFlags", - CONV_EVENT}; - cuda2hipRename["cudaEventDestroy"] = {"hipEventDestroy", CONV_EVENT}; - cuda2hipRename["cudaEventRecord"] = {"hipEventRecord", CONV_EVENT}; - cuda2hipRename["cudaEventElapsedTime"] = {"hipEventElapsedTime", - CONV_EVENT}; - cuda2hipRename["cudaEventSynchronize"] = {"hipEventSynchronize", - CONV_EVENT}; + cuda2hipRename["cudaEvent_t"] = {"hipEvent_t", CONV_EVENT}; + cuda2hipRename["cudaEventCreate"] = {"hipEventCreate", CONV_EVENT}; + cuda2hipRename["cudaEventCreateWithFlags"] = {"hipEventCreateWithFlags", CONV_EVENT}; + cuda2hipRename["cudaEventDestroy"] = {"hipEventDestroy", CONV_EVENT}; + cuda2hipRename["cudaEventRecord"] = {"hipEventRecord", CONV_EVENT}; + cuda2hipRename["cudaEventElapsedTime"] = {"hipEventElapsedTime", CONV_EVENT}; + cuda2hipRename["cudaEventSynchronize"] = {"hipEventSynchronize", CONV_EVENT}; // Streams - cuda2hipRename["cudaStream_t"] = {"hipStream_t", CONV_STREAM}; - cuda2hipRename["cudaStreamCreate"] = {"hipStreamCreate", CONV_STREAM}; - cuda2hipRename["cudaStreamCreateWithFlags"] = {"hipStreamCreateWithFlags", - CONV_STREAM}; - cuda2hipRename["cudaStreamDestroy"] = {"hipStreamDestroy", CONV_STREAM}; - cuda2hipRename["cudaStreamWaitEvent"] = {"hipStreamWaitEven", CONV_STREAM}; - cuda2hipRename["cudaStreamSynchronize"] = {"hipStreamSynchronize", - CONV_STREAM}; - cuda2hipRename["cudaStreamDefault"] = {"hipStreamDefault", CONV_STREAM}; - cuda2hipRename["cudaStreamNonBlocking"] = {"hipStreamNonBlocking", - CONV_STREAM}; + cuda2hipRename["cudaStream_t"] = {"hipStream_t", CONV_STREAM}; + cuda2hipRename["cudaStreamCreate"] = {"hipStreamCreate", CONV_STREAM}; + cuda2hipRename["cudaStreamCreateWithFlags"] = {"hipStreamCreateWithFlags", CONV_STREAM}; + cuda2hipRename["cudaStreamDestroy"] = {"hipStreamDestroy", CONV_STREAM}; + cuda2hipRename["cudaStreamWaitEvent"] = {"hipStreamWaitEven", CONV_STREAM}; + cuda2hipRename["cudaStreamSynchronize"] = {"hipStreamSynchronize", CONV_STREAM}; + cuda2hipRename["cudaStreamDefault"] = {"hipStreamDefault", CONV_STREAM}; + cuda2hipRename["cudaStreamNonBlocking"] = {"hipStreamNonBlocking", CONV_STREAM}; // Other synchronization - cuda2hipRename["cudaDeviceSynchronize"] = {"hipDeviceSynchronize", - CONV_DEV}; - cuda2hipRename["cudaThreadSynchronize"] = { - "hipDeviceSynchronize", - CONV_DEV}; // translate deprecated cudaThreadSynchronize - cuda2hipRename["cudaDeviceReset"] = {"hipDeviceReset", CONV_DEV}; - cuda2hipRename["cudaThreadExit"] = { - "hipDeviceReset", CONV_DEV}; // translate deprecated cudaThreadExit - cuda2hipRename["cudaSetDevice"] = {"hipSetDevice", CONV_DEV}; - cuda2hipRename["cudaGetDevice"] = {"hipGetDevice", CONV_DEV}; + cuda2hipRename["cudaDeviceSynchronize"] = {"hipDeviceSynchronize", CONV_DEV}; + // translate deprecated cudaThreadSynchronize + cuda2hipRename["cudaThreadSynchronize"] = {"hipDeviceSynchronize", CONV_DEV}; + cuda2hipRename["cudaDeviceReset"] = {"hipDeviceReset", CONV_DEV}; + // translate deprecated cudaThreadExit + cuda2hipRename["cudaThreadExit"] = {"hipDeviceReset", CONV_DEV}; + cuda2hipRename["cudaSetDevice"] = {"hipSetDevice", CONV_DEV}; + cuda2hipRename["cudaGetDevice"] = {"hipGetDevice", CONV_DEV}; - // Attribute - cuda2hipRename["bcudaDeviceAttr"] = {"hipDeviceAttribute_t", CONV_DEV}; - cuda2hipRename["bcudaDeviceGetAttribute"] = {"hipDeviceGetAttribute", - CONV_DEV}; + // Attributes + cuda2hipRename["cudaDeviceAttr"] = {"hipDeviceAttribute_t", CONV_DEV}; + cuda2hipRename["cudaDeviceGetAttribute"] = {"hipDeviceGetAttribute", CONV_DEV}; + + cuda2hipRename["cudaDevAttrMaxThreadsPerBlock"] = {"hipDeviceAttributeMaxThreadsPerBlock", CONV_DEV}; + cuda2hipRename["cudaDevAttrMaxBlockDimX"] = {"hipDeviceAttributeMaxBlockDimX", CONV_DEV}; + cuda2hipRename["cudaDevAttrMaxBlockDimY"] = {"hipDeviceAttributeMaxBlockDimY", CONV_DEV}; + cuda2hipRename["cudaDevAttrMaxBlockDimZ"] = {"hipDeviceAttributeMaxBlockDimZ", CONV_DEV}; + cuda2hipRename["cudaDevAttrMaxGridDimX"] = {"hipDeviceAttributeMaxGridDimX", CONV_DEV}; + cuda2hipRename["cudaDevAttrMaxGridDimY"] = {"hipDeviceAttributeMaxGridDimY", CONV_DEV}; + cuda2hipRename["cudaDevAttrMaxGridDimZ"] = {"hipDeviceAttributeMaxGridDimZ", CONV_DEV}; + cuda2hipRename["cudaDevAttrMaxSharedMemoryPerBlock"] = {"hipDeviceAttributeMaxSharedMemoryPerBlock", CONV_DEV}; + cuda2hipRename["cudaDevAttrTotalConstantMemory"] = {"hipDeviceAttributeTotalConstantMemory", CONV_DEV}; + cuda2hipRename["cudaDevAttrWarpSize"] = {"hipDeviceAttributeWarpSize", CONV_DEV}; + cuda2hipRename["cudaDevAttrMaxRegistersPerBlock"] = {"hipDeviceAttributeMaxRegistersPerBlock", CONV_DEV}; + cuda2hipRename["cudaDevAttrClockRate"] = {"hipDeviceAttributeClockRate", CONV_DEV}; + cuda2hipRename["cudaDevAttrMemoryClockRate"] = {"hipDeviceAttributeMemoryClockRate", CONV_DEV}; + cuda2hipRename["cudaDevAttrGlobalMemoryBusWidth"] = {"hipDeviceAttributeMemoryBusWidth", CONV_DEV}; + cuda2hipRename["cudaDevAttrMultiProcessorCount"] = {"hipDeviceAttributeMultiprocessorCount", CONV_DEV}; + cuda2hipRename["cudaDevAttrComputeMode"] = {"hipDeviceAttributeComputeMode", CONV_DEV}; + cuda2hipRename["cudaDevAttrL2CacheSize"] = {"hipDeviceAttributeL2CacheSize", CONV_DEV}; + cuda2hipRename["cudaDevAttrMaxThreadsPerMultiProcessor"] = {"hipDeviceAttributeMaxThreadsPerMultiProcessor", CONV_DEV}; + cuda2hipRename["cudaDevAttrComputeCapabilityMajor"] = {"hipDeviceAttributeComputeCapabilityMajor", CONV_DEV}; + cuda2hipRename["cudaDevAttrConcurrentKernels"] = {"hipDeviceAttributeConcurrentKernels", CONV_DEV}; + cuda2hipRename["cudaDevAttrPciBusId"] = {"hipDeviceAttributePciBusId", CONV_DEV}; + cuda2hipRename["cudaDevAttrPciDeviceId"] = {"hipDeviceAttributePciDeviceId", CONV_DEV}; + cuda2hipRename["cudaDevAttrMaxSharedMemoryPerMultiprocessor"] = {"hipDeviceAttributeMaxSharedMemoryPerMultiprocessor", CONV_DEV}; + cuda2hipRename["cudaDevAttrIsMultiGpuBoard"] = {"hipDeviceAttributeIsMultiGpuBoard", CONV_DEV}; // Device - cuda2hipRename["cudaDeviceProp"] = {"hipDeviceProp_t", CONV_DEV}; - cuda2hipRename["cudaGetDeviceProperties"] = {"hipGetDeviceProperties", - CONV_DEV}; + cuda2hipRename["cudaDeviceProp"] = {"hipDeviceProp_t", CONV_DEV}; + cuda2hipRename["cudaGetDeviceProperties"] = {"hipGetDeviceProperties", CONV_DEV}; // Cache config - cuda2hipRename["cudaDeviceSetCacheConfig"] = {"hipDeviceSetCacheConfig", - CONV_DEV}; - cuda2hipRename["cudaThreadSetCacheConfig"] = { - "hipDeviceSetCacheConfig", CONV_DEV}; // translate deprecated - cuda2hipRename["cudaDeviceGetCacheConfig"] = {"hipDeviceGetCacheConfig", - CONV_DEV}; - cuda2hipRename["cudaThreadGetCacheConfig"] = { - "hipDeviceGetCacheConfig", CONV_DEV}; // translate deprecated - cuda2hipRename["cudaFuncCache"] = {"hipFuncCache", CONV_DEV}; - cuda2hipRename["cudaFuncCachePreferNone"] = {"hipFuncCachePreferNone", - CONV_DEV}; - cuda2hipRename["cudaFuncCachePreferShared"] = {"hipFuncCachePreferShared", - CONV_DEV}; - cuda2hipRename["cudaFuncCachePreferL1"] = {"hipFuncCachePreferL1", - CONV_DEV}; - cuda2hipRename["cudaFuncCachePreferEqual"] = {"hipFuncCachePreferEqual", - CONV_DEV}; - // function - cuda2hipRename["cudaFuncSetCacheConfig"] = {"hipFuncSetCacheConfig", - CONV_DEV}; + cuda2hipRename["cudaDeviceSetCacheConfig"] = {"hipDeviceSetCacheConfig", CONV_DEV}; + // translate deprecated + cuda2hipRename["cudaThreadSetCacheConfig"] = {"hipDeviceSetCacheConfig", CONV_DEV}; + cuda2hipRename["cudaDeviceGetCacheConfig"] = {"hipDeviceGetCacheConfig", CONV_DEV}; + // translate deprecated + cuda2hipRename["cudaThreadGetCacheConfig"] = {"hipDeviceGetCacheConfig", CONV_DEV}; + cuda2hipRename["cudaFuncCache"] = {"hipFuncCache", CONV_DEV}; + cuda2hipRename["cudaFuncCachePreferNone"] = {"hipFuncCachePreferNone", CONV_DEV}; + cuda2hipRename["cudaFuncCachePreferShared"] = {"hipFuncCachePreferShared", CONV_DEV}; + cuda2hipRename["cudaFuncCachePreferL1"] = {"hipFuncCachePreferL1", CONV_DEV}; + cuda2hipRename["cudaFuncCachePreferEqual"] = {"hipFuncCachePreferEqual", CONV_DEV}; + cuda2hipRename["cudaFuncSetCacheConfig"] = {"hipFuncSetCacheConfig", CONV_DEV}; + // Driver/Runtime cuda2hipRename["cudaDriverGetVersion"] = {"hipDriverGetVersion", CONV_DEV}; - // cuda2hipRename["cudaRuntimeGetVersion"] = {"hipRuntimeGetVersion", - // CONV_DEV}; + cuda2hipRename["cudaGetDeviceCount"] = {"hipGetDeviceCount", CONV_DEV}; + // unsupported yet + //cuda2hipRename["cudaRuntimeGetVersion"] = {"hipRuntimeGetVersion", CONV_DEV}; // Peer2Peer - cuda2hipRename["cudaDeviceCanAccessPeer"] = {"hipDeviceCanAccessPeer", - CONV_DEV}; - cuda2hipRename["cudaDeviceDisablePeerAccess"] = { - "hipDeviceDisablePeerAccess", CONV_DEV}; - cuda2hipRename["cudaDeviceEnablePeerAccess"] = {"hipDeviceEnablePeerAccess", - CONV_DEV}; - cuda2hipRename["cudaMemcpyPeerAsync"] = {"hipMemcpyPeerAsync", CONV_MEM}; - cuda2hipRename["cudaMemcpyPeer"] = {"hipMemcpyPeer", CONV_MEM}; + cuda2hipRename["cudaDeviceCanAccessPeer"] = {"hipDeviceCanAccessPeer", CONV_DEV}; + cuda2hipRename["cudaDeviceDisablePeerAccess"] = {"hipDeviceDisablePeerAccess", CONV_DEV}; + cuda2hipRename["cudaDeviceEnablePeerAccess"] = {"hipDeviceEnablePeerAccess", CONV_DEV}; + cuda2hipRename["cudaMemcpyPeerAsync"] = {"hipMemcpyPeerAsync", CONV_MEM}; + cuda2hipRename["cudaMemcpyPeer"] = {"hipMemcpyPeer", CONV_MEM}; - // Shared mem: - cuda2hipRename["cudaDeviceSetSharedMemConfig"] = { - "hipDeviceSetSharedMemConfig", CONV_DEV}; - cuda2hipRename["cudaThreadSetSharedMemConfig"] = { - "hipDeviceSetSharedMemConfig", CONV_DEV}; // translate deprecated - cuda2hipRename["cudaDeviceGetSharedMemConfig"] = { - "hipDeviceGetSharedMemConfig", CONV_DEV}; - cuda2hipRename["cudaThreadGetSharedMemConfig"] = { - "hipDeviceGetSharedMemConfig", CONV_DEV}; // translate deprecated - cuda2hipRename["cudaSharedMemConfig"] = {"hipSharedMemConfig", CONV_DEV}; - cuda2hipRename["cudaSharedMemBankSizeDefault"] = { - "hipSharedMemBankSizeDefault", CONV_DEV}; - cuda2hipRename["cudaSharedMemBankSizeFourByte"] = { - "hipSharedMemBankSizeFourByte", CONV_DEV}; - cuda2hipRename["cudaSharedMemBankSizeEightByte"] = { - "hipSharedMemBankSizeEightByte", CONV_DEV}; - - cuda2hipRename["cudaGetDeviceCount"] = {"hipGetDeviceCount", CONV_DEV}; + // Shared memory + cuda2hipRename["cudaDeviceSetSharedMemConfig"] = {"hipDeviceSetSharedMemConfig", CONV_DEV}; + // translate deprecated + cuda2hipRename["cudaThreadSetSharedMemConfig"] = {"hipDeviceSetSharedMemConfig", CONV_DEV}; + cuda2hipRename["cudaDeviceGetSharedMemConfig"] = {"hipDeviceGetSharedMemConfig", CONV_DEV}; + // translate deprecated + cuda2hipRename["cudaThreadGetSharedMemConfig"] = {"hipDeviceGetSharedMemConfig", CONV_DEV}; + cuda2hipRename["cudaSharedMemConfig"] = {"hipSharedMemConfig", CONV_DEV}; + cuda2hipRename["cudaSharedMemBankSizeDefault"] = {"hipSharedMemBankSizeDefault", CONV_DEV}; + cuda2hipRename["cudaSharedMemBankSizeFourByte"] = {"hipSharedMemBankSizeFourByte", CONV_DEV}; + cuda2hipRename["cudaSharedMemBankSizeEightByte"] = {"hipSharedMemBankSizeEightByte", CONV_DEV}; // Profiler - // cuda2hipRename["cudaProfilerInitialize"] = "hipProfilerInitialize"; // - // see if these are called anywhere. - cuda2hipRename["cudaProfilerStart"] = {"hipProfilerStart", CONV_OTHER}; - cuda2hipRename["cudaProfilerStop"] = {"hipProfilerStop", CONV_OTHER}; + // unsupported yet + //cuda2hipRename["cudaProfilerInitialize"] = {"hipProfilerInitialize", CONV_OTHER}; + cuda2hipRename["cudaProfilerStart"] = {"hipProfilerStart", CONV_OTHER}; + cuda2hipRename["cudaProfilerStop"] = {"hipProfilerStop", CONV_OTHER}; + cuda2hipRename["cudaChannelFormatDesc"] = {"hipChannelFormatDesc", CONV_TEX}; + cuda2hipRename["cudaFilterModePoint"] = {"hipFilterModePoint", CONV_TEX}; + cuda2hipRename["cudaReadModeElementType"] = {"hipReadModeElementType", CONV_TEX}; - cuda2hipRename["cudaChannelFormatDesc"] = {"hipChannelFormatDesc", - CONV_TEX}; - cuda2hipRename["cudaFilterModePoint"] = {"hipFilterModePoint", CONV_TEX}; - cuda2hipRename["cudaReadModeElementType"] = {"hipReadModeElementType", - CONV_TEX}; + // Channel descriptor + cuda2hipRename["cudaCreateChannelDesc"] = {"hipCreateChannelDesc", CONV_TEX}; + cuda2hipRename["cudaBindTexture"] = {"hipBindTexture", CONV_TEX}; + cuda2hipRename["cudaUnbindTexture"] = {"hipUnbindTexture", CONV_TEX}; - cuda2hipRename["cudaCreateChannelDesc"] = {"hipCreateChannelDesc", - CONV_TEX}; - cuda2hipRename["cudaBindTexture"] = {"hipBindTexture", CONV_TEX}; - cuda2hipRename["cudaUnbindTexture"] = {"hipUnbindTexture", CONV_TEX}; + //---------------------------------------BLAS-------------------------------------// + // Blas types + cuda2hipRename["cublasHandle_t"] = {"hipblasHandle_t", CONV_BLAS}; + // Blas operations + cuda2hipRename["cublasOperation_t"] = {"hipblasOperation_t", CONV_BLAS}; + cuda2hipRename["CUBLAS_OP_N"] = {"HIPBLAS_OP_N", CONV_BLAS}; + cuda2hipRename["CUBLAS_OP_T"] = {"HIPBLAS_OP_T", CONV_BLAS}; + cuda2hipRename["CUBLAS_OP_C"] = {"HIPBLAS_OP_C", CONV_BLAS}; + // Blas statuses + cuda2hipRename["cublasStatus_t"] = {"hipblasStatus_t", CONV_BLAS}; + cuda2hipRename["CUBLAS_STATUS_SUCCESS"] = {"HIPBLAS_STATUS_SUCCESS", CONV_BLAS}; + cuda2hipRename["CUBLAS_STATUS_NOT_INITIALIZED"] = {"HIPBLAS_STATUS_NOT_INITIALIZED", CONV_BLAS}; + cuda2hipRename["CUBLAS_STATUS_ALLOC_FAILED"] = {"HIPBLAS_STATUS_ALLOC_FAILED", CONV_BLAS}; + cuda2hipRename["CUBLAS_STATUS_INVALID_VALUE"] = {"HIPBLAS_STATUS_INVALID_VALUE", CONV_BLAS}; + cuda2hipRename["CUBLAS_STATUS_MAPPING_ERROR"] = {"HIPBLAS_STATUS_MAPPING_ERROR", CONV_BLAS}; + cuda2hipRename["CUBLAS_STATUS_EXECUTION_FAILED"] = {"HIPBLAS_STATUS_EXECUTION_FAILED", CONV_BLAS}; + cuda2hipRename["CUBLAS_STATUS_INTERNAL_ERROR"] = {"HIPBLAS_STATUS_INTERNAL_ERROR", CONV_BLAS}; + cuda2hipRename["CUBLAS_STATUS_NOT_SUPPORTED"] = {"HIPBLAS_STATUS_INTERNAL_ERROR", CONV_BLAS}; + // Blas Fill Modes + // unsupported yet by hipblas/hcblas + //cuda2hipRename["cublasFillMode_t"] = {"hipblasFillMode_t", CONV_BLAS}; + //cuda2hipRename["CUBLAS_FILL_MODE_LOWER"] = {"HIPBLAS_FILL_MODE_LOWER", CONV_BLAS}; + //cuda2hipRename["CUBLAS_FILL_MODE_UPPER"] = {"HIPBLAS_FILL_MODE_UPPER", CONV_BLAS}; + // Blas Diag Types + // unsupported yet by hipblas/hcblas + //cuda2hipRename["cublasDiagType_t"] = {"hipblasDiagType_t", CONV_BLAS}; + //cuda2hipRename["CUBLAS_DIAG_NON_UNIT"] = {"HIPBLAS_DIAG_NON_UNIT", CONV_BLAS}; + //cuda2hipRename["CUBLAS_DIAG_UNIT"] = {"HIPBLAS_DIAG_UNIT", CONV_BLAS}; + // Blas Side Modes + // unsupported yet by hipblas/hcblas + //cuda2hipRename["cublasSideMode_t"] = {"hipblasSideMode_t", CONV_BLAS}; + //cuda2hipRename["CUBLAS_SIDE_LEFT"] = {"HIPBLAS_SIDE_LEFT", CONV_BLAS}; + //cuda2hipRename["CUBLAS_SIDE_RIGHT"] = {"HIPBLAS_SIDE_RIGHT", CONV_BLAS}; + // Blas Pointer Modes + // unsupported yet by hipblas/hcblas + //cuda2hipRename["cublasPointerMode_t"] = {"hipblasPointerMode_t", CONV_BLAS}; + //cuda2hipRename["CUBLAS_POINTER_MODE_HOST"] = {"HIPBLAS_POINTER_MODE_HOST", CONV_BLAS}; + //cuda2hipRename["CUBLAS_POINTER_MODE_DEVICE"] = {"HIPBLAS_POINTER_MODE_DEVICE", CONV_BLAS}; + // Blas Atomics Modes + // unsupported yet by hipblas/hcblas + //cuda2hipRename["cublasAtomicsMode_t"] = {"hipblasAtomicsMode_t", CONV_BLAS}; + //cuda2hipRename["CUBLAS_ATOMICS_NOT_ALLOWED"] = {"HIPBLAS_ATOMICS_NOT_ALLOWED", CONV_BLAS}; + //cuda2hipRename["CUBLAS_ATOMICS_ALLOWED"] = {"HIPBLAS_ATOMICS_ALLOWED", CONV_BLAS}; + // Blas Data Type + // unsupported yet by hipblas/hcblas + //cuda2hipRename["cublasDataType_t"] = {"hipblasDataType_t", CONV_BLAS}; + //cuda2hipRename["CUBLAS_DATA_FLOAT"] = {"HIPBLAS_DATA_FLOAT", CONV_BLAS}; + //cuda2hipRename["CUBLAS_DATA_DOUBLE"] = {"HIPBLAS_DATA_DOUBLE", CONV_BLAS}; + //cuda2hipRename["CUBLAS_DATA_HALF"] = {"HIPBLAS_DATA_HALF", CONV_BLAS}; + //cuda2hipRename["CUBLAS_DATA_INT8"] = {"HIPBLAS_DATA_INT8", CONV_BLAS}; + + // Blas1 (v1) Routines + cuda2hipRename["cublasCreate"] = {"hipblasCreate", CONV_BLAS}; + cuda2hipRename["cublasDestroy"] = {"hipblasDestroy", CONV_BLAS}; + + cuda2hipRename["cublasSetVector"] = {"hipblasSetVector", CONV_BLAS}; + cuda2hipRename["cublasGetVector"] = {"hipblasGetVector", CONV_BLAS}; + cuda2hipRename["cublasSetMatrix"] = {"hipblasSetMatrix", CONV_BLAS}; + cuda2hipRename["cublasGetMatrix"] = {"hipblasGetMatrix", CONV_BLAS}; + + // unsupported yet by hipblas/hcblas + //cuda2hipRename["cublasGetMatrixAsync"] = {"hipblasGetMatrixAsync", CONV_BLAS}; + //cuda2hipRename["cublasSetMatrixAsync"] = {"hipblasSetMatrixAsync", CONV_BLAS}; + + // NRM2 + //cuda2hipRename["cublasSnrm2"] = {"hipblasSnrm2", CONV_BLAS}; + //cuda2hipRename["cublasDnrm2"] = {"hipblasDnrm2", CONV_BLAS}; + //cuda2hipRename["cublasScnrm2"] = {"hipblasScnrm2", CONV_BLAS}; + //cuda2hipRename["cublasDznrm2"] = {"hipblasDznrm2", CONV_BLAS}; + + // DOT + cuda2hipRename["cublasSdot"] = {"hipblasSdot", CONV_BLAS}; + // there is no such a function in CUDA + cuda2hipRename["cublasSdotBatched"] = {"hipblasSdotBatched",CONV_BLAS}; + cuda2hipRename["cublasDdot"] = {"hipblasDdot", CONV_BLAS}; + // there is no such a function in CUDA + cuda2hipRename["cublasDdotBatched"] = {"hipblasDdotBatched", CONV_BLAS}; + //cuda2hipRename["cublasCdotu"] = {"hipblasCdotu", CONV_BLAS}; + //cuda2hipRename["cublasCdotc"] = {"hipblasCdotc", CONV_BLAS}; + //cuda2hipRename["cublasZdotu"] = {"hipblasZdotu", CONV_BLAS}; + //cuda2hipRename["cublasZdotc"] = {"hipblasZdotc", CONV_BLAS}; + + // SCAL + cuda2hipRename["cublasSscal"] = {"hipblasSscal", CONV_BLAS}; + // there is no such a function in CUDA + cuda2hipRename["cublasSscalBatched"] = {"hipblasSscalBatched", CONV_BLAS}; + cuda2hipRename["cublasDscal"] = {"hipblasDscal", CONV_BLAS}; + // there is no such a function in CUDA + cuda2hipRename["cublasDscalBatched"] = {"hipblasDscalBatched", CONV_BLAS}; + //cuda2hipRename["cublasCscal"] = {"hipblasCscal", CONV_BLAS}; + //cuda2hipRename["cublasCsscal"] = {"hipblasCsscal", CONV_BLAS}; + //cuda2hipRename["cublasZscal"] = {"hipblasZscal", CONV_BLAS}; + //cuda2hipRename["cublasZdscal"] = {"hipblasZdscal", CONV_BLAS}; + + // AXPY + cuda2hipRename["cublasSaxpy"] = {"hipblasSaxpy", CONV_BLAS}; + // there is no such a function in CUDA + cuda2hipRename["cublasSaxpyBatched"] = {"hipblasSaxpyBatched", CONV_BLAS}; + //cuda2hipRename["cublasDaxpy"] = {"hipblasDaxpy", CONV_BLAS}; + //cuda2hipRename["cublasCaxpy"] = {"hipblasCaxpy", CONV_BLAS}; + //cuda2hipRename["cublasZaxpy"] = {"hipblasZaxpy", CONV_BLAS}; + + // COPY + cuda2hipRename["cublasScopy"] = {"hipblasScopy", CONV_BLAS}; + // there is no such a function in CUDA + cuda2hipRename["cublasScopyBatched"] = {"hipblasScopyBatched", CONV_BLAS}; + cuda2hipRename["cublasDcopy"] = {"hipblasDcopy", CONV_BLAS}; + // there is no such a function in CUDA + cuda2hipRename["cublasDcopyBatched"] = {"hipblasDcopyBatched", CONV_BLAS}; + //cuda2hipRename["cublasCcopy"] = {"hipblasCcopy", CONV_BLAS}; + //cuda2hipRename["cublasZcopy"] = {"hipblasZcopy", CONV_BLAS}; + + // SWAP + //cuda2hipRename["cublasSswap"] = {"hipblasSswap", CONV_BLAS}; + //cuda2hipRename["cublasDswap"] = {"hipblasDswap", CONV_BLAS}; + //cuda2hipRename["cublasCswap"] = {"hipblasCswap", CONV_BLAS}; + //cuda2hipRename["cublasZswap"] = {"hipblasZswap", CONV_BLAS}; + + // AMAX + //cuda2hipRename["cublasIsamax"] = {"hipblasIsamax", CONV_BLAS}; + //cuda2hipRename["cublasIdamax"] = {"hipblasIdamax", CONV_BLAS}; + //cuda2hipRename["cublasIcamax"] = {"hipblasIcamax", CONV_BLAS}; + //cuda2hipRename["cublasIzamax"] = {"hipblasIzamax", CONV_BLAS}; + + // AMIN + //cuda2hipRename["cublasIsamin"] = {"hipblasIsamin", CONV_BLAS}; + //cuda2hipRename["cublasIdamin"] = {"hipblasIdamin", CONV_BLAS}; + //cuda2hipRename["cublasIcamin"] = {"hipblasIcamin", CONV_BLAS}; + //cuda2hipRename["cublasIzamin"] = {"hipblasIzamin", CONV_BLAS}; + + // ASUM + cuda2hipRename["cublasSasum"] = {"hipblasSasum", CONV_BLAS}; + // there is no such a function in CUDA + cuda2hipRename["cublasSasumBatched"] = {"hipblasSasumBatched", CONV_BLAS}; + cuda2hipRename["cublasDasum"] = {"hipblasDasum", CONV_BLAS}; + // there is no such a function in CUDA + cuda2hipRename["cublasDasumBatched"] = {"hipblasDasumBatched", CONV_BLAS}; + //cuda2hipRename["cublasScasum"] = {"hipblasScasum", CONV_BLAS}; + //cuda2hipRename["cublasDzasum"] = {"hipblasDzasum", CONV_BLAS}; + + // ROT + //cuda2hipRename["cublasSrot"] = {"hipblasSrot", CONV_BLAS}; + //cuda2hipRename["cublasDrot"] = {"hipblasDrot", CONV_BLAS}; + //cuda2hipRename["cublasCrot"] = {"hipblasCrot", CONV_BLAS}; + //cuda2hipRename["cublasCsrot"] = {"hipblasCsrot", CONV_BLAS}; + //cuda2hipRename["cublasZrot"] = {"hipblasZrot", CONV_BLAS}; + //cuda2hipRename["cublasZdrot"] = {"hipblasZdrot", CONV_BLAS}; + + // ROTG + //cuda2hipRename["cublasSrotg"] = {"hipblasSrotg", CONV_BLAS}; + //cuda2hipRename["cublasDrotg"] = {"hipblasDrotg", CONV_BLAS}; + //cuda2hipRename["cublasCrotg"] = {"hipblasCrotg", CONV_BLAS}; + //cuda2hipRename["cublasZrotg"] = {"hipblasZrotg", CONV_BLAS}; + + // ROTM + //cuda2hipRename["cublasSrotm"] = {"hipblasSrotm", CONV_BLAS}; + //cuda2hipRename["cublasDrotm"] = {"hipblasDrotm", CONV_BLAS}; + + // ROTMG + //cuda2hipRename["cublasSrotmg"] = {"hipblasSrotmg", CONV_BLAS}; + //cuda2hipRename["cublasDrotmg"] = {"hipblasDrotmg", CONV_BLAS}; + + // GEMV + cuda2hipRename["cublasSgemv"] = {"hipblasSgemv", CONV_BLAS}; + // there is no such a function in CUDA + cuda2hipRename["cublasSgemvBatched"] = {"hipblasSgemvBatched", CONV_BLAS}; + //cuda2hipRename["cublasDgemv"] = {"hipblasDgemv", CONV_BLAS}; + //cuda2hipRename["cublasCgemv"] = {"hipblasCgemv", CONV_BLAS}; + //cuda2hipRename["cublasZgemv"] = {"hipblasZgemv", CONV_BLAS}; + + // GBMV + //cuda2hipRename["cublasSgbmv"] = {"hipblasSgbmv", CONV_BLAS}; + //cuda2hipRename["cublasDgbmv"] = {"hipblasDgbmv", CONV_BLAS}; + //cuda2hipRename["cublasCgbmv"] = {"hipblasCgbmv", CONV_BLAS}; + //cuda2hipRename["cublasZgbmv"] = {"hipblasZgbmv", CONV_BLAS}; + + // TRMV + //cuda2hipRename["cublasStrmv"] = {"hipblasStrmv", CONV_BLAS}; + //cuda2hipRename["cublasDtrmv"] = {"hipblasDtrmv", CONV_BLAS}; + //cuda2hipRename["cublasCtrmv"] = {"hipblasCtrmv", CONV_BLAS}; + //cuda2hipRename["cublasZtrmv"] = {"hipblasZtrmv", CONV_BLAS}; + + // TBMV + //cuda2hipRename["cublasStbmv"] = {"hipblasStbmv", CONV_BLAS}; + //cuda2hipRename["cublasDtbmv"] = {"hipblasDtbmv", CONV_BLAS}; + //cuda2hipRename["cublasCtbmv"] = {"hipblasCtbmv", CONV_BLAS}; + //cuda2hipRename["cublasZtbmv"] = {"hipblasZtbmv", CONV_BLAS}; + + // TPMV + //cuda2hipRename["cublasStpmv"] = {"hipblasStpmv", CONV_BLAS}; + //cuda2hipRename["cublasDtpmv"] = {"hipblasDtpmv", CONV_BLAS}; + //cuda2hipRename["cublasCtpmv"] = {"hipblasCtpmv", CONV_BLAS}; + //cuda2hipRename["cublasZtpmv"] = {"hipblasZtpmv", CONV_BLAS}; + + // TRSV + //cuda2hipRename["cublasStrsv"] = {"hipblasStrsv", CONV_BLAS}; + //cuda2hipRename["cublasDtrsv"] = {"hipblasDtrsv", CONV_BLAS}; + //cuda2hipRename["cublasCtrsv"] = {"hipblasCtrsv", CONV_BLAS}; + //cuda2hipRename["cublasZtrsv"] = {"hipblasZtrsv", CONV_BLAS}; + + // TPSV + //cuda2hipRename["cublasStpsv"] = {"hipblasStpsv", CONV_BLAS}; + //cuda2hipRename["cublasDtpsv"] = {"hipblasDtpsv", CONV_BLAS}; + //cuda2hipRename["cublasCtpsv"] = {"hipblasCtpsv", CONV_BLAS}; + //cuda2hipRename["cublasZtpsv"] = {"hipblasZtpsv", CONV_BLAS}; + + // TBSV + //cuda2hipRename["cublasStbsv"] = {"hipblasStbsv", CONV_BLAS}; + //cuda2hipRename["cublasDtbsv"] = {"hipblasDtbsv", CONV_BLAS}; + //cuda2hipRename["cublasCtbsv"] = {"hipblasCtbsv", CONV_BLAS}; + //cuda2hipRename["cublasZtbsv"] = {"hipblasZtbsv", CONV_BLAS}; + + // SYMV/HEMV + //cuda2hipRename["cublasSsymv"] = {"hipblasSsymv", CONV_BLAS}; + //cuda2hipRename["cublasDsymv"] = {"hipblasDsymv", CONV_BLAS}; + //cuda2hipRename["cublasCsymv"] = {"hipblasCsymv", CONV_BLAS}; + //cuda2hipRename["cublasZsymv"] = {"hipblasZsymv", CONV_BLAS}; + //cuda2hipRename["cublasChemv"] = {"hipblasChemv", CONV_BLAS}; + //cuda2hipRename["cublasZhemv"] = {"hipblasZhemv", CONV_BLAS}; + + // SBMV/HBMV + //cuda2hipRename["cublasSsbmv"] = {"hipblasSsbmv", CONV_BLAS}; + //cuda2hipRename["cublasDsbmv"] = {"hpiblasDsbmv", CONV_BLAS}; + //cuda2hipRename["cublasChbmv"] = {"hipblasChbmv", CONV_BLAS}; + //cuda2hipRename["cublasZhbmv"] = {"hipblasZhbmv", CONV_BLAS}; + + // SPMV/HPMV + //cuda2hipRename["cublasSspmv"] = {"hipblasSspmv", CONV_BLAS}; + //cuda2hipRename["cublasDspmv"] = {"hipblasDspmv", CONV_BLAS}; + //cuda2hipRename["cublasChpmv"] = {"hipblasChpmv", CONV_BLAS}; + //cuda2hipRename["cublasZhpmv"] = {"hipblasZhpmv", CONV_BLAS}; + + // GER + cuda2hipRename["cublasSger"] = {"hipblasSger", CONV_BLAS}; + //cuda2hipRename["cublasDger"] = {"hipblasDger", CONV_BLAS}; + //cuda2hipRename["cublasCgeru"] = {"hipblasCgeru", CONV_BLAS}; + //cuda2hipRename["cublasCgerc"] = {"hipblasCgerc", CONV_BLAS}; + //cuda2hipRename["cublasZgeru"] = {"hipblasZgeru", CONV_BLAS}; + //cuda2hipRename["cublasZgerc"] = {"hipblasZgerc", CONV_BLAS}; + + // SYR/HER + //cuda2hipRename["cublasSsyr"] = {"hipblasSsyr", CONV_BLAS}; + //cuda2hipRename["cublasDsyr"] = {"hipblasDsyr", CONV_BLAS}; + //cuda2hipRename["cublasCher"] = {"hipblasCher", CONV_BLAS}; + //cuda2hipRename["cublasZher"] = {"hipblasZher", CONV_BLAS}; + + // SPR/HPR + //cuda2hipRename["cublasSspr"] = {"hipblasSspr", CONV_BLAS}; + //cuda2hipRename["cublasDspr"] = {"hipblasDspr", CONV_BLAS}; + //cuda2hipRename["cublasChpr"] = {"hipblasChpr", CONV_BLAS}; + //cuda2hipRename["cublasZhpr"] = {"hipblasZhpr", CONV_BLAS}; + + // SYR2/HER2 + //cuda2hipRename["cublasSsyr2"] = {"hipblasSsyr2", CONV_BLAS}; + //cuda2hipRename["cublasDsyr2"] = {"hipblasDsyr2", CONV_BLAS}; + //cuda2hipRename["cublasCher2"] = {"hipblasCher2", CONV_BLAS}; + //cuda2hipRename["cublasZher2"] = {"hipblasZher2", CONV_BLAS}; + + // SPR2/HPR2 + //cuda2hipRename["cublasSspr2"] = {"hipblasSspr2", CONV_BLAS}; + //cuda2hipRename["cublasDspr2"] = {"hipblasDspr2", CONV_BLAS}; + //cuda2hipRename["cublasChpr2"] = {"hipblasChpr2", CONV_BLAS}; + //cuda2hipRename["cublasZhpr2"] = {"hipblasZhpr2", CONV_BLAS}; + + // Blas3 (v1) Routines + // GEMM + cuda2hipRename["cublasSgemm"] = {"hipblasSgemm", CONV_BLAS}; + //cuda2hipRename["cublasDgemm"] = {"hipblasDgemm", CONV_BLAS}; + cuda2hipRename["cublasCgemm"] = {"hipblasCgemm", CONV_BLAS}; + //cuda2hipRename["cublasZgemm"] = {"hipblasZgemm", CONV_BLAS}; + + // BATCH GEMM + cuda2hipRename["cublasSgemmBatched"] = {"hipblasSgemmBatched", CONV_BLAS}; + //cuda2hipRename["cublasDgemmBatched"] = {"hipblasDgemmBatched", CONV_BLAS}; + cuda2hipRename["cublasCgemmBatched"] = {"hipblasCgemmBatched", CONV_BLAS}; + //cuda2hipRename["cublasZgemmBatched"] = {"hipblasZgemmBatched", CONV_BLAS}; + + // SYRK + //cuda2hipRename["cublasSsyrk"] = {"hipblasSsyrk", CONV_BLAS}; + //cuda2hipRename["cublasDsyrk"] = {"hipblasDsyrk", CONV_BLAS}; + //cuda2hipRename["cublasCsyrk"] = {"hipblasCsyrk", CONV_BLAS}; + //cuda2hipRename["cublasZsyrk"] = {"hipblasZsyrk", CONV_BLAS}; + + // HERK + //cuda2hipRename["cublasCherk"] = {"hipblasCherk", CONV_BLAS}; + //cuda2hipRename["cublasZherk"] = {"hipblasZherk", CONV_BLAS}; + + // SYR2K + //cuda2hipRename["cublasSsyr2k"] = {"hipblasSsyr2k", CONV_BLAS}; + //cuda2hipRename["cublasDsyr2k"] = {"hipblasDsyr2k", CONV_BLAS}; + //cuda2hipRename["cublasCsyr2k"] = {"hipblasCsyr2k", CONV_BLAS}; + //cuda2hipRename["cublasZsyr2k"] = {"hipblasZsyr2k", CONV_BLAS}; + + // SYRKX - eXtended SYRK + // cublasSsyrkx + // cublasDsyrkx + // cublasCsyrkx + // cublasZsyrkx + + // HER2K + //cuda2hipRename["cublasCher2k"] = {"hipblasCher2k", CONV_BLAS}; + //cuda2hipRename["cublasZher2k"] = {"hipblasZher2k", CONV_BLAS}; + + // HERKX - eXtended HERK + // cublasCherkx + // cublasZherkx + + // SYMM + //cuda2hipRename["cublasSsymm"] = {"hipblasSsymm", CONV_BLAS}; + //cuda2hipRename["cublasDsymm"] = {"hipblasDsymm", CONV_BLAS}; + //cuda2hipRename["cublasCsymm"] = {"hipblasCsymm", CONV_BLAS}; + //cuda2hipRename["cublasZsymm"] = {"hipblasZsymm", CONV_BLAS}; + + // HEMM + //cuda2hipRename["cublasChemm"] = {"hipblasChemm", CONV_BLAS}; + //cuda2hipRename["cublasZhemm"] = {"hipblasZhemm", CONV_BLAS}; + + // TRSM + //cuda2hipRename["cublasStrsm"] = {"hipblasStrsm", CONV_BLAS}; + //cuda2hipRename["cublasDtrsm"] = {"hipblasDtrsm", CONV_BLAS}; + //cuda2hipRename["cublasCtrsm"] = {"hipblasCtrsm", CONV_BLAS}; + //cuda2hipRename["cublasZtrsm"] = {"hipblasZtrsm", CONV_BLAS}; + + // TRSM - Batched Triangular Solver + //cuda2hipRename["cublasStrsmBatched"] = {"hipblasStrsmBatched", CONV_BLAS}; + //cuda2hipRename["cublasDtrsmBatched"] = {"hipblasDtrsmBatched", CONV_BLAS}; + //cuda2hipRename["cublasCtrsmBatched"] = {"hipblasCtrsmBatched", CONV_BLAS}; + //cuda2hipRename["cublasZtrsmBatched"] = {"hipblasZtrsmBatched", CONV_BLAS}; + + // TRMM + //cuda2hipRename["cublasStrmm"] = {"hipblasStrmm", CONV_BLAS}; + //cuda2hipRename["cublasDtrmm"] = {"hipblasDtrmm", CONV_BLAS}; + //cuda2hipRename["cublasCtrmm"] = {"hipblasCtrmm", CONV_BLAS}; + //cuda2hipRename["cublasZtrmm"] = {"hipblasZtrmm", CONV_BLAS}; + + + // TO SUPPORT OR NOT? (cublas_api.h) + // ------------------------ CUBLAS BLAS - like extension + + // GEAM + // cublasSgeam + // cublasDgeam + // cublasCgeam + // cublasZgeam + + // GETRF - Batched LU + // cublasSgetrfBatched + // cublasDgetrfBatched + // cublasCgetrfBatched + // cublasZgetrfBatched + + // Batched inversion based on LU factorization from getrf + // cublasSgetriBatched + // cublasDgetriBatched + // cublasCgetriBatched + // cublasZgetriBatched + + // Batched solver based on LU factorization from getrf + // cublasSgetrsBatched + // cublasDgetrsBatched + // cublasCgetrsBatched + // cublasZgetrsBatched + + // TRSM - Batched Triangular Solver + // cublasStrsmBatched + // cublasDtrsmBatched + // cublasCtrsmBatched + // cublasZtrsmBatched + + // MATINV - Batched + // cublasSmatinvBatched + // cublasDmatinvBatched + // cublasCmatinvBatched + // cublasZmatinvBatched + + // Batch QR Factorization + // cublasSgeqrfBatched + // cublasDgeqrfBatched + // cublasCgeqrfBatched + // cublasZgeqrfBatched + + // Least Square Min only m >= n and Non-transpose supported + // cublasSgelsBatched + // cublasDgelsBatched + // cublasCgelsBatched + // cublasZgelsBatched + + // DGMM + // cublasSdgmm + // cublasDdgmm + // cublasCdgmm + // cublasZdgmm + + // TPTTR - Triangular Pack format to Triangular format + // cublasStpttr + // cublasDtpttr + // cublasCtpttr + // cublasZtpttr + + // TRTTP - Triangular format to Triangular Pack format + // cublasStrttp + // cublasDtrttp + // cublasCtrttp + // cublasZtrttp + + // Blas2 (v2) Routines + cuda2hipRename["cublasCreate_v2"] = {"hipblasCreate", CONV_BLAS}; + cuda2hipRename["cublasDestroy_v2"] = {"hipblasDestroy", CONV_BLAS}; + + // unsupported yet by hipblas/hcblas + //cuda2hipRename["cublasGetVersion_v2"] = {"hipblasGetVersion", CONV_BLAS}; + //cuda2hipRename["cublasSetStream_v2"] = {"hipblasSetStream", CONV_BLAS}; + //cuda2hipRename["cublasGetStream_v2"] = {"hipblasGetStream", CONV_BLAS}; + //cuda2hipRename["cublasGetPointerMode_v2"] = {"hipblasGetPointerMode", CONV_BLAS}; + //cuda2hipRename["cublasSetPointerMode_v2"] = {"hipblasSetPointerMode", CONV_BLAS}; + + // GEMV + cuda2hipRename["cublasSgemv_v2"] = {"hipblasSgemv", CONV_BLAS}; + //cuda2hipRename["cublasDgemv_v2"] = {"hipblasDgemv", CONV_BLAS}; + //cuda2hipRename["cublasCgemv_v2"] = {"hipblasCgemv", CONV_BLAS}; + //cuda2hipRename["cublasZgemv_v2"] = {"hipblasZgemv", CONV_BLAS}; + + // GBMV + //cuda2hipRename["cublasSgbmv_v2"] = {"hipblasSgbmv", CONV_BLAS}; + //cuda2hipRename["cublasDgbmv_v2"] = {"hipblasDgbmv", CONV_BLAS}; + //cuda2hipRename["cublasCgbmv_v2"] = {"hipblasCgbmv", CONV_BLAS}; + //cuda2hipRename["cublasZgbmv_v2"] = {"hipblasZgbmv", CONV_BLAS}; + + // TRMV + //cuda2hipRename["cublasStrmv_v2"] = {"hipblasStrmv", CONV_BLAS}; + //cuda2hipRename["cublasDtrmv_v2"] = {"hipblasDtrmv", CONV_BLAS}; + //cuda2hipRename["cublasCtrmv_v2"] = {"hipblasCtrmv", CONV_BLAS}; + //cuda2hipRename["cublasZtrmv_v2"] = {"hipblasZtrmv", CONV_BLAS}; + + // TBMV + //cuda2hipRename["cublasStbmv_v2"] = {"hipblasStbmv", CONV_BLAS}; + //cuda2hipRename["cublasDtbmv_v2"] = {"hipblasDtbmv", CONV_BLAS}; + //cuda2hipRename["cublasCtbmv_v2"] = {"hipblasCtbmv", CONV_BLAS}; + //cuda2hipRename["cublasZtbmv_v2"] = {"hipblasZtbmv", CONV_BLAS}; + + // TPMV + //cuda2hipRename["cublasStpmv_v2"] = {"hipblasStpmv", CONV_BLAS}; + //cuda2hipRename["cublasDtpmv_v2"] = {"hipblasDtpmv", CONV_BLAS}; + //cuda2hipRename["cublasCtpmv_v2"] = {"hipblasCtpmv", CONV_BLAS}; + //cuda2hipRename["cublasZtpmv_v2"] = {"hipblasZtpmv", CONV_BLAS}; + + // TRSV + //cuda2hipRename["cublasStrsv_v2"] = {"hipblasStrsv", CONV_BLAS}; + //cuda2hipRename["cublasDtrsv_v2"] = {"hipblasDtrsv", CONV_BLAS}; + //cuda2hipRename["cublasCtrsv_v2"] = {"hipblasCtrsv", CONV_BLAS}; + //cuda2hipRename["cublasZtrsv_v2"] = {"hipblasZtrsv", CONV_BLAS}; + + // TPSV + //cuda2hipRename["cublasStpsv_v2"] = {"hipblasStpsv", CONV_BLAS}; + //cuda2hipRename["cublasDtpsv_v2"] = {"hipblasDtpsv", CONV_BLAS}; + //cuda2hipRename["cublasCtpsv_v2"] = {"hipblasCtpsv", CONV_BLAS}; + //cuda2hipRename["cublasZtpsv_v2"] = {"hipblasZtpsv", CONV_BLAS}; + + // TBSV + //cuda2hipRename["cublasStbsv_v2"] = {"hipblasStbsv", CONV_BLAS}; + //cuda2hipRename["cublasDtbsv_v2"] = {"hipblasDtbsv", CONV_BLAS}; + //cuda2hipRename["cublasCtbsv_v2"] = {"hipblasCtbsv", CONV_BLAS}; + //cuda2hipRename["cublasZtbsv_v2"] = {"hipblasZtbsv", CONV_BLAS}; + + // SYMV/HEMV + //cuda2hipRename["cublasSsymv_v2"] = {"hipblasSsymv", CONV_BLAS}; + //cuda2hipRename["cublasDsymv_v2"] = {"hipblasDsymv", CONV_BLAS}; + //cuda2hipRename["cublasCsymv_v2"] = {"hipblasCsymv", CONV_BLAS}; + //cuda2hipRename["cublasZsymv_v2"] = {"hipblasZsymv", CONV_BLAS}; + //cuda2hipRename["cublasChemv_v2"] = {"hipblasChemv", CONV_BLAS}; + //cuda2hipRename["cublasZhemv_v2"] = {"hipblasZhemv", CONV_BLAS}; + + // SBMV/HBMV + //cuda2hipRename["cublasSsbmv_v2"] = {"hipblasSsbmv", CONV_BLAS}; + //cuda2hipRename["cublasDsbmv_v2"] = {"hpiblasDsbmv", CONV_BLAS}; + //cuda2hipRename["cublasChbmv_v2"] = {"hipblasChbmv", CONV_BLAS}; + //cuda2hipRename["cublasZhbmv_v2"] = {"hipblasZhbmv", CONV_BLAS}; + + // SPMV/HPMV + //cuda2hipRename["cublasSspmv_v2"] = {"hipblasSspmv", CONV_BLAS}; + //cuda2hipRename["cublasDspmv_v2"] = {"hipblasDspmv", CONV_BLAS}; + //cuda2hipRename["cublasChpmv_v2"] = {"hipblasChpmv", CONV_BLAS}; + //cuda2hipRename["cublasZhpmv_v2"] = {"hipblasZhpmv", CONV_BLAS}; + + // GER + cuda2hipRename["cublasSger_v2"] = {"hipblasSger", CONV_BLAS}; + //cuda2hipRename["cublasDger_v2"] = {"hipblasDger", CONV_BLAS}; + //cuda2hipRename["cublasCgeru_v2"] = {"hipblasCgeru", CONV_BLAS}; + //cuda2hipRename["cublasCgerc_v2"] = {"hipblasCgerc", CONV_BLAS}; + //cuda2hipRename["cublasZgeru_v2"] = {"hipblasZgeru", CONV_BLAS}; + //cuda2hipRename["cublasZgerc_v2"] = {"hipblasZgerc", CONV_BLAS}; + + // SYR/HER + //cuda2hipRename["cublasSsyr_v2"] = {"hipblasSsyr", CONV_BLAS}; + //cuda2hipRename["cublasDsyr_v2"] = {"hipblasDsyr", CONV_BLAS}; + //cuda2hipRename["cublasCsyr_v2"] = {"hipblasCsyr", CONV_BLAS}; + //cuda2hipRename["cublasZsyr_v2"] = {"hipblasZsyr", CONV_BLAS}; + //cuda2hipRename["cublasCher_v2"] = {"hipblasCher", CONV_BLAS}; + //cuda2hipRename["cublasZher_v2"] = {"hipblasZher", CONV_BLAS}; + + // SPR/HPR + //cuda2hipRename["cublasSspr_v2"] = {"hipblasSspr", CONV_BLAS}; + //cuda2hipRename["cublasDspr_v2"] = {"hipblasDspr", CONV_BLAS}; + //cuda2hipRename["cublasChpr_v2"] = {"hipblasChpr", CONV_BLAS}; + //cuda2hipRename["cublasZhpr_v2"] = {"hipblasZhpr", CONV_BLAS}; + + // SYR2/HER2 + //cuda2hipRename["cublasSsyr2_v2"] = {"hipblasSsyr2", CONV_BLAS}; + //cuda2hipRename["cublasDsyr2_v2"] = {"hipblasDsyr2", CONV_BLAS}; + //cuda2hipRename["cublasCsyr2_v2"] = {"hipblasCsyr2", CONV_BLAS}; + //cuda2hipRename["cublasZsyr2_v2"] = {"hipblasZsyr2", CONV_BLAS}; + //cuda2hipRename["cublasCher2_v2"] = {"hipblasCher2", CONV_BLAS}; + //cuda2hipRename["cublasZher2_v2"] = {"hipblasZher2", CONV_BLAS}; + + // SPR2/HPR2 + //cuda2hipRename["cublasSspr2_v2"] = {"hipblasSspr2", CONV_BLAS}; + //cuda2hipRename["cublasDspr2_v2"] = {"hipblasDspr2", CONV_BLAS}; + //cuda2hipRename["cublasChpr2_v2"] = {"hipblasChpr2", CONV_BLAS}; + //cuda2hipRename["cublasZhpr2_v2"] = {"hipblasZhpr2", CONV_BLAS}; + + // Blas3 (v2) Routines + // GEMM + cuda2hipRename["cublasSgemm_v2"] = {"hipblasSgemm", CONV_BLAS}; + //cuda2hipRename["cublasDgemm_v2"] = {"hipblasDgemm", CONV_BLAS}; + cuda2hipRename["cublasCgemm_v2"] = {"hipblasCgemm", CONV_BLAS}; + //cuda2hipRename["cublasZgemm_v2"] = {"hipblasZgemm", CONV_BLAS}; + + //IO in FP16 / FP32, computation in float + // cublasSgemmEx + + // SYRK + //cuda2hipRename["cublasSsyrk_v2"] = {"hipblasSsyrk", CONV_BLAS}; + //cuda2hipRename["cublasDsyrk_v2"] = {"hipblasDsyrk", CONV_BLAS}; + //cuda2hipRename["cublasCsyrk_v2"] = {"hipblasCsyrk", CONV_BLAS}; + //cuda2hipRename["cublasZsyrk_v2"] = {"hipblasZsyrk", CONV_BLAS}; + + // HERK + //cuda2hipRename["cublasCherk_v2"] = {"hipblasCherk", CONV_BLAS}; + //cuda2hipRename["cublasZherk_v2"] = {"hipblasZherk", CONV_BLAS}; + + // SYR2K + //cuda2hipRename["cublasSsyr2k_v2"] = {"hipblasSsyr2k", CONV_BLAS}; + //cuda2hipRename["cublasDsyr2k_v2"] = {"hipblasDsyr2k", CONV_BLAS}; + //cuda2hipRename["cublasCsyr2k_v2"] = {"hipblasCsyr2k", CONV_BLAS}; + //cuda2hipRename["cublasZsyr2k_v2"] = {"hipblasZsyr2k", CONV_BLAS}; + + // HER2K + //cuda2hipRename["cublasCher2k_v2"] = {"hipblasCher2k", CONV_BLAS}; + //cuda2hipRename["cublasZher2k_v2"] = {"hipblasZher2k", CONV_BLAS}; + + // SYMM + //cuda2hipRename["cublasSsymm_v2"] = {"hipblasSsymm", CONV_BLAS}; + //cuda2hipRename["cublasDsymm_v2"] = {"hipblasDsymm", CONV_BLAS}; + //cuda2hipRename["cublasCsymm_v2"] = {"hipblasCsymm", CONV_BLAS}; + //cuda2hipRename["cublasZsymm_v2"] = {"hipblasZsymm", CONV_BLAS}; + + // HEMM + //cuda2hipRename["cublasChemm_v2"] = {"hipblasChemm", CONV_BLAS}; + //cuda2hipRename["cublasZhemm_v2"] = {"hipblasZhemm", CONV_BLAS}; + + // TRSM + //cuda2hipRename["cublasStrsm_v2"] = {"hipblasStrsm", CONV_BLAS}; + //cuda2hipRename["cublasDtrsm_v2"] = {"hipblasDtrsm", CONV_BLAS}; + //cuda2hipRename["cublasCtrsm_v2"] = {"hipblasCtrsm", CONV_BLAS}; + //cuda2hipRename["cublasZtrsm_v2"] = {"hipblasZtrsm", CONV_BLAS}; + + // TRMM + //cuda2hipRename["cublasStrmm_v2"] = {"hipblasStrmm", CONV_BLAS}; + //cuda2hipRename["cublasDtrmm_v2"] = {"hipblasDtrmm", CONV_BLAS}; + //cuda2hipRename["cublasCtrmm_v2"] = {"hipblasCtrmm", CONV_BLAS}; + //cuda2hipRename["cublasZtrmm_v2"] = {"hipblasZtrmm", CONV_BLAS}; + + // NRM2 + //cuda2hipRename["cublasSnrm2_v2"] = {"hipblasSnrm2", CONV_BLAS}; + //cuda2hipRename["cublasDnrm2_v2"] = {"hipblasDnrm2", CONV_BLAS}; + //cuda2hipRename["cublasScnrm2_v2"] = {"hipblasScnrm2", CONV_BLAS}; + //cuda2hipRename["cublasDznrm2_v2"] = {"hipblasDznrm2", CONV_BLAS}; + + // DOT + cuda2hipRename["cublasSdot_v2"] = {"hipblasSdot", CONV_BLAS}; + cuda2hipRename["cublasDdot_v2"] = {"hipblasDdot", CONV_BLAS}; + //cuda2hipRename["cublasCdotu_v2"] = {"hipblasCdotu", CONV_BLAS}; + //cuda2hipRename["cublasCdotc_v2"] = {"hipblasCdotc", CONV_BLAS}; + //cuda2hipRename["cublasZdotu_v2"] = {"hipblasZdotu", CONV_BLAS}; + //cuda2hipRename["cublasZdotc_v2"] = {"hipblasZdotc", CONV_BLAS}; + + // SCAL + cuda2hipRename["cublasSscal_v2"] = {"hipblasSscal", CONV_BLAS}; + cuda2hipRename["cublasDscal_v2"] = {"hipblasDscal", CONV_BLAS}; + //cuda2hipRename["cublasCscal_v2"] = {"hipblasCscal", CONV_BLAS}; + //cuda2hipRename["cublasCsscal_v2"] = {"hipblasCsscal", CONV_BLAS}; + //cuda2hipRename["cublasZscal_v2"] = {"hipblasZscal", CONV_BLAS}; + //cuda2hipRename["cublasZdscal_v2"] = {"hipblasZdscal", CONV_BLAS}; + + // AXPY + cuda2hipRename["cublasSaxpy_v2"] = {"hipblasSaxpy", CONV_BLAS}; + //cuda2hipRename["cublasDaxpy_v2"] = {"hipblasDaxpy", CONV_BLAS}; + //cuda2hipRename["cublasCaxpy_v2"] = {"hipblasCaxpy", CONV_BLAS}; + //cuda2hipRename["cublasZaxpy_v2"] = {"hipblasZaxpy", CONV_BLAS}; + + // COPY + cuda2hipRename["cublasScopy_v2"] = {"hipblasScopy", CONV_BLAS}; + cuda2hipRename["cublasDcopy_v2"] = {"hipblasDcopy", CONV_BLAS}; + //cuda2hipRename["cublasCcopy_v2"] = {"hipblasCcopy", CONV_BLAS}; + //cuda2hipRename["cublasZcopy_v2"] = {"hipblasZcopy", CONV_BLAS}; + + // SWAP + //cuda2hipRename["cublasSswap_v2"] = {"hipblasSswap", CONV_BLAS}; + //cuda2hipRename["cublasDswap_v2"] = {"hipblasDswap", CONV_BLAS}; + //cuda2hipRename["cublasCswap_v2"] = {"hipblasCswap", CONV_BLAS}; + //cuda2hipRename["cublasZswap_v2"] = {"hipblasZswap", CONV_BLAS}; + + // AMAX + //cuda2hipRename["cublasIsamax_v2"] = {"hipblasIsamax", CONV_BLAS}; + //cuda2hipRename["cublasIdamax_v2"] = {"hipblasIdamax", CONV_BLAS}; + //cuda2hipRename["cublasIcamax_v2"] = {"hipblasIcamax", CONV_BLAS}; + //cuda2hipRename["cublasIzamax_v2"] = {"hipblasIzamax", CONV_BLAS}; + + // AMIN + //cuda2hipRename["cublasIsamin_v2"] = {"hipblasIsamin", CONV_BLAS}; + //cuda2hipRename["cublasIdamin_v2"] = {"hipblasIdamin", CONV_BLAS}; + //cuda2hipRename["cublasIcamin_v2"] = {"hipblasIcamin", CONV_BLAS}; + //cuda2hipRename["cublasIzamin_v2"] = {"hipblasIzamin", CONV_BLAS}; + + // ASUM + cuda2hipRename["cublasSasum_v2"] = {"hipblasSasum", CONV_BLAS}; + cuda2hipRename["cublasDasum_v2"] = {"hipblasDasum", CONV_BLAS}; + //cuda2hipRename["cublasScasum_v2"] = {"hipblasScasum", CONV_BLAS}; + //cuda2hipRename["cublasDzasum_v2"] = {"hipblasDzasum", CONV_BLAS}; + + // ROT + //cuda2hipRename["cublasSrot_v2"] = {"hipblasSrot", CONV_BLAS}; + //cuda2hipRename["cublasDrot_v2"] = {"hipblasDrot", CONV_BLAS}; + //cuda2hipRename["cublasCrot_v2"] = {"hipblasCrot", CONV_BLAS}; + //cuda2hipRename["cublasCsrot_v2"] = {"hipblasCsrot", CONV_BLAS}; + //cuda2hipRename["cublasZrot_v2"] = {"hipblasZrot", CONV_BLAS}; + //cuda2hipRename["cublasZdrot_v2"] = {"hipblasZdrot", CONV_BLAS}; + + // ROTG + //cuda2hipRename["cublasSrotg_v2"] = {"hipblasSrotg", CONV_BLAS}; + //cuda2hipRename["cublasDrotg_v2"] = {"hipblasDrotg", CONV_BLAS}; + //cuda2hipRename["cublasCrotg_v2"] = {"hipblasCrotg", CONV_BLAS}; + //cuda2hipRename["cublasZrotg_v2"] = {"hipblasZrotg", CONV_BLAS}; + + // ROTM + //cuda2hipRename["cublasSrotm_v2"] = {"hipblasSrotm", CONV_BLAS}; + //cuda2hipRename["cublasDrotm_v2"] = {"hipblasDrotm", CONV_BLAS}; + + // ROTMG + //cuda2hipRename["cublasSrotmg_v2"] = {"hipblasSrotmg", CONV_BLAS}; + //cuda2hipRename["cublasDrotmg_v2"] = {"hipblasDrotmg", CONV_BLAS}; } struct HipNames { @@ -297,6 +957,7 @@ struct cuda2hipMap { }; SmallDenseMap cuda2hipRename; + std::set cudaExcludes; }; StringRef unquoteStr(StringRef s) { @@ -310,7 +971,8 @@ static void processString(StringRef s, const cuda2hipMap &map, SourceLocation start, int64_t countReps[ConvTypes::CONV_LAST]) { size_t begin = 0; - while ((begin = s.find("cuda", begin)) != StringRef::npos) { + while ((begin = s.find("cuda", begin)) != StringRef::npos || + (begin = s.find("cublas", begin)) != StringRef::npos) { const size_t end = s.find_first_of(" ", begin + 4); StringRef name = s.slice(begin, end); const auto found = map.cuda2hipRename.find(name); @@ -402,47 +1064,48 @@ struct HipifyPPCallbacks : public PPCallbacks, public SourceFileCallbacks { const MacroDefinition &MD, SourceRange Range, const MacroArgs *Args) override { if (_sm->isWrittenInMainFile(MacroNameTok.getLocation())) { - for (unsigned int i = 0; Args && i < MD.getMacroInfo()->getNumArgs(); - i++) { - StringRef macroName = MacroNameTok.getIdentifierInfo()->getName(); - std::vector toks; - // Code below is a kind of stolen from 'MacroArgs::getPreExpArgument' - // to workaround the 'const' MacroArgs passed into this hook. - const Token *start = Args->getUnexpArgument(i); - size_t len = Args->getArgLength(start) + 1; -#if (LLVM_VERSION_MAJOR >= 3) && (LLVM_VERSION_MINOR >= 9) - _pp->EnterTokenStream(ArrayRef(start, len), false); -#else - _pp->EnterTokenStream(start, len, false, false); -#endif - do { - toks.push_back(Token()); - Token &tk = toks.back(); - _pp->Lex(tk); - } while (toks.back().isNot(tok::eof)); - _pp->RemoveTopOfLexerStack(); - // end of stolen code - for (auto tok : toks) { - if (tok.isAnyIdentifier()) { - StringRef name = tok.getIdentifierInfo()->getName(); - const auto found = N.cuda2hipRename.find(name); - if (found != N.cuda2hipRename.end()) { - countReps[found->second.countType]++; - StringRef repName = found->second.hipName; - DEBUG(dbgs() - << "Identifier " << name - << " found as an actual argument in expansion of macro " - << macroName << "\n" - << "will be replaced with: " << repName << "\n"); - SourceLocation sl = tok.getLocation(); - Replacement Rep(*_sm, sl, name.size(), repName); - Replace->insert(Rep); + StringRef macroName = MacroNameTok.getIdentifierInfo()->getName(); + if (N.cudaExcludes.end() == N.cudaExcludes.find(macroName)) { + for (unsigned int i = 0; Args && i < MD.getMacroInfo()->getNumArgs(); i++) { + std::vector toks; + // Code below is a kind of stolen from 'MacroArgs::getPreExpArgument' + // to workaround the 'const' MacroArgs passed into this hook. + const Token *start = Args->getUnexpArgument(i); + size_t len = Args->getArgLength(start) + 1; + #if (LLVM_VERSION_MAJOR >= 3) && (LLVM_VERSION_MINOR >= 9) + _pp->EnterTokenStream(ArrayRef(start, len), false); + #else + _pp->EnterTokenStream(start, len, false, false); + #endif + do { + toks.push_back(Token()); + Token &tk = toks.back(); + _pp->Lex(tk); + } while (toks.back().isNot(tok::eof)); + _pp->RemoveTopOfLexerStack(); + // end of stolen code + for (auto tok : toks) { + if (tok.isAnyIdentifier()) { + StringRef name = tok.getIdentifierInfo()->getName(); + const auto found = N.cuda2hipRename.find(name); + if (found != N.cuda2hipRename.end()) { + countReps[found->second.countType]++; + StringRef repName = found->second.hipName; + DEBUG(dbgs() + << "Identifier " << name + << " found as an actual argument in expansion of macro " + << macroName << "\n" + << "will be replaced with: " << repName << "\n"); + SourceLocation sl = tok.getLocation(); + Replacement Rep(*_sm, sl, name.size(), repName); + Replace->insert(Rep); + } + } + if (tok.is(tok::string_literal)) { + StringRef s(tok.getLiteralData(), tok.getLength()); + processString(unquoteStr(s), N, Replace, *_sm, tok.getLocation(), + countReps); } - } - if (tok.is(tok::string_literal)) { - StringRef s(tok.getLiteralData(), tok.getLength()); - processString(unquoteStr(s), N, Replace, *_sm, tok.getLocation(), - countReps); } } } @@ -481,8 +1144,8 @@ public: OS << "hipLaunchParm lp"; size_t replacementLength = OS.str().size(); SourceLocation sl = kernelDecl->getNameInfo().getEndLoc(); - SourceLocation kernelArgListStart = clang::Lexer::findLocationAfterToken( - sl, clang::tok::l_paren, *SM, DefaultLangOptions, true); + SourceLocation kernelArgListStart = Lexer::findLocationAfterToken( + sl, tok::l_paren, *SM, DefaultLangOptions, true); DEBUG(dbgs() << kernelArgListStart.printToString(*SM)); if (kernelDecl->getNumParams() > 0) { const ParmVarDecl *pvdFirst = kernelDecl->getParamDecl(0); @@ -490,7 +1153,7 @@ public: kernelDecl->getParamDecl(kernelDecl->getNumParams() - 1); SourceLocation kernelArgListStart(pvdFirst->getLocStart()); SourceLocation kernelArgListEnd(pvdLast->getLocEnd()); - SourceLocation stop = clang::Lexer::getLocForEndOfToken( + SourceLocation stop = Lexer::getLocForEndOfToken( kernelArgListEnd, 0, *SM, DefaultLangOptions); replacementLength += SM->getCharacterData(stop) - SM->getCharacterData(kernelArgListStart); @@ -510,25 +1173,39 @@ public: LangOptions DefaultLangOptions; if (const CallExpr *call = - Result.Nodes.getNodeAs("cudaCall")) { + Result.Nodes.getNodeAs("cudaCall")) { const FunctionDecl *funcDcl = call->getDirectCallee(); StringRef name = funcDcl->getDeclName().getAsString(); const auto found = N.cuda2hipRename.find(name); if (found != N.cuda2hipRename.end()) { - countReps[found->second.countType]++; StringRef repName = found->second.hipName; SourceLocation sl = call->getLocStart(); - Replacement Rep(*SM, SM->isMacroArgExpansion(sl) - ? SM->getImmediateSpellingLoc(sl) - : sl, - name.size(), repName); - Replace->insert(Rep); + size_t length = name.size(); + bool bReplace = true; + if (SM->isMacroArgExpansion(sl)) { + sl = SM->getImmediateSpellingLoc(sl); + } + else if (SM->isMacroBodyExpansion(sl)) { + SourceLocation sl_macro = SM->getExpansionLoc(sl); + SourceLocation sl_end = Lexer::getLocForEndOfToken(sl_macro, 0, *SM, DefaultLangOptions); + length = SM->getCharacterData(sl_end) - SM->getCharacterData(sl_macro); + StringRef macroName = StringRef(SM->getCharacterData(sl_macro), length); + if (N.cudaExcludes.end() != N.cudaExcludes.find(macroName)) { + bReplace = false; + } else { + sl = sl_macro; + } + } + if (bReplace) { + countReps[found->second.countType]++; + Replacement Rep(*SM, sl, length, repName); + Replace->insert(Rep); + } } } if (const CUDAKernelCallExpr *launchKernel = - Result.Nodes.getNodeAs( - "cudaLaunchKernel")) { + Result.Nodes.getNodeAs("cudaLaunchKernel")) { SmallString<40> XStr; raw_svector_ostream OS(XStr); StringRef calleeName; @@ -562,7 +1239,7 @@ public: SourceLocation sl(arg->getLocStart()); SourceLocation el(arg->getLocEnd()); SourceLocation stop = - clang::Lexer::getLocForEndOfToken(el, 0, *SM, DefaultLangOptions); + Lexer::getLocForEndOfToken(el, 0, *SM, DefaultLangOptions); StringRef outs(SM->getCharacterData(sl), SM->getCharacterData(stop) - SM->getCharacterData(sl)); DEBUG(dbgs() << "args[ " << argno << "]" << outs << " <" @@ -581,7 +1258,7 @@ public: SourceLocation sl(arg->getLocStart()); SourceLocation el(arg->getLocEnd()); SourceLocation stop = - clang::Lexer::getLocForEndOfToken(el, 0, *SM, DefaultLangOptions); + Lexer::getLocForEndOfToken(el, 0, *SM, DefaultLangOptions); std::string outs(SM->getCharacterData(sl), SM->getCharacterData(stop) - SM->getCharacterData(sl)); DEBUG(dbgs() << outs << "\n"); @@ -590,7 +1267,7 @@ public: XStr.pop_back(); OS << ")"; size_t length = - SM->getCharacterData(clang::Lexer::getLocForEndOfToken( + SM->getCharacterData(Lexer::getLocForEndOfToken( launchKernel->getLocEnd(), 0, *SM, DefaultLangOptions)) - SM->getCharacterData(launchKernel->getLocStart()); Replacement Rep(*SM, launchKernel->getLocStart(), length, OS.str()); @@ -599,14 +1276,14 @@ public: } if (const FunctionTemplateDecl *templateDecl = - Result.Nodes.getNodeAs( + Result.Nodes.getNodeAs( "unresolvedTemplateName")) { FunctionDecl *kernelDecl = templateDecl->getTemplatedDecl(); convertKernelDecl(kernelDecl, Result); } if (const MemberExpr *threadIdx = - Result.Nodes.getNodeAs("cudaBuiltin")) { + Result.Nodes.getNodeAs("cudaBuiltin")) { if (const OpaqueValueExpr *refBase = dyn_cast(threadIdx->getBase())) { if (const DeclRefExpr *declRef = @@ -630,7 +1307,7 @@ public: } if (const DeclRefExpr *cudaEnumConstantRef = - Result.Nodes.getNodeAs("cudaEnumConstantRef")) { + Result.Nodes.getNodeAs("cudaEnumConstantRef")) { StringRef name = cudaEnumConstantRef->getDecl()->getNameAsString(); const auto found = N.cuda2hipRename.find(name); if (found != N.cuda2hipRename.end()) { @@ -643,9 +1320,14 @@ public: } if (const VarDecl *cudaEnumConstantDecl = - Result.Nodes.getNodeAs("cudaEnumConstantDecl")) { + Result.Nodes.getNodeAs("cudaEnumConstantDecl")) { StringRef name = cudaEnumConstantDecl->getType()->getAsTagDecl()->getNameAsString(); + // anonymous typedef enum + if (name.empty()) { + QualType QT = cudaEnumConstantDecl->getType().getUnqualifiedType(); + name = QT.getAsString(); + } const auto found = N.cuda2hipRename.find(name); if (found != N.cuda2hipRename.end()) { countReps[found->second.countType]++; @@ -656,8 +1338,26 @@ public: } } + if (const VarDecl *cudaTypedefVar = + Result.Nodes.getNodeAs("cudaTypedefVar")) { + QualType QT = cudaTypedefVar->getType(); + if (QT->isArrayType()) { + QT = QT.getTypePtr()->getAsArrayTypeUnsafe()->getElementType(); + } + QT = QT.getUnqualifiedType(); + StringRef name = QT.getAsString(); + const auto found = N.cuda2hipRename.find(name); + if (found != N.cuda2hipRename.end()) { + countReps[found->second.countType]++; + StringRef repName = found->second.hipName; + SourceLocation sl = cudaTypedefVar->getLocStart(); + Replacement Rep(*SM, sl, name.size(), repName); + Replace->insert(Rep); + } + } + if (const VarDecl *cudaStructVar = - Result.Nodes.getNodeAs("cudaStructVar")) { + Result.Nodes.getNodeAs("cudaStructVar")) { StringRef name = cudaStructVar->getType() ->getAsStructureType() ->getDecl() @@ -674,7 +1374,7 @@ public: } if (const VarDecl *cudaStructVarPtr = - Result.Nodes.getNodeAs("cudaStructVarPtr")) { + Result.Nodes.getNodeAs("cudaStructVarPtr")) { const Type *t = cudaStructVarPtr->getType().getTypePtrOrNull(); if (t) { StringRef name = t->getPointeeCXXRecordDecl()->getName(); @@ -691,7 +1391,7 @@ public: } if (const ParmVarDecl *cudaParamDecl = - Result.Nodes.getNodeAs("cudaParamDecl")) { + Result.Nodes.getNodeAs("cudaParamDecl")) { QualType QT = cudaParamDecl->getOriginalType().getUnqualifiedType(); StringRef name = QT.getAsString(); const Type *t = QT.getTypePtr(); @@ -710,7 +1410,7 @@ public: } if (const ParmVarDecl *cudaParamDeclPtr = - Result.Nodes.getNodeAs("cudaParamDeclPtr")) { + Result.Nodes.getNodeAs("cudaParamDeclPtr")) { const Type *pt = cudaParamDeclPtr->getType().getTypePtrOrNull(); if (pt) { QualType QT = pt->getPointeeType(); @@ -731,7 +1431,7 @@ public: } if (const StringLiteral *stringLiteral = - Result.Nodes.getNodeAs("stringLiteral")) { + Result.Nodes.getNodeAs("stringLiteral")) { if (stringLiteral->getCharByteWidth() == 1) { StringRef s = stringLiteral->getString(); processString(s, N, Replace, *SM, stringLiteral->getLocStart(), @@ -740,7 +1440,7 @@ public: } if (const UnaryExprOrTypeTraitExpr *expr = - Result.Nodes.getNodeAs( + Result.Nodes.getNodeAs( "cudaStructSizeOf")) { TypeSourceInfo *typeInfo = expr->getArgumentTypeInfo(); QualType QT = typeInfo->getType().getUnqualifiedType(); @@ -769,11 +1469,10 @@ private: } // end anonymous namespace // Set up the command line options -static cl::opt -InputFilename(cl::Positional, cl::desc(""), cl::init("-")); +static cl::OptionCategory ToolTemplateCategory("CUDA to HIP source translator options"); static cl::opt OutputFilename("o", cl::desc("Output filename"), - cl::value_desc("filename")); + cl::value_desc("filename"), cl::cat(ToolTemplateCategory)); static cl::opt Inplace("inplace", @@ -796,13 +1495,13 @@ int main(int argc, const char **argv) { int Result; - std::unique_ptr Compilations( - new FixedCompilationDatabase(".",std::vector())); - cl::ParseCommandLineOptions(argc, argv); + CommonOptionsParser OptionsParser(argc, argv, ToolTemplateCategory, llvm::cl::Required); + + std::vector fileSources = OptionsParser.getSourcePathList(); std::string dst = OutputFilename; if (dst.empty()) { - dst = InputFilename; + dst = fileSources[0]; if (!Inplace) { size_t pos = dst.rfind(".cu"); if (pos != std::string::npos) { @@ -820,65 +1519,75 @@ int main(int argc, const char **argv) { } // copy source file since tooling makes changes "inplace" - std::ifstream source(InputFilename, std::ios::binary); + std::ifstream source(fileSources[0], std::ios::binary); std::ofstream dest(Inplace ? dst + ".prehip" : dst, std::ios::binary); dest << source.rdbuf(); source.close(); dest.close(); - RefactoringTool Tool(*Compilations, dst); + RefactoringTool Tool(OptionsParser.getCompilations(), dst); ast_matchers::MatchFinder Finder; Cuda2HipCallback Callback(&Tool.getReplacements(), &Finder); HipifyPPCallbacks PPCallbacks(&Tool.getReplacements()); Finder.addMatcher(callExpr(isExpansionInMainFile(), - callee(functionDecl(matchesName("cuda.*")))) - .bind("cudaCall"), - &Callback); + callee(functionDecl(matchesName("cuda.*|cublas.*")))) + .bind("cudaCall"), + &Callback); Finder.addMatcher(cudaKernelCallExpr().bind("cudaLaunchKernel"), &Callback); Finder.addMatcher(memberExpr(isExpansionInMainFile(), hasObjectExpression(hasType(cxxRecordDecl( - matchesName("__cuda_builtin_"))))) - .bind("cudaBuiltin"), - &Callback); + matchesName("__cuda_builtin_"))))) + .bind("cudaBuiltin"), + &Callback); Finder.addMatcher(declRefExpr(isExpansionInMainFile(), - to(enumConstantDecl(matchesName("cuda.*")))) - .bind("cudaEnumConstantRef"), - &Callback); - Finder.addMatcher( - varDecl(isExpansionInMainFile(), hasType(enumDecl(matchesName("cuda.*")))) - .bind("cudaEnumConstantDecl"), - &Callback); + to(enumConstantDecl( + matchesName("cuda.*|cublas.*|CUDA.*|CUBLAS*")))) + .bind("cudaEnumConstantRef"), + &Callback); Finder.addMatcher(varDecl(isExpansionInMainFile(), - hasType(cxxRecordDecl(matchesName("cuda.*")))) - .bind("cudaStructVar"), - &Callback); - Finder.addMatcher( - varDecl(isExpansionInMainFile(), - hasType(pointsTo(cxxRecordDecl(matchesName("cuda.*"))))) - .bind("cudaStructVarPtr"), - &Callback); + hasType(enumDecl())) + .bind("cudaEnumConstantDecl"), + &Callback); + Finder.addMatcher(varDecl(isExpansionInMainFile(), + hasType(typedefDecl(matchesName("cuda.*|cublas.*")))) + .bind("cudaTypedefVar"), + &Callback); + // Array of elements of typedef type, Example: cudaStream_t streams[2]; + Finder.addMatcher(varDecl(isExpansionInMainFile(), + hasType(arrayType(hasElementType(typedefType( + hasDeclaration(typedefDecl(matchesName("cuda.*|cublas.*")))))))) + .bind("cudaTypedefVar"), + &Callback); + Finder.addMatcher(varDecl(isExpansionInMainFile(), + hasType(cxxRecordDecl(matchesName("cuda.*|cublas.*")))) + .bind("cudaStructVar"), + &Callback); + Finder.addMatcher(varDecl(isExpansionInMainFile(), + hasType(pointsTo(cxxRecordDecl( + matchesName("cuda.*|cublas.*"))))) + .bind("cudaStructVarPtr"), + &Callback); Finder.addMatcher(parmVarDecl(isExpansionInMainFile(), - hasType(namedDecl(matchesName("cuda.*")))) - .bind("cudaParamDecl"), - &Callback); - Finder.addMatcher( - parmVarDecl(isExpansionInMainFile(), - hasType(pointsTo(namedDecl(matchesName("cuda.*"))))) - .bind("cudaParamDeclPtr"), - &Callback); + hasType(namedDecl(matchesName("cuda.*|cublas.*")))) + .bind("cudaParamDecl"), + &Callback); + Finder.addMatcher(parmVarDecl(isExpansionInMainFile(), + hasType(pointsTo(namedDecl( + matchesName("cuda.*|cublas.*"))))) + .bind("cudaParamDeclPtr"), + &Callback); Finder.addMatcher(expr(isExpansionInMainFile(), sizeOfExpr(hasArgumentOfType(recordType(hasDeclaration( - cxxRecordDecl(matchesName("cuda.*"))))))) + cxxRecordDecl(matchesName("cuda.*|cublas.*"))))))) .bind("cudaStructSizeOf"), - &Callback); - Finder.addMatcher( - stringLiteral(isExpansionInMainFile()).bind("stringLiteral"), &Callback); + &Callback); + Finder.addMatcher(stringLiteral(isExpansionInMainFile()).bind("stringLiteral"), + &Callback); auto action = newFrontendActionFactory(&Finder, &PPCallbacks); std::vector compilationStages; compilationStages.push_back("--cuda-host-only"); - //compilationStages.push_back("--cuda-device-only"); for (auto Stage : compilationStages) { Tool.appendArgumentsAdjuster( @@ -931,7 +1640,7 @@ int main(int argc, const char **argv) { llvm::outs() << counterNames[i] << ':' << Callback.countReps[i] + PPCallbacks.countReps[i] << ' '; } - llvm::outs() << ") in \'" << InputFilename << "\'\n"; + llvm::outs() << ") in \'" << fileSources[0] << "\'\n"; } return Result; } diff --git a/projects/hip/include/hcc_detail/hip_runtime.h b/projects/hip/include/hcc_detail/hip_runtime.h index 21da60631a..22095b342d 100644 --- a/projects/hip/include/hcc_detail/hip_runtime.h +++ b/projects/hip/include/hcc_detail/hip_runtime.h @@ -37,9 +37,8 @@ THE SOFTWARE. #include #include - - - +// Define NVCC_COMPAT for CUDA compatibility +#define NVCC_COMPAT #define CUDA_SUCCESS hipSuccess #include diff --git a/projects/hip/src/device_util.cpp b/projects/hip/src/device_util.cpp index a307a1e377..ca9d858981 100644 --- a/projects/hip/src/device_util.cpp +++ b/projects/hip/src/device_util.cpp @@ -836,7 +836,7 @@ __device__ float erfcf(float x) } __device__ float erfcinvf(float y) { - return __hip_erfinvf(1 - y); + return __hip_erfinvf(1 - y); } __device__ float erfcxf(float x) { @@ -1697,75 +1697,111 @@ __device__ unsigned int test__popc(unsigned int input); __device__ unsigned int __popcll( unsigned long long int input) { - return hc::__popcount_u32_b64(input); + return hc::__popcount_u32_b64(input); } __device__ unsigned int __clz(unsigned int input) { - return hc::__firstbit_u32_u32( input); +#ifdef NVCC_COMPAT + return input == 0 ? 32 : hc::__firstbit_u32_u32( input); +#else + return hc::__firstbit_u32_u32( input); +#endif } __device__ unsigned int __clzll(unsigned long long int input) { - return hc::__firstbit_u32_u64( input); +#ifdef NVCC_COMPAT + return input == 0 ? 64 : hc::__firstbit_u32_u64( input); +#else + return hc::__firstbit_u32_u64( input); +#endif } -__device__ unsigned int __clz(int input) +__device__ unsigned int __clz( int input) { - return hc::__firstbit_u32_s32( input); +#ifdef NVCC_COMPAT + return input == 0 ? 32 : hc::__firstbit_u32_s32( input); +#else + return hc::__firstbit_u32_s32( input); +#endif } -__device__ unsigned int __clzll(long long int input) +__device__ unsigned int __clzll( long long int input) { - return hc::__firstbit_u32_s64( input); +#ifdef NVCC_COMPAT + return input == 0 ? 64 : hc::__firstbit_u32_s64( input); +#else + return hc::__firstbit_u32_s64( input); +#endif } __device__ unsigned int __ffs(unsigned int input) { - return hc::__lastbit_u32_u32( input)+1; +#ifdef NVCC_COMPAT + return hc::__lastbit_u32_u32( input)+1; +#else + return hc::__lastbit_u32_u32( input); +#endif } __device__ unsigned int __ffsll(unsigned long long int input) { - return hc::__lastbit_u32_u64( input)+1; +#ifdef NVCC_COMPAT + return hc::__lastbit_u32_u64( input)+1; +#else + return hc::__lastbit_u32_u64( input); +#endif } -__device__ unsigned int __ffs(int input) +__device__ unsigned int __ffs( int input) { - return hc::__lastbit_u32_s32( input)+1; +#ifdef NVCC_COMPAT + return hc::__lastbit_u32_s32( input)+1; +#else + return hc::__lastbit_u32_s32( input); +#endif } -__device__ unsigned int __ffsll(long long int input) +__device__ unsigned int __ffsll( long long int input) { - return hc::__lastbit_u32_s64( input)+1; +#ifdef NVCC_COMPAT + return hc::__lastbit_u32_s64( input)+1; +#else + return hc::__lastbit_u32_s64( input); +#endif } __device__ unsigned int __brev( unsigned int input) { - return hc::__bitrev_b32( input); + return hc::__bitrev_b32( input); } __device__ unsigned long long int __brevll( unsigned long long int input) { - return hc::__bitrev_b64( input); + return hc::__bitrev_b64( input); } // warp vote function __all __any __ballot __device__ int __all( int input) { - return hc::__all( input); + return hc::__all( input); } __device__ int __any( int input) { - if( hc::__any( input)!=0) return 1; - else return 0; +#ifdef NVCC_COMPAT + if( hc::__any( input)!=0) return 1; + else return 0; +#else + return hc::__any( input); +#endif } __device__ unsigned long long int __ballot( int input) { - return hc::__ballot( input); + return hc::__ballot( input); } // warp shuffle functions @@ -1809,11 +1845,11 @@ __device__ float __shfl_xor(float input, int lane_mask, int width) return hc::__shfl_xor(input,lane_mask,width); } -__host__ __device__ int min(int arg1, int arg2) -{ +__host__ __device__ int min(int arg1, int arg2) +{ return (int)(hc::precise_math::fmin((float)arg1, (float)arg2)); } -__host__ __device__ int max(int arg1, int arg2) +__host__ __device__ int max(int arg1, int arg2) { return (int)(hc::precise_math::fmax((float)arg1, (float)arg2)); } diff --git a/projects/hip/tests/src/deviceLib/hip_anyall.cpp b/projects/hip/tests/src/deviceLib/hip_anyall.cpp index 21e24d6443..2804e6211b 100644 --- a/projects/hip/tests/src/deviceLib/hip_anyall.cpp +++ b/projects/hip/tests/src/deviceLib/hip_anyall.cpp @@ -27,7 +27,7 @@ THE SOFTWARE. #include #define HIP_ASSERT(x) (assert((x)==hipSuccess)) -__global__ void +__global__ void warpvote(hipLaunchParm lp, int* device_any, int* device_all , int Num_Warps_per_Block, int pshift) { @@ -36,13 +36,11 @@ __global__ void device_all[hipThreadIdx_x>>pshift] = __all(tid -77); } - - int main(int argc, char *argv[]) { int warpSize, pshift; hipDeviceProp_t devProp; hipGetDeviceProperties(&devProp, 0); - if(strncmp(devProp.name,"Fiji",1)==0) + if(strncmp(devProp.name,"Fiji",1)==0) { warpSize =64; pshift =6; } @@ -53,14 +51,14 @@ int main(int argc, char *argv[]) int Num_Blocks_per_Grid = 1; int Num_Warps_per_Block = Num_Threads_per_Block/warpSize; int Num_Warps_per_Grid = (Num_Threads_per_Block*Num_Blocks_per_Grid)/warpSize; - + int * host_any = ( int*)malloc(Num_Warps_per_Grid*sizeof(int)); int * host_all = ( int*)malloc(Num_Warps_per_Grid*sizeof(int)); - int *device_any; + int *device_any; int *device_all; HIP_ASSERT(hipMalloc((void**)&device_any,Num_Warps_per_Grid*sizeof( int))); HIP_ASSERT(hipMalloc((void**)&device_all,Num_Warps_per_Grid*sizeof(int))); -for (int i=0; i #include "hip_runtime.h" - #define HIP_ASSERT(x) (assert((x)==hipSuccess)) - - -#define WIDTH 32 -#define HEIGHT 32 - +#define WIDTH 8 +#define HEIGHT 8 #define NUM (WIDTH*HEIGHT) #define THREADS_PER_BLOCK_X 8 @@ -43,41 +39,41 @@ THE SOFTWARE. unsigned int firstbit_u32(unsigned int a) { if (a == 0) +{ +#if defined (__HIP_PLATFORM_HCC__) && !defined ( NVCC_COMPAT ) + return -1; +#else + return 32; +#endif +} unsigned int pos = 0; while ((int )a > 0) { a <<= 1; pos++; } return pos; } -unsigned int firstbit_s32(int a) -{ - unsigned int u = a >= 0? a: ~a; // complement negative numbers - return firstbit_u32(u); -} unsigned int firstbit_u64(unsigned long long int a) { if (a == 0) +{ +#if defined (__HIP_PLATFORM_HCC__) && !defined ( NVCC_COMPAT ) return -1; +#else + return 64; +#endif +} unsigned int pos = 0; while ((long long int)a > 0) { a <<= 1; pos++; } return pos; } -unsigned int firstbit_s64(long long int a) -{ - unsigned long long int u = a >= 0? a: ~a; // complement negative numbers - return firstbit_u64(u); -} - - __global__ void HIP_kernel(hipLaunchParm lp, - unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d, - unsigned int* e, int* f,unsigned int* g, long long int* h, int width, int height) + unsigned int* a, unsigned int* b,unsigned int* c, unsigned long long int* d, int width, int height) { int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; @@ -87,14 +83,9 @@ HIP_kernel(hipLaunchParm lp, if ( i < (width * height)) { a[i] = __clz(b[i]); c[i] = __clzll(d[i]); - e[i] = __clz(f[i]); - g[i] = __clzll(h[i]); } - } - - using namespace std; int main() { @@ -103,19 +94,11 @@ int main() { unsigned int* hostB; unsigned int* hostC; unsigned long long int* hostD; - unsigned int* hostE; - int* hostF; - unsigned int* hostG; - long long int* hostH; unsigned int* deviceA; unsigned int* deviceB; unsigned int* deviceC; unsigned long long int* deviceD; - unsigned int* deviceE; - int* deviceF; - unsigned int* deviceG; - long long int* deviceH; hipDeviceProp_t devProp; hipGetDeviceProperties(&devProp, 0); @@ -125,57 +108,56 @@ int main() { cout << "hip Device prop succeeded " << endl ; - - int i; + unsigned int i; int errors; hostA = (unsigned int*)malloc(NUM * sizeof(unsigned int)); hostB = (unsigned int*)malloc(NUM * sizeof(unsigned int)); hostC = (unsigned int*)malloc(NUM * sizeof(unsigned int)); hostD = (unsigned long long int*)malloc(NUM * sizeof(unsigned long long int)); - hostE = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostF = (int*)malloc(NUM * sizeof(int)); - hostG = (unsigned int*)malloc(NUM * sizeof(unsigned int)); - hostH = (long long int*)malloc(NUM * sizeof(long long int)); // initialize the input data for (i = 0; i < NUM; i++) { - hostB[i] = i; - hostD[i] = 1099511627776+i; - hostF[i] = -2100+i; - hostH[i] = 1099511627776+i; + hostB[i] = 419430*i; + hostD[i] = i; } HIP_ASSERT(hipMalloc((void**)&deviceA, NUM * sizeof(unsigned int))); HIP_ASSERT(hipMalloc((void**)&deviceB, NUM * sizeof(unsigned int))); HIP_ASSERT(hipMalloc((void**)&deviceC, NUM * sizeof(unsigned int))); HIP_ASSERT(hipMalloc((void**)&deviceD, NUM * sizeof(unsigned long long int))); - HIP_ASSERT(hipMalloc((void**)&deviceE, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceF, NUM * sizeof(int))); - HIP_ASSERT(hipMalloc((void**)&deviceG, NUM * sizeof(unsigned int))); - HIP_ASSERT(hipMalloc((void**)&deviceH, NUM * sizeof(long long int))); HIP_ASSERT(hipMemcpy(deviceB, hostB, NUM*sizeof(unsigned int), hipMemcpyHostToDevice)); HIP_ASSERT(hipMemcpy(deviceD, hostD, NUM*sizeof(unsigned long long int), hipMemcpyHostToDevice)); - HIP_ASSERT(hipMemcpy(deviceF, hostF, NUM*sizeof(int), hipMemcpyHostToDevice)); - HIP_ASSERT(hipMemcpy(deviceH, hostD, NUM*sizeof(long long int), hipMemcpyHostToDevice)); hipLaunchKernel(HIP_kernel, dim3(WIDTH/THREADS_PER_BLOCK_X, HEIGHT/THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, - deviceA ,deviceB, deviceC,deviceD ,deviceE ,deviceF, deviceG,deviceH, WIDTH ,HEIGHT); + deviceA ,deviceB, deviceC ,deviceD , WIDTH ,HEIGHT); HIP_ASSERT(hipMemcpy(hostA, deviceA, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost)); HIP_ASSERT(hipMemcpy(hostC, deviceC, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost)); - HIP_ASSERT(hipMemcpy(hostE, deviceE, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost)); - HIP_ASSERT(hipMemcpy(hostG, deviceG, NUM*sizeof(unsigned int), hipMemcpyDeviceToHost)); // verify the results errors = 0; for (i = 0; i < NUM; i++) { - if (hostA[i] != firstbit_u32(hostB[i])) { + printf("gpu_clz =%d, cpu_clz =%d \n",hostA[i],firstbit_u32(hostB[i])); + if (hostA[i] != firstbit_u32(hostB[i])) { + errors++; + } + } + if (errors!=0) { + cout << "FAILED clz" << endl; + return -1; + } else { + cout << "__clz() checked!" << endl; + } + errors = 0; + for (i = 0; i < NUM; i++) { + printf("gpu_clzll =%d, cpu_clzll =%d \n",hostC[i],firstbit_u64(hostD[i])); + if (hostC[i] != firstbit_u64(hostD[i])) { errors++; } } @@ -183,43 +165,7 @@ int main() { cout << "FAILED clz" << endl; return -1; } else { - cout << "__clz_u() for unsigned checked!" << endl; - } - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostC[i] != firstbit_u64(hostD[i])) { - errors++; - } - } - if (errors!=0) { - cout << "FAILED clz" << endl; - return -1; - } else { - cout << "__clzll_u() for unsigned checked!" << endl; - } - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostE[i] != firstbit_s32(hostF[i])) { - errors++; - } - } - if (errors!=0) { - cout << "FAILED clz\n" << endl; - return -1; - } else { - cout << "__clz_s() checked!" << endl; - } - errors = 0; - for (i = 0; i < NUM; i++) { - if (hostG[i] != firstbit_s64(hostH[i])) { - errors++; - } - } - if (errors!=0) { - cout << "FAILED clz" << endl; - return -1; - } else { - cout << "__clzll_s() checked!" << endl; + cout << "__clzll() checked!" << endl; } cout << "clz test PASSED!" << endl; @@ -228,19 +174,11 @@ int main() { HIP_ASSERT(hipFree(deviceB)); HIP_ASSERT(hipFree(deviceC)); HIP_ASSERT(hipFree(deviceD)); - HIP_ASSERT(hipFree(deviceE)); - HIP_ASSERT(hipFree(deviceF)); - HIP_ASSERT(hipFree(deviceG)); - HIP_ASSERT(hipFree(deviceH)); free(hostA); free(hostB); free(hostC); free(hostD); - free(hostE); - free(hostF); - free(hostG); - free(hostH); return errors; } diff --git a/projects/hip/tests/src/deviceLib/hip_ffs.cpp b/projects/hip/tests/src/deviceLib/hip_ffs.cpp index a84ab7b268..77d31a6776 100644 --- a/projects/hip/tests/src/deviceLib/hip_ffs.cpp +++ b/projects/hip/tests/src/deviceLib/hip_ffs.cpp @@ -31,8 +31,8 @@ THE SOFTWARE. #define HIP_ASSERT(x) (assert((x)==hipSuccess)) -#define WIDTH 32 -#define HEIGHT 32 +#define WIDTH 8 +#define HEIGHT 8 #define NUM (WIDTH*HEIGHT) @@ -44,12 +44,20 @@ template int lastbit( T a) { if (a == 0) +#if defined (__HIP_PLATFORM_HCC__) && !defined ( NVCC_COMPAT ) + return -1; +#else return 0; +#endif int pos = 1; while ((a&1) != 1) { a >>= 1; pos++; } - return pos; +#if defined (__HIP_PLATFORM_HCC__) && !defined ( NVCC_COMPAT ) + return pos-1; +#else + return pos; +#endif } @@ -130,6 +138,7 @@ int main() { // verify the results errors = 0; for (i = 0; i < NUM; i++) { + printf("gpu_ffs =%d, cpu_ffs =%d \n",hostA[i],lastbit(hostB[i])); if (hostA[i] != lastbit(hostB[i])) { errors++; } @@ -142,6 +151,7 @@ int main() { } errors = 0; for (i = 0; i < NUM; i++) { + printf("gpu_ffsll =%d, cpu_ffsll =%d \n",hostC[i],lastbit(hostD[i])); if (hostC[i] != lastbit(hostD[i])) { errors++; }