#!/usr/bin/perl -w ## # Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. ## #usage hipify [OPTIONS] INPUT_FILE use Getopt::Long; my $warn_whitelist =""; GetOptions( "print-stats" => \$print_stats # print the command-line, like a header. , "count-conversions" => \$count_conversions # count conversions. , "quiet-warnings" => \$quiet_warnings # don't print warnings on unknown CUDA functions. , "warn-whitelist=s"=> \$warn_whitelist , "no-translate-builtins" => \$no_translate_builtins # don't translate math functions. , "no-translate-textures" => \$no_translate_textures , "no-output" => \$no_output # don't write any translated output to stdout. , "inplace" => \$inplace # modify input file inplace, replacing input with hipified output, save backup in ".prehip" file. # If .prehip file exists, use that as input to hip. , "n" => \$n # combination of print_stats + no-output. ); $print_stats = 1 if $n; $no_output = 1 if $n; # These uses of cuda[A-Z] are commonly used in CUDA code but don't actually map to any CUDA API: # TODO - use a hash lookup for these. @warn_whitelist = ( "cudaError" ,"cudaStatus" ,"cudaDevice" ,"cudaDevice_t" ,"cudaIDs" ,"cudaGridDim" ,"cudaDimGrid" ,"cudaDimBlock" ,"cudaDeviceId" ,"cudaDevices", ,"cudaGradOutput", ,"cudaInput", ,"cudaOutput", ,"cudaGradInput", ,"cudaIndices", ,"cudaColorSpinorField" ,"cudaGaugeField" ,"cudaMom" ,"cudaGauge" ,"cudaInGauge" ,"cudaGaugeField" ,"cudaColorSpinorField" ,"cudaSiteLink" ,"cudaFatLink" ,"cudaStaple" ,"cudaCloverField" ,"cudaFatLink" ,"cudaParam" ); #print "WW=@warn_whitelist\n"; # Allow users to add their own functions. push (@warn_whitelist, split(',',$warn_whitelist)); #--- #Stats tracking code: @statNames = ("dev", "mem", "kern", 'coord_func', "math_func", "special_func", "stream", "event", "err", "def", "tex", "extern_shared", "other"); #--- #Compute total of all individual counts: sub totalStats { my %count = %{ shift() }; my $total = 0; foreach $key (keys %count) { $total += $count{$key}; } return $total; }; #--- sub printStats { my $label = shift(); my @statNames = @{ shift() }; my %counts = %{ shift() }; my $warnings = shift(); my $loc = shift(); my $total = totalStats(\%counts); printf STDERR "%s %d CUDA->HIP refs( ", $label, $total; foreach $stat (@statNames) { printf STDERR "%s:%d ", $stat, $counts{$stat}; } printf STDERR ") warn:%d LOC:%d", $warnings, $loc; } #--- # Add adder stats to dest. Used to add stats for current file to a running total for all files: sub addStats { my $dest_ref = shift() ; my %adder = %{ shift() }; foreach $key (keys %adder) { $dest_ref->{$key} += $adder{$key}; #printf ("D{$key} += %d => %d\n", $adder{$key}, $dest{$key}); } } #--- sub clearStats { my $dest_ref = shift() ; my @statNames = @{ shift() }; foreach $stat (@statNames) { $dest_ref->{$stat} = 0; } } # count of transforms in all files: my %tt; clearStats(\%tt, \@statNames); $Twarnings = 0; $TlineCount = 0; my %TwarningTags ; my %Tkernels ; my $fileCount = @ARGV; my $fileName = ""; while (@ARGV) { $fileName=shift (@ARGV); if ($inplace) { my $file_prehip = "$fileName" . ".prehip"; my $infile; my $outfile; if (-e $file_prehip) { $infile = $file_prehip; $outfile = $fileName; } else { system ("cp $fileName $file_prehip"); $infile = $file_prehip; $outfile = $fileName; } open(INFILE,"<", $infile) or die "error: could not open $infile"; open(OUTFILE,">", $outfile) or die "error: could not open $outfile"; $OUTFILE = OUTFILE; } else { open(INFILE,"<", $fileName) or die "error: could not open $fileName"; $OUTFILE = STDOUT; } # Note : \b is used in perl to indicate the start of a word - typically that is what we want in this case: # # count of transforms in this file, init to 0 here: my %ft; clearStats(\%ft, \@statNames); my $countIncludes = 0; my $countKeywords = 0; # keywords like __global__, __shared__ - not converted by hipify but counted here. my $warnings = 0; my $warningsCublas = 0; my $warningsCurand = 0; my %warningTags; # hash with counts of particular unknown keywords. my $lineCount = 0; undef $/; # Read whole file at once, so we can match newlines. while () { #-------- # Compiler Defines # __CUDACC__ is set by NVCC to indicate it is treating the input file as CUDA code (as opposed to host) # Typically we want any code treated as CUDA code to be treated as accelerator code by Kalmar too # __HIPCC__ will set KALMARCC $ft{'def'} += s/\b__CUDACC__\b/__HIPCC__/g; # __CUDA_ARCH is often used to detect when a function or kernel is being compiled for the device. # Don't automaticall convert this - likely these will need special attention with HIP_ARCH_HAS_* macros #$ft{'def'} += s/\b__CUDA_ARCH__\b/__HIP_ARCH__/g; #-------- #Includes: $countIncludes += s/(\s*#\s*include\s+)[<"]cuda_runtime\.h[>"]/$1/; $countIncludes += s/(\s*#\s*include\s+)[<"]cuda_runtime_api\.h[>"]/$1/; #-------- # Error codes and return types: $ft{'err'} += s/\bcudaError_t\b/hipError_t/g; $ft{'err'} += s/\bcudaError\b/hipError_t/g; $ft{'err'} += s/\bcudaSuccess\b/hipSuccess/g; $ft{'err'} += s/\bcudaErrorUnknown\b/hipErrorUnknown/g; $ft{'err'} += s/\bcudaErrorMemoryAllocation\b/hipErrorMemoryAllocation/g; $ft{'err'} += s/\bcudaErrorMemoryFree\b/hipErrorMemoryFree/g; $ft{'err'} += s/\bcudaErrorUnknownSymbol\b/hipErrorUnknownSymbol/g; $ft{'err'} += s/\bcudaErrorOutOfResources\b/hipErrorOutOfResources/g; $ft{'err'} += s/\bcudaErrorInvalidValue\b/hipErrorInvalidValue/g; $ft{'err'} += s/\bcudaErrorInvalidResourceHandle\b/hipErrorInvalidResourceHandle/g; $ft{'err'} += s/\bcudaErrorInvalidDevice\b/hipErrorInvalidDevice/g; $ft{'err'} += s/\bcudaErrorNoDevice\b/hipErrorNoDevice/g; $ft{'err'} += s/\bcudaErrorNotReady\b/hipErrorNotReady/g; $ft{'err'} += s/\bcudaErrorUnknown\b/hipErrorUnknown/g; # error APIs: $ft{'err'} += s/\bcudaGetLastError\b/hipGetLastError/g; $ft{'err'} += s/\bcudaPeekAtLastError\b/hipPeekAtLastError/g; $ft{'err'} += s/\bcudaGetErrorName\b/hipGetErrorName/g; $ft{'err'} += s/\bcudaGetErrorString\b/hipGetErrorString/g; #-------- # Memcpy $ft{'mem'} += s/\bcudaMemcpy\b/hipMemcpy/g; $ft{'mem'} += s/\bcudaMemcpyHostToHost\b/hipMemcpyHostToHost/g; $ft{'mem'} += s/\bcudaMemcpyHostToDevice\b/hipMemcpyHostToDevice/g; $ft{'mem'} += s/\bcudaMemcpyDeviceToHost\b/hipMemcpyDeviceToHost/g; $ft{'mem'} += s/\bcudaMemcpyDeviceToDevice\b/hipMemcpyDeviceToDevice/g; $ft{'mem'} += s/\bcudaMemcpyDefault\b/hipMemcpyDefault/g; $ft{'mem'} += s/\bcudaMemcpyToSymbol\b/hipMemcpyToSymbol/g; $ft{'mem'} += s/\bcudaMemset\b/hipMemset/g; $ft{'mem'} += s/\bcudaMemsetAsync\b/hipMemsetAsync/g; $ft{'mem'} += s/\bcudaMemcpyAsync\b/hipMemcpyAsync/g; $ft{'mem'} += s/\bcudaMemGetInfo\b/hipMemGetInfo/g; $ft{'mem'} += s/\bcudaMemcpyKind\b/hipMemcpyKind/g; $ft{'mem'} += s/\bcudaPointerAttributes\b/hipPointerAttribute_t/g; $ft{'mem'} += s/\bcudaMemcpy2D\b/hipMemcpy2D/g; $ft{'mem'} += s/\bcudaMemcpy2DToArray\b/hipMemcpy2DToArray/g; #-------- # Memory management: $ft{'mem'} += s/\bcudaMalloc\b/hipMalloc/g; $ft{'mem'} += s/\bcudaMallocHost\b/hipHostMalloc/g; # note conversion to standard hipHost* naming convention $ft{'mem'} += s/\bcudaFree\b/hipFree/g; $ft{'mem'} += s/\bcudaFreeHost\b/hipHostFree/g; # note conversion to standard hipHost* naming convention $ft{'mem'} += s/\bcudaHostAlloc\b/hipHostMalloc/g; $ft{'mem'} += s/\bcudaHostGetDevicePointer\b/hipHostGetDevicePointer/g; $ft{'mem'} += s/\bcudaHostAllocDefault\b/hipHostMallocDefault/g; $ft{'mem'} += s/\bcudaHostAllocPortable\b/hipHostMallocPortable/g; $ft{'mem'} += s/\bcudaHostAllocMapped\b/hipHostMallocMapped/g; $ft{'mem'} += s/\bcudaHostAllocWriteCombined\b/hipHostMallocWriteCombined/g; $ft{'mem'} += s/\bcudaMallocArray\b/hipMallocArray/g; $ft{'mem'} += s/\bcudaMallocPitch\b/hipMallocPitch/g; #-------- # Coordinate Indexing and Dimensions: $ft{'coord_func'} += s/\bthreadIdx\.x\b/hipThreadIdx_x/g; $ft{'coord_func'} += s/\bthreadIdx\.y\b/hipThreadIdx_y/g; $ft{'coord_func'} += s/\bthreadIdx\.z\b/hipThreadIdx_z/g; $ft{'coord_func'} += s/\bblockIdx\.x\b/hipBlockIdx_x/g; $ft{'coord_func'} += s/\bblockIdx\.y\b/hipBlockIdx_y/g; $ft{'coord_func'} += s/\bblockIdx\.z\b/hipBlockIdx_z/g; $ft{'coord_func'} += s/\bblockDim\.x\b/hipBlockDim_x/g; $ft{'coord_func'} += s/\bblockDim\.y\b/hipBlockDim_y/g; $ft{'coord_func'} += s/\bblockDim\.z\b/hipBlockDim_z/g; $ft{'coord_func'} += s/\bgridDim\.x\b/hipGridDim_x/g; $ft{'coord_func'} += s/\bgridDim\.y\b/hipGridDim_y/g; $ft{'coord_func'} += s/\bgridDim\.z\b/hipGridDim_z/g; # hack to avoid replacing hipDeviceProp.warpSize call $ft{'special_func'} += s/([^.])\bwarpSize\b/$1hipWarpSize/g; #-------- # Events $ft{'event'} += s/\bcudaEvent_t\b/hipEvent_t/g; $ft{'event'} += s/\bcudaEventCreate\b/hipEventCreate/g; $ft{'event'} += s/\bcudaEventCreateWithFlags\b/hipEventCreateWithFlags/g; $ft{'event'} += s/\bcudaEventDestroy\b/hipEventDestroy/g; $ft{'event'} += s/\bcudaEventRecord\b/hipEventRecord/g; $ft{'event'} += s/\bcudaEventElapsedTime\b/hipEventElapsedTime/g; $ft{'event'} += s/\bcudaEventSynchronize\b/hipEventSynchronize/g; #-------- # Streams $ft{'stream'} += s/\bcudaStream_t\b/hipStream_t/g; $ft{'stream'} += s/\bcudaStreamCreate\b/hipStreamCreate/g; $ft{'stream'} += s/\bcudaStreamCreateWithFlags\b/hipStreamCreateWithFlags/g; $ft{'stream'} += s/\bcudaStreamDestroy\b/hipStreamDestroy/g; $ft{'stream'} += s/\bcudaStreamWaitEvent\b/hipStreamWaitEvent/g; $ft{'stream'} += s/\bcudaStreamSynchronize\b/hipStreamSynchronize/g; $ft{'stream'} += s/\bcudaStreamDefault\b/hipStreamDefault/g; $ft{'stream'} += s/\bcudaStreamNonBlocking\b/hipStreamNonBlocking/g; #-------- # Other synchronization $ft{'dev'} += s/\bcudaDeviceSynchronize\b/hipDeviceSynchronize/g; $ft{'dev'} += s/\bcudaThreadSynchronize\b/hipDeviceSynchronize/g; # translate deprecated cudaThreadSynchronize $ft{'dev'} += s/\bcudaDeviceReset\b/hipDeviceReset/g; $ft{'dev'} += s/\bcudaThreadExit\b/hipDeviceReset/g; # translate deprecated cudaThreadExit $ft{'dev'} += s/\bcudaSetDevice\b/hipSetDevice/g; $ft{'dev'} += s/\bcudaGetDevice\b/hipGetDevice/g; #-------- # Device $ft{'dev'} += s/\bcudaDeviceProp\b/hipDeviceProp_t/g; $ft{'dev'} += s/\bcudaGetDeviceProperties\b/hipGetDeviceProperties/g; # Attribute $ft{'err'} += s/\bcudaDevAttrMaxThreadsPerBlock\b/hipDeviceAttributeMaxThreadsPerBlock/g; $ft{'err'} += s/\bcudaDevAttrMaxBlockDimX\b/hipDeviceAttributeMaxBlockDimX/g; $ft{'err'} += s/\bcudaDevAttrMaxBlockDimY\b/hipDeviceAttributeMaxBlockDimY/g; $ft{'err'} += s/\bcudaDevAttrMaxBlockDimZ\b/hipDeviceAttributeMaxBlockDimZ/g; $ft{'err'} += s/\bcudaDevAttrMaxGridDimX\b/hipDeviceAttributeMaxGridDimX/g; $ft{'err'} += s/\bcudaDevAttrMaxGridDimY\b/hipDeviceAttributeMaxGridDimY/g; $ft{'err'} += s/\bcudaDevAttrMaxGridDimZ\b/hipDeviceAttributeMaxGridDimZ/g; $ft{'err'} += s/\bcudaDevAttrMaxSharedMemoryPerBlock\b/hipDeviceAttributeMaxSharedMemoryPerBlock/g; $ft{'err'} += s/\bcudaDevAttrTotalConstantMemory\b/hipDeviceAttributeTotalConstantMemory/g; $ft{'err'} += s/\bcudaDevAttrWarpSize\b/hipDeviceAttributeWarpSize/g; $ft{'err'} += s/\bcudaDevAttrMaxRegistersPerBlock\b/hipDeviceAttributeMaxRegistersPerBlock/g; $ft{'err'} += s/\bcudaDevAttrClockRate\b/hipDeviceAttributeClockRate/g; $ft{'err'} += s/\bcudaDevAttrMultiProcessorCount\b/hipDeviceAttributeMultiprocessorCount/g; $ft{'err'} += s/\bcudaDevAttrComputeMode\b/hipDeviceAttributeComputeMode/g; $ft{'err'} += s/\bcudaDevAttrL2CacheSize\b/hipDeviceAttributeL2CacheSize/g; $ft{'err'} += s/\bcudaDevAttrMaxThreadsPerMultiProcessor\b/hipDeviceAttributeMaxThreadsPerMultiProcessor/g; $ft{'err'} += s/\bcudaDevAttrComputeCapabilityMajor\b/hipDeviceAttributeComputeCapabilityMajor/g; $ft{'err'} += s/\bcudaDevAttrComputeCapabilityMinor\b/hipDeviceAttributeComputeCapabilityMinor/g; $ft{'err'} += s/\bcudaDevAttrConcurrentKernels\b/hipDeviceAttributeConcurrentKernels/g; $ft{'err'} += s/\bcudaDevAttrPciBusId\b/hipDeviceAttributePciBusId/g; $ft{'err'} += s/\bcudaDevAttrPciDeviceId\b/hipDeviceAttributePciDeviceId/g; $ft{'err'} += s/\bcudaDevAttrMaxSharedMemoryPerMultiprocessor\b/hipDeviceAttributeMaxSharedMemoryPerMultiprocessor/g; $ft{'err'} += s/\bcudaDevAttrMemoryClockRate\b/hipDeviceAttributeMemoryClockRate/g; $ft{'err'} += s/\bcudaDevAttrGlobalMemoryBusWidth\b/hipDeviceAttributeMemoryBusWidth/g; $ft{'dev'} += s/\bcudaDeviceAttr\b/hipDeviceAttribute_t/g; $ft{'dev'} += s/\bcudaDeviceGetAttribute\b/hipDeviceGetAttribute/g; # Cache config $ft{'dev'} += s/\bcudaDeviceSetCacheConfig\b/hipDeviceSetCacheConfig/g; $ft{'dev'} += s/\bcudaThreadSetCacheConfig\b/hipDeviceSetCacheConfig/g; # translate deprecated $ft{'dev'} += s/\bcudaDeviceGetCacheConfig\b/hipDeviceGetCacheConfig/g; $ft{'dev'} += s/\bcudaThreadGetCacheConfig\b/hipDeviceGetCacheConfig/g; # translate deprecated $ft{'dev'} += s/\bcudaFuncCache\b/hipFuncCache/g; $ft{'dev'} += s/\bcudaFuncCachePreferNone\b/hipFuncCachePreferNone/g; $ft{'dev'} += s/\bcudaFuncCachePreferShared\b/hipFuncCachePreferShared/g; $ft{'dev'} += s/\bcudaFuncCachePreferL1\b/hipFuncCachePreferL1/g; $ft{'dev'} += s/\bcudaFuncCachePreferEqual\b/hipFuncCachePreferEqual/g; # function $ft{'dev'} += s/\bcudaFuncSetCacheConfig\b/hipFuncSetCacheConfig/g; $ft{'dev'} += s/\bcudaDriverGetVersion\b/hipDriverGetVersion/g; #-------- # Peer2Peer $ft{'dev'} += s/\bcudaDeviceCanAccessPeer\b/hipDeviceCanAccessPeer/g; $ft{'dev'} += s/\bcudaDeviceDisablePeerAccess\b/hipDeviceDisablePeerAccess/g; $ft{'dev'} += s/\bcudaDeviceEnablePeerAccess\b/hipDeviceEnablePeerAccess/g; $ft{'mem'} += s/\bcudaMemcpyPeerAsync\b/hipMemcpyPeerAsync/g; $ft{'mem'} += s/\bcudaMemcpyPeer\b/hipMemcpyPeer/g; # Shared mem: $ft{'dev'} += s/\bcudaDeviceSetSharedMemConfig\b/hipDeviceSetSharedMemConfig/g; $ft{'dev'} += s/\bcudaThreadSetSharedMemConfig\b/hipDeviceSetSharedMemConfig/g; # translate deprecated $ft{'dev'} += s/\bcudaDeviceGetSharedMemConfig\b/hipDeviceGetSharedMemConfig/g; $ft{'dev'} += s/\bcudaThreadGetSharedMemConfig\b/hipDeviceGetSharedMemConfig/g; # translate deprecated $ft{'dev'} += s/\bcudaSharedMemConfig\b/hipSharedMemConfig/g; $ft{'dev'} += s/\bcudaSharedMemBankSizeDefault\b/hipSharedMemBankSizeDefault/g; $ft{'dev'} += s/\bcudaSharedMemBankSizeFourByte\b/hipSharedMemBankSizeFourByte/g; $ft{'dev'} += s/\bcudaSharedMemBankSizeEightByte\b/hipSharedMemBankSizeEightByte/g; $ft{'dev'} += s/\bcudaGetDeviceCount\b/hipGetDeviceCount/g; #-------- # Profiler #$aOt += s/\bcudaProfilerInitialize\b/hipProfilerInitialize/g; // see if these are called anywhere. $ft{'other'} += s/\bcudaProfilerStart\b/hipProfilerStart/g; $ft{'other'} += s/\bcudaProfilerStop\b/hipProfilerStop/g; $countKeywords += m/__global__/; $countKeywords += m/__shared__/; #-------- # CUDA extern __shared__ syntax # Note these only work if declaration is on a single line. { # match uses ? for <.*> which will be unitialized if this is not present in launch syntax. no warnings qw/uninitialized/; my $k = 0; # Match extern __shared__ type foo[]; syntax # Replace as HIP_DYNAMIC_SHARED() macro $k += s/extern\s+([\w\(\)]+)?\s*__shared__\s+([\w:<>\s]+)\s+(\w+)\s*\[\s*\]\s*;/HIP_DYNAMIC_SHARED($1 $2, $3)/g; # test patterns for the regular expression above: #'extern __shared__ double foo[];' #'extern __shared__ unsigned int foo[];' #'extern volatile __shared__ double foo[];' #'extern volatile __shared__ unsigned int sdata[];' #'extern __shared__ volatile unsigned int sdata[];' #'extern __shared__ T s[];' #'extern __shared__ T::type s[];' #'extern __shared__ blah::type s[];' #'extern __shared__ typename mapper::type s_data[];' #'extern __attribute__((used)) __shared__ typename mapper::type s_data[];' $ft{'extern_shared'} += $k; } #-------- # CUDA Launch Syntax # Note these only work if launch is on a single line. # Handle the <>> syntax: { # match uses ? for <.*> which will be unitialized if this is not present in launch syntax. no warnings qw/uninitialized/; my $k = 0; my $kernelName; # Handle the <>> syntax: $k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>([\s*\\]*)\(/hipLaunchKernel(HIP_KERNEL_NAME($1$2), dim3($3), dim3($4), $5, $6, /g; $kernelName = $1 if $k; # Handle the <>> syntax: $k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*,\s*(.+)\s*>>>([\s*\\]*)\(/hipLaunchKernel(HIP_KERNEL_NAME($1$2), dim3($3), dim3($4), $5, 0, /g; $kernelName = $1 if $k; # Handle the <>> syntax: $k += s/(\w+)\s*(<.*>)?\s*<<<\s*(.+)\s*,\s*(.+)\s*>>>([\s\\]*)\(/hipLaunchKernel(HIP_KERNEL_NAME($1$2), dim3($3), dim3($4), 0, 0, /g; $kernelName = $1 if $k; $ft{'kern'} += $k; if ($k) { $Tkernels{$kernelName} ++; } } unless ($no_translate_textures) { $ft{'tex'} += s/\bcudaChannelFormatDesc\b/hipChannelFormatDesc/g; $ft{'tex'} += s/\bcudaFilterModePoint\b/hipFilterModePoint/g; $ft{'tex'} += s/\bcudaReadModeElementType\b/hipReadModeElementType/g; $ft{'tex'} += s/\bcudaArray\b/hipArrary/g; $ft{'tex'} += s/\bcudaCreateChannelDesc\b/hipCreateChannelDesc/g; $ft{'tex'} += s/\bcudaBindTexture\b/hipBindTexture/g; $ft{'tex'} += s/\bcudaBindTextureToArray\b/hipBindTextureToArray/g; $ft{'tex'} += s/\bcudaUnbindTexture\b/hipUnbindTexture/g; } if ($count_conversions) { while (/(\bhip[A-Z]\w+\b)/g) { $convertedTags{$1}++; #print STDERR "HIP: $1 : ", $translateTags{$1}, "\n"; } } # guess that we are in device code , or at least in a file that calls device code. # will almost certainly call one of the coordiante functions - could be fooled by clever macros but usually works: my $hasDeviceCode = $countKeywords + $ft{'coord_func'} + $ft{'math_func'} + $ft{'special_func'}; unless ($quiet_warnings) { #print STDERR "Check WARNINGs\n"; # copy into array of lines, process line-by-line to show warnings: if ($hasDeviceCode or (/\bcuda/) or (/<<<.*>>>/) or (/(\bcublas[A-Z]\w+)/) or (/(\bcurand[A-Z]\w+)/) ) { my @lines = split /\n/, $_; my $tmp = $_; # copies the whole file, could be a little smarter here... my $line_num = 0; foreach (@lines) { $line_num ++; # remove any whitelisted words: foreach $w (@warn_whitelist) { s/\b$w\b/ZAP/ } my $tag ; if ((/(\bcuda[A-Z]\w+)/) or (/<<<.*>>>/)) { # flag any remaining code that look like cuda API calls, may want to add these to hipify $tag = (defined $1) ? $1 : "Launch"; } elsif (/(\bcublas[A-Z]\w+)/) { $warningsCublas++; $tag = $1; } elsif (/(\bcurand[A-Z]\w+)/) { $warningsCurand++; $tag = $1; } if (defined $tag) { $warnings++; $warningTags{$tag}++; print STDERR " warning: $fileName:#$line_num : $_"; print STDERR "\n"; } $s = warnUnsupportedSpecialFunctions($line_num); $warnings += $s; } $_ = $tmp; } } #-------- # Math libraries # To limit bogus translations, try to make sure we are in a kernel (ft{'builtin'} != 0): if (not $no_translate_builtins and ($hasDeviceCode > 0)) { $ft{'special_func'} += countSupportedSpecialFunctions(); } #-------- # Print it! # TODO - would like to move this code outside loop but it uses $_ which contains the whole file. unless ($no_output) { my $apiCalls = $ft{'err'} + $ft{'event'} + $ft{'mem'} + $ft{'stream'} + $ft{'dev'} + $ft{'def'} + $ft{'tex'} + $ft{'other'} + $ft{'math_func'}; my $kernStuff = $hasDeviceCode + $ft{'kern'}; my $totalCalls = $apiCalls + $kernStuff; $is_dos = m/\r\n$/; if ($totalCalls and ($countIncludes == 0) and ($kernStuff != 0)) { # If this file makes kernel builtin calls, and does not include the cuda_runtime.h, # then add an #include to match "magic" includes provided by NVCC. # This logic can miss cases where cuda_runtime.h is included by another include file. print $OUTFILE '#include "hip/hip_runtime.h"' . ($is_dos ? "\r\n" : "\n"); } print $OUTFILE "$_"; } $lineCount = $_ =~ tr/\n//; } my $totalConverted = totalStats(\%ft); #printf "TOTAL-CONV=%d\n", $totalConverted; if (($totalConverted+$warnings) and $print_stats) { printStats("info: converted", \@statNames, \%ft, $warnings, $lineCount); print STDERR " in '$fileName'\n"; } # Update totals for all files: addStats(\%tt, \%ft); $Twarnings += $warnings; $TlineCount += $lineCount; foreach $key (keys %warningTags) { $TwarningTags{$key} += $warningTags{$key}; } } #-- Print total stats for all files processed: if ($print_stats and ($fileCount > 1)) { print STDERR "\n"; printStats("info: TOTAL-converted", \@statNames, \%tt, $Twarnings, $TlineCount); print STDERR "\n"; foreach my $key (sort { $TwarningTags{$b} <=> $TwarningTags{$a} } keys %TwarningTags) { printf STDERR " warning: unconverted %s : %d\n", $key, $TwarningTags{$key}; } my $kernelCnt = keys %Tkernels; printf STDERR " kernels (%d total) : ", $kernelCnt; foreach my $key (sort { $Tkernels{$b} <=> $Tkernels{$a} } keys %Tkernels) { printf STDERR " %s(%d)", $key, $Tkernels{$key}; } print STDERR "\n"; print STDERR "\n"; } if ($count_conversions) { foreach my $key (sort { $convertedTags{$b} <=> $convertedTags{$a} } keys %convertedTags) { printf STDERR " %s %d\n", $key, $convertedTags{$key}; } } sub countSupportedSpecialFunctions { my $m = 0; #supported special functions: foreach $func ( # Synchronization: "__syncthreads", ) { # match math at the beginning of a word, but not if it already has a namespace qualifier ('::') : $m += m/[:]?[:]?\b($func)\b(\w*\()/g; } return $m; } sub warnUnsupportedSpecialFunctions { my $line_num = shift; my $m = 0; foreach $func ( # memory fence: "__threadfence_block", "__threadfence", "__threadfence_system", # Synchronization: "__syncthreads_count", "__syncthreads_and", "__syncthreads_or", # Read-only cache function: "__ldg", # Cross-lane and warp-vote instructions: #"__all", #"__any", #"__ballot", #"__popc", #"__clz", #"__shfl", #"__shfl_up", #"__shfl_down", #"__shfl_xor", "__prof_trigger", # too popular, and we can't tell if we are in device or host code. #"assert", #"printf", #"malloc", #"free", #"memset", #"memcpy" ) { # match math at the beginning of a word, but not if it already has a namespace qualifier ('::') : my $mt = m/[:]?[:]?\b($func)\b(\w*\()/g; if ($mt) { $m += $mt; print STDERR " warning: $fileName:#$line_num : unsupported device function : $_\n"; } } return $m; }